chore: initial public snapshot for github upload
@@ -0,0 +1,832 @@
# Start tracing memory allocations
import asyncio
import gc
import json
import os
import sys
import tracemalloc
from collections import Counter
from typing import Any, Dict, List, Optional, Tuple

from fastapi import APIRouter, Depends, HTTPException, Query

from litellm import get_secret_str
from litellm._logging import verbose_proxy_logger
from litellm.constants import PYTHON_GC_THRESHOLD
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

router = APIRouter()


# Configure garbage collection thresholds from environment variables
def configure_gc_thresholds():
    """Configure Python garbage collection thresholds from environment variables."""
    gc_threshold_env = PYTHON_GC_THRESHOLD
    if gc_threshold_env:
        try:
            # Parse threshold string like "1000,50,50"
            thresholds = [int(x.strip()) for x in gc_threshold_env.split(",")]
            if len(thresholds) == 3:
                gc.set_threshold(*thresholds)
                verbose_proxy_logger.info(f"GC thresholds set to: {thresholds}")
            else:
                verbose_proxy_logger.warning(
                    f"GC threshold not set: {gc_threshold_env}. Expected format: 'gen0,gen1,gen2'"
                )
        except ValueError as e:
            verbose_proxy_logger.warning(
                f"Failed to parse GC threshold: {gc_threshold_env}. Error: {e}"
            )

    # Log current thresholds
    current_thresholds = gc.get_threshold()
    verbose_proxy_logger.info(
        f"Current GC thresholds: gen0={current_thresholds[0]}, gen1={current_thresholds[1]}, gen2={current_thresholds[2]}"
    )


# Initialize GC configuration
configure_gc_thresholds()
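
# Example of driving configure_gc_thresholds() via the environment (illustrative;
# assumes the PYTHON_GC_THRESHOLD constant in litellm.constants is populated from
# a PYTHON_GC_THRESHOLD env var, format "gen0,gen1,gen2"):
#
#   export PYTHON_GC_THRESHOLD="1000,50,50"
#   litellm --config /path/to/config.yaml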


@router.get("/debug/asyncio-tasks")
async def get_active_tasks_stats():
    """
    Returns:
        total_active_tasks: int
        by_name: { coroutine_name: count }
    """
    MAX_TASKS_TO_CHECK = 5000
    # Gather all tasks in this event loop (including this endpoint's own task).
    all_tasks = asyncio.all_tasks()

    # Filter out tasks that are already done.
    active_tasks = [t for t in all_tasks if not t.done()]

    # Count how many active tasks exist, grouped by coroutine function name.
    counter: Counter = Counter()
    for idx, task in enumerate(active_tasks):
        # reasonable max circuit breaker
        if idx >= MAX_TASKS_TO_CHECK:
            break
        coro = task.get_coro()
        # Derive a human-readable name from the coroutine:
        name = (
            getattr(coro, "__qualname__", None)
            or getattr(coro, "__name__", None)
            or repr(coro)
        )
        counter[name] += 1

    return {
        "total_active_tasks": len(active_tasks),
        "by_name": dict(counter),
    }
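
# Illustrative response shape for GET /debug/asyncio-tasks (task names and
# counts will vary with workload):
#   {"total_active_tasks": 42, "by_name": {"Server.serve": 1, "user_api_key_auth": 3}}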


if os.environ.get("LITELLM_PROFILE", "false").lower() == "true":
    try:
        import objgraph  # type: ignore

        print("growth of objects")  # noqa
        objgraph.show_growth()
        print("\n\nMost common types")  # noqa
        objgraph.show_most_common_types()
        roots = objgraph.get_leaking_objects()
        print("\n\nLeaking objects")  # noqa
        objgraph.show_most_common_types(objects=roots)
    except ImportError:
        raise ImportError(
            "objgraph not found. Please install objgraph to use this feature."
        )

tracemalloc.start(10)
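
# Note: tracemalloc.start(10) stores up to 10 frames per allocation traceback.
# Deeper traces make the /memory-usage output easier to attribute to a call
# site, at the cost of extra tracing overhead.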


@router.get("/memory-usage", include_in_schema=False)
async def memory_usage():
    # Take a snapshot of the current memory usage
    snapshot = tracemalloc.take_snapshot()
    top_stats = snapshot.statistics("lineno")
    verbose_proxy_logger.debug("TOP STATS: %s", top_stats)

    # Get the top 50 memory usage lines
    top_50 = top_stats[:50]
    result = []
    for stat in top_50:
        # Traceback.format() returns a list of lines; join them so the output
        # is a readable string rather than a list repr
        formatted_traceback = "\n".join(stat.traceback.format(limit=10))
        result.append(f"{formatted_traceback}: {stat.size / 1024:.1f} KiB")

    return {"top_50_memory_usage": result}


@router.get("/memory-usage-in-mem-cache", include_in_schema=False)
async def memory_usage_in_mem_cache(
    _: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Returns the number of items in each in-memory cache on the proxy server:

    1. user_api_key_cache
    2. router_cache (llm_router)
    3. proxy_logging_cache (internal_usage_cache)
    """
    from litellm.proxy.proxy_server import (
        llm_router,
        proxy_logging_obj,
        user_api_key_cache,
    )

    if llm_router is None:
        num_items_in_llm_router_cache = 0
    else:
        num_items_in_llm_router_cache = len(
            llm_router.cache.in_memory_cache.cache_dict
        ) + len(llm_router.cache.in_memory_cache.ttl_dict)

    num_items_in_user_api_key_cache = len(
        user_api_key_cache.in_memory_cache.cache_dict
    ) + len(user_api_key_cache.in_memory_cache.ttl_dict)

    num_items_in_proxy_logging_obj_cache = len(
        proxy_logging_obj.internal_usage_cache.dual_cache.in_memory_cache.cache_dict
    ) + len(proxy_logging_obj.internal_usage_cache.dual_cache.in_memory_cache.ttl_dict)

    return {
        "num_items_in_user_api_key_cache": num_items_in_user_api_key_cache,
        "num_items_in_llm_router_cache": num_items_in_llm_router_cache,
        "num_items_in_proxy_logging_obj_cache": num_items_in_proxy_logging_obj_cache,
    }
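
# Illustrative response (each counter sums cache_dict and ttl_dict entries, so a
# key cached with a TTL contributes to both dicts):
#   {"num_items_in_user_api_key_cache": 210, "num_items_in_llm_router_cache": 0, ...}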


@router.get("/memory-usage-in-mem-cache-items", include_in_schema=False)
async def memory_usage_in_mem_cache_items(
    _: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Returns the raw contents of each in-memory cache on the proxy server:

    1. user_api_key_cache
    2. router_cache (llm_router)
    3. proxy_logging_cache (internal_usage_cache)
    """
    from litellm.proxy.proxy_server import (
        llm_router,
        proxy_logging_obj,
        user_api_key_cache,
    )

    if llm_router is None:
        llm_router_in_memory_cache_dict = {}
        llm_router_in_memory_ttl_dict = {}
    else:
        llm_router_in_memory_cache_dict = llm_router.cache.in_memory_cache.cache_dict
        llm_router_in_memory_ttl_dict = llm_router.cache.in_memory_cache.ttl_dict

    return {
        "user_api_key_cache": user_api_key_cache.in_memory_cache.cache_dict,
        "user_api_key_ttl": user_api_key_cache.in_memory_cache.ttl_dict,
        "llm_router_cache": llm_router_in_memory_cache_dict,
        "llm_router_ttl": llm_router_in_memory_ttl_dict,
        "proxy_logging_obj_cache": proxy_logging_obj.internal_usage_cache.dual_cache.in_memory_cache.cache_dict,
        "proxy_logging_obj_ttl": proxy_logging_obj.internal_usage_cache.dual_cache.in_memory_cache.ttl_dict,
    }


@router.get("/debug/memory/summary", include_in_schema=False)
async def get_memory_summary(
    _: UserAPIKeyAuth = Depends(user_api_key_auth),
) -> Dict[str, Any]:
    """
    Get simplified memory usage summary for the proxy.

    Returns:
        - worker_pid: Process ID
        - status: Overall health based on memory usage
        - memory: Process memory usage and RAM info
        - caches: Cache item counts and descriptions
        - garbage_collector: GC status and pending object counts

    Example usage:
        curl http://localhost:4000/debug/memory/summary -H "Authorization: Bearer sk-1234"

    For detailed analysis, call GET /debug/memory/details
    For cache management, use the cache management endpoints
    """
    from litellm.proxy.proxy_server import (
        llm_router,
        proxy_logging_obj,
        user_api_key_cache,
    )

    # Get process memory info
    process_memory = {}
    health_status = "healthy"

    try:
        import psutil

        process = psutil.Process()
        memory_info = process.memory_info()
        memory_mb = memory_info.rss / (1024 * 1024)
        memory_percent = process.memory_percent()

        process_memory = {
            "summary": f"{memory_mb:.1f} MB ({memory_percent:.1f}% of system memory)",
            "ram_usage_mb": round(memory_mb, 2),
            "system_memory_percent": round(memory_percent, 2),
        }

        # Check memory health status
        if memory_percent > 80:
            health_status = "critical"
        elif memory_percent > 60:
            health_status = "warning"
        else:
            health_status = "healthy"

    except ImportError:
        process_memory[
            "error"
        ] = "Install psutil for memory monitoring: pip install psutil"
    except Exception as e:
        process_memory["error"] = str(e)

    # Get cache information
    caches: Dict[str, Any] = {}
    total_cache_items = 0

    try:
        # User API key cache
        user_cache_items = len(user_api_key_cache.in_memory_cache.cache_dict)
        total_cache_items += user_cache_items
        caches["user_api_keys"] = {
            "count": user_cache_items,
            "count_readable": f"{user_cache_items:,}",
            "what_it_stores": "Validated API keys for faster authentication",
        }

        # Router cache
        if llm_router is not None:
            router_cache_items = len(llm_router.cache.in_memory_cache.cache_dict)
            total_cache_items += router_cache_items
            caches["llm_responses"] = {
                "count": router_cache_items,
                "count_readable": f"{router_cache_items:,}",
                "what_it_stores": "LLM responses for identical requests",
            }

        # Proxy logging cache
        logging_cache_items = len(
            proxy_logging_obj.internal_usage_cache.dual_cache.in_memory_cache.cache_dict
        )
        total_cache_items += logging_cache_items
        caches["usage_tracking"] = {
            "count": logging_cache_items,
            "count_readable": f"{logging_cache_items:,}",
            "what_it_stores": "Usage metrics before database write",
        }

    except Exception as e:
        caches["error"] = str(e)

    # Get garbage collector stats
    gc_enabled = gc.isenabled()
    objects_pending = gc.get_count()[0]
    uncollectable = len(gc.garbage)

    gc_info = {
        "status": "enabled" if gc_enabled else "disabled",
        "objects_awaiting_collection": objects_pending,
    }

    # Add warning if garbage collection issues detected
    if uncollectable > 0:
        gc_info[
            "warning"
        ] = f"{uncollectable} uncollectable objects (possible memory leak)"

    return {
        "worker_pid": os.getpid(),
        "status": health_status,
        "memory": process_memory,
        "caches": {
            "total_items": total_cache_items,
            "breakdown": caches,
        },
        "garbage_collector": gc_info,
    }


def _get_gc_statistics() -> Dict[str, Any]:
    """Get garbage collector statistics."""
    return {
        "enabled": gc.isenabled(),
        "thresholds": {
            "generation_0": gc.get_threshold()[0],
            "generation_1": gc.get_threshold()[1],
            "generation_2": gc.get_threshold()[2],
            "explanation": "Number of allocations before automatic collection for each generation",
        },
        "current_counts": {
            "generation_0": gc.get_count()[0],
            "generation_1": gc.get_count()[1],
            "generation_2": gc.get_count()[2],
            "explanation": "Current number of allocated objects in each generation",
        },
        "collection_history": [
            {
                "generation": i,
                "total_collections": stat["collections"],
                "total_collected": stat["collected"],
                "uncollectable": stat["uncollectable"],
            }
            for i, stat in enumerate(gc.get_stats())
        ],
    }


def _get_object_type_counts(top_n: int) -> Tuple[int, List[Dict[str, Any]]]:
    """Count objects by type and return total count and top N types."""
    type_counts: Counter = Counter()
    total_objects = 0

    for obj in gc.get_objects():
        total_objects += 1
        obj_type = type(obj).__name__
        type_counts[obj_type] += 1

    top_object_types = [
        {"type": obj_type, "count": count, "count_readable": f"{count:,}"}
        for obj_type, count in type_counts.most_common(top_n)
    ]

    return total_objects, top_object_types
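
# Example (illustrative) of exercising the helper directly in a REPL:
#   >>> total, top = _get_object_type_counts(top_n=3)
#   >>> top[0]
#   {'type': 'dict', 'count': 184023, 'count_readable': '184,023'}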


def _get_uncollectable_objects_info() -> Dict[str, Any]:
    """Get information about uncollectable objects (potential memory leaks)."""
    uncollectable = gc.garbage
    return {
        "count": len(uncollectable),
        "sample_types": [type(obj).__name__ for obj in uncollectable[:10]],
        "warning": "If count > 0, you may have reference cycles preventing garbage collection"
        if len(uncollectable) > 0
        else None,
    }
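
# Note: since Python 3.4 (PEP 442), objects with __del__ methods are collectable,
# so gc.garbage is normally empty unless gc.set_debug(gc.DEBUG_SAVEALL) is enabled.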


def _get_cache_memory_stats(
    user_api_key_cache, llm_router, proxy_logging_obj, redis_usage_cache
) -> Dict[str, Any]:
    """Calculate memory usage for all caches."""
    cache_stats: Dict[str, Any] = {}
    try:
        # User API key cache
        user_cache_size = sys.getsizeof(user_api_key_cache.in_memory_cache.cache_dict)
        user_ttl_size = sys.getsizeof(user_api_key_cache.in_memory_cache.ttl_dict)
        cache_stats["user_api_key_cache"] = {
            "num_items": len(user_api_key_cache.in_memory_cache.cache_dict),
            "cache_dict_size_bytes": user_cache_size,
            "ttl_dict_size_bytes": user_ttl_size,
            "total_size_mb": round(
                (user_cache_size + user_ttl_size) / (1024 * 1024), 2
            ),
        }

        # Router cache
        if llm_router is not None:
            router_cache_size = sys.getsizeof(
                llm_router.cache.in_memory_cache.cache_dict
            )
            router_ttl_size = sys.getsizeof(llm_router.cache.in_memory_cache.ttl_dict)
            cache_stats["llm_router_cache"] = {
                "num_items": len(llm_router.cache.in_memory_cache.cache_dict),
                "cache_dict_size_bytes": router_cache_size,
                "ttl_dict_size_bytes": router_ttl_size,
                "total_size_mb": round(
                    (router_cache_size + router_ttl_size) / (1024 * 1024), 2
                ),
            }

        # Proxy logging cache
        logging_cache_size = sys.getsizeof(
            proxy_logging_obj.internal_usage_cache.dual_cache.in_memory_cache.cache_dict
        )
        logging_ttl_size = sys.getsizeof(
            proxy_logging_obj.internal_usage_cache.dual_cache.in_memory_cache.ttl_dict
        )
        cache_stats["proxy_logging_cache"] = {
            "num_items": len(
                proxy_logging_obj.internal_usage_cache.dual_cache.in_memory_cache.cache_dict
            ),
            "cache_dict_size_bytes": logging_cache_size,
            "ttl_dict_size_bytes": logging_ttl_size,
            "total_size_mb": round(
                (logging_cache_size + logging_ttl_size) / (1024 * 1024), 2
            ),
        }

        # Redis cache info
        if redis_usage_cache is not None:
            cache_stats["redis_usage_cache"] = {
                "enabled": True,
                "cache_type": type(redis_usage_cache).__name__,
            }
            # Try to get Redis connection pool info if available
            try:
                if (
                    hasattr(redis_usage_cache, "redis_client")
                    and redis_usage_cache.redis_client
                ):
                    if hasattr(redis_usage_cache.redis_client, "connection_pool"):
                        pool_info = redis_usage_cache.redis_client.connection_pool  # type: ignore
                        cache_stats["redis_usage_cache"]["connection_pool"] = {
                            "max_connections": pool_info.max_connections
                            if hasattr(pool_info, "max_connections")
                            else None,
                            "connection_class": pool_info.connection_class.__name__
                            if hasattr(pool_info, "connection_class")
                            else None,
                        }
            except Exception as e:
                verbose_proxy_logger.debug(f"Error getting Redis pool info: {e}")
        else:
            cache_stats["redis_usage_cache"] = {"enabled": False}

    except Exception as e:
        verbose_proxy_logger.debug(f"Error calculating cache stats: {e}")
        cache_stats["error"] = str(e)

    return cache_stats
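
# Caveat: sys.getsizeof() on a dict measures only the dict structure itself, not
# the keys and values it references, so the *_size_mb figures above understate
# the true footprint of large caches.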


def _get_router_memory_stats(llm_router) -> Dict[str, Any]:
    """Get memory usage statistics for LiteLLM router."""
    litellm_router_memory: Dict[str, Any] = {}
    try:
        if llm_router is not None:
            # Model list memory size
            if hasattr(llm_router, "model_list") and llm_router.model_list:
                model_list_size = sys.getsizeof(llm_router.model_list)
                litellm_router_memory["model_list"] = {
                    "num_models": len(llm_router.model_list),
                    "size_bytes": model_list_size,
                    "size_mb": round(model_list_size / (1024 * 1024), 4),
                }

            # Model names set
            if hasattr(llm_router, "model_names") and llm_router.model_names:
                model_names_size = sys.getsizeof(llm_router.model_names)
                litellm_router_memory["model_names_set"] = {
                    "num_model_groups": len(llm_router.model_names),
                    "size_bytes": model_names_size,
                    "size_mb": round(model_names_size / (1024 * 1024), 4),
                }

            # Deployment names list
            if hasattr(llm_router, "deployment_names") and llm_router.deployment_names:
                deployment_names_size = sys.getsizeof(llm_router.deployment_names)
                litellm_router_memory["deployment_names"] = {
                    "num_deployments": len(llm_router.deployment_names),
                    "size_bytes": deployment_names_size,
                    "size_mb": round(deployment_names_size / (1024 * 1024), 4),
                }

            # Deployment latency map
            if (
                hasattr(llm_router, "deployment_latency_map")
                and llm_router.deployment_latency_map
            ):
                latency_map_size = sys.getsizeof(llm_router.deployment_latency_map)
                litellm_router_memory["deployment_latency_map"] = {
                    "num_tracked_deployments": len(llm_router.deployment_latency_map),
                    "size_bytes": latency_map_size,
                    "size_mb": round(latency_map_size / (1024 * 1024), 4),
                }

            # Fallback configuration
            if hasattr(llm_router, "fallbacks") and llm_router.fallbacks:
                fallbacks_size = sys.getsizeof(llm_router.fallbacks)
                litellm_router_memory["fallbacks"] = {
                    "num_fallback_configs": len(llm_router.fallbacks),
                    "size_bytes": fallbacks_size,
                    "size_mb": round(fallbacks_size / (1024 * 1024), 4),
                }

            # Total router object size
            router_obj_size = sys.getsizeof(llm_router)
            litellm_router_memory["router_object"] = {
                "size_bytes": router_obj_size,
                "size_mb": round(router_obj_size / (1024 * 1024), 4),
            }

        else:
            litellm_router_memory = {"note": "Router not initialized"}
    except Exception as e:
        verbose_proxy_logger.debug(f"Error getting router memory info: {e}")
        litellm_router_memory = {"error": str(e)}

    return litellm_router_memory


def _get_process_memory_info(
    worker_pid: int, include_process_info: bool
) -> Optional[Dict[str, Any]]:
    """Get process-level memory information using psutil."""
    if not include_process_info:
        return None

    try:
        import psutil

        process = psutil.Process()
        memory_info = process.memory_info()
        ram_usage_mb = round(memory_info.rss / (1024 * 1024), 2)
        virtual_memory_mb = round(memory_info.vms / (1024 * 1024), 2)
        memory_percent = round(process.memory_percent(), 2)

        return {
            "pid": worker_pid,
            "summary": f"Worker PID {worker_pid} using {ram_usage_mb:.1f} MB of RAM ({memory_percent:.1f}% of system memory)",
            "ram_usage": {
                "megabytes": ram_usage_mb,
                "description": "Actual physical RAM used by this process",
            },
            "virtual_memory": {
                "megabytes": virtual_memory_mb,
                "description": "Total virtual memory allocated (includes swapped memory)",
            },
            "system_memory_percent": {
                "percent": memory_percent,
                "description": "Percentage of total system RAM being used",
            },
            "open_file_handles": {
                "count": process.num_fds()
                if hasattr(process, "num_fds")
                else "N/A (Windows)",
                "description": "Number of open file descriptors/handles",
            },
            "threads": {
                "count": process.num_threads(),
                "description": "Number of active threads in this process",
            },
        }
    except ImportError:
        return {
            "pid": worker_pid,
            "error": "psutil not installed. Install with: pip install psutil",
        }
    except Exception as e:
        verbose_proxy_logger.debug(f"Error getting process info: {e}")
        return {"pid": worker_pid, "error": str(e)}


@router.get("/debug/memory/details", include_in_schema=False)
async def get_memory_details(
    _: UserAPIKeyAuth = Depends(user_api_key_auth),
    top_n: int = Query(20, description="Number of top object types to return"),
    include_process_info: bool = Query(True, description="Include process memory info"),
) -> Dict[str, Any]:
    """
    Get detailed memory diagnostics for deep debugging.

    Returns:
        - worker_pid: Process ID
        - process_memory: RAM usage, virtual memory, file handles, threads
        - garbage_collector: GC thresholds, counts, collection history
        - objects: Total tracked objects and top object types
        - uncollectable: Objects that can't be garbage collected (potential leaks)
        - cache_memory: Memory usage of user_api_key, router, and logging caches
        - router_memory: Memory usage of router components (model_list, deployment_names, etc.)

    Query Parameters:
        - top_n: Number of top object types to return (default: 20)
        - include_process_info: Include process-level memory info using psutil (default: true)

    Example usage:
        curl "http://localhost:4000/debug/memory/details?top_n=30" -H "Authorization: Bearer sk-1234"

    All memory sizes are reported in both bytes and MB.
    """
    from litellm.proxy.proxy_server import (
        llm_router,
        proxy_logging_obj,
        redis_usage_cache,
        user_api_key_cache,
    )

    worker_pid = os.getpid()

    # Collect all diagnostics using helper functions
    gc_stats = _get_gc_statistics()
    total_objects, top_object_types = _get_object_type_counts(top_n)
    uncollectable_info = _get_uncollectable_objects_info()
    cache_stats = _get_cache_memory_stats(
        user_api_key_cache, llm_router, proxy_logging_obj, redis_usage_cache
    )
    litellm_router_memory = _get_router_memory_stats(llm_router)
    process_info = _get_process_memory_info(worker_pid, include_process_info)

    return {
        "worker_pid": worker_pid,
        "process_memory": process_info,
        "garbage_collector": gc_stats,
        "objects": {
            "total_tracked": total_objects,
            "total_tracked_readable": f"{total_objects:,}",
            "top_types": top_object_types,
        },
        "uncollectable": uncollectable_info,
        "cache_memory": cache_stats,
        "router_memory": litellm_router_memory,
    }


@router.post("/debug/memory/gc/configure", include_in_schema=False)
async def configure_gc_thresholds_endpoint(
    _: UserAPIKeyAuth = Depends(user_api_key_auth),
    generation_0: int = Query(700, description="Generation 0 threshold (default: 700)"),
    generation_1: int = Query(10, description="Generation 1 threshold (default: 10)"),
    generation_2: int = Query(10, description="Generation 2 threshold (default: 10)"),
) -> Dict[str, Any]:
    """
    Configure Python garbage collection thresholds.

    Lower thresholds mean more frequent GC cycles (less memory, more CPU overhead).
    Higher thresholds mean less frequent GC cycles (more memory, less CPU overhead).

    Returns:
        - message: Confirmation message
        - previous_thresholds: Old threshold values
        - new_thresholds: New threshold values
        - objects_awaiting_collection: Current object count in gen-0
        - tip: Hint about when the next collection will occur

    Query Parameters:
        - generation_0: Number of allocations before gen-0 collection (default: 700)
        - generation_1: Number of gen-0 collections before gen-1 collection (default: 10)
        - generation_2: Number of gen-1 collections before gen-2 collection (default: 10)

    Example for more aggressive collection:
        curl -X POST "http://localhost:4000/debug/memory/gc/configure?generation_0=500" -H "Authorization: Bearer sk-1234"

    Example for less aggressive collection:
        curl -X POST "http://localhost:4000/debug/memory/gc/configure?generation_0=1000" -H "Authorization: Bearer sk-1234"

    Monitor memory usage with GET /debug/memory/summary after changes.
    """
    # Get current thresholds for logging
    old_thresholds = gc.get_threshold()

    # Set new thresholds with error handling
    try:
        gc.set_threshold(generation_0, generation_1, generation_2)
        verbose_proxy_logger.info(
            f"GC thresholds updated from {old_thresholds} to "
            f"({generation_0}, {generation_1}, {generation_2})"
        )
    except Exception as e:
        verbose_proxy_logger.error(f"Failed to set GC thresholds: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to set GC thresholds: {str(e)}"
        )

    # Get current object count to show immediate impact
    current_count = gc.get_count()[0]

    return {
        "message": "GC thresholds updated",
        "previous_thresholds": f"{old_thresholds[0]}, {old_thresholds[1]}, {old_thresholds[2]}",
        "new_thresholds": f"{generation_0}, {generation_1}, {generation_2}",
        "objects_awaiting_collection": current_count,
        # current_count can already exceed the new threshold; clamp at 0 so the
        # tip never reports a negative number of allocations
        "tip": f"Next collection will run after {max(generation_0 - current_count, 0)} more allocations",
    }


@router.get("/otel-spans", include_in_schema=False)
async def get_otel_spans():
    from litellm.proxy.proxy_server import open_telemetry_logger

    if open_telemetry_logger is None:
        return {
            "otel_spans": [],
            "spans_grouped_by_parent": {},
            "most_recent_parent": None,
        }

    otel_exporter = open_telemetry_logger.OTEL_EXPORTER
    if hasattr(otel_exporter, "get_finished_spans"):
        recorded_spans = otel_exporter.get_finished_spans()  # type: ignore
    else:
        recorded_spans = []

    print("Spans: ", recorded_spans)  # noqa

    most_recent_parent = None
    most_recent_start_time = 0  # span.start_time is in ns since the epoch
    spans_grouped_by_parent = {}
    for span in recorded_spans:
        if span.parent is not None:
            parent_trace_id = span.parent.trace_id
            if parent_trace_id not in spans_grouped_by_parent:
                spans_grouped_by_parent[parent_trace_id] = []
            spans_grouped_by_parent[parent_trace_id].append(span.name)

            # check time of span
            if span.start_time > most_recent_start_time:
                most_recent_parent = parent_trace_id
                most_recent_start_time = span.start_time

    # these are otel spans - get the span name
    span_names = [span.name for span in recorded_spans]
    return {
        "otel_spans": span_names,
        "spans_grouped_by_parent": spans_grouped_by_parent,
        "most_recent_parent": most_recent_parent,
    }
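
# Note: get_finished_spans() is provided by in-memory/test exporters such as the
# OpenTelemetry SDK's InMemorySpanExporter; most production exporters do not
# implement it, in which case this endpoint reports no spans.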


# Helper functions for debugging
def init_verbose_loggers():
    try:
        worker_config = get_secret_str("WORKER_CONFIG")
        if worker_config is None:
            return
        # if it's a file path, the config is loaded elsewhere;
        # otherwise assume it's a JSON string
        if os.path.isfile(worker_config):
            return
        _settings = json.loads(worker_config)
        if not isinstance(_settings, dict):
            return

        debug = _settings.get("debug", None)
        detailed_debug = _settings.get("detailed_debug", None)
        if debug is True:  # this needs to be first, so users can see Router init debug logs
            import logging

            from litellm._logging import (
                verbose_logger,
                verbose_proxy_logger,
                verbose_router_logger,
            )

            # this must ALWAYS remain logging.INFO, DO NOT MODIFY THIS
            verbose_logger.setLevel(level=logging.INFO)  # sets package logs to info
            verbose_router_logger.setLevel(
                level=logging.INFO
            )  # set router logs to info
            verbose_proxy_logger.setLevel(level=logging.INFO)  # set proxy logs to info
        if detailed_debug is True:
            import logging

            from litellm._logging import (
                verbose_logger,
                verbose_proxy_logger,
                verbose_router_logger,
            )

            verbose_logger.setLevel(level=logging.DEBUG)  # set package log to debug
            verbose_router_logger.setLevel(
                level=logging.DEBUG
            )  # set router logs to debug
            verbose_proxy_logger.setLevel(
                level=logging.DEBUG
            )  # set proxy logs to debug
        elif debug is False and detailed_debug is False:
            # users can control proxy debugging using env variable = 'LITELLM_LOG'
            # (os.environ.get never returns None here, so no None check is needed)
            litellm_log_setting = os.environ.get("LITELLM_LOG", "")
            if litellm_log_setting.upper() == "INFO":
                import logging

                from litellm._logging import (
                    verbose_proxy_logger,
                    verbose_router_logger,
                )

                # this must ALWAYS remain logging.INFO, DO NOT MODIFY THIS
                verbose_router_logger.setLevel(
                    level=logging.INFO
                )  # set router logs to info
                verbose_proxy_logger.setLevel(
                    level=logging.INFO
                )  # set proxy logs to info
            elif litellm_log_setting.upper() == "DEBUG":
                import logging

                from litellm._logging import (
                    verbose_proxy_logger,
                    verbose_router_logger,
                )

                verbose_router_logger.setLevel(
                    level=logging.DEBUG
                )  # set router logs to debug
                verbose_proxy_logger.setLevel(
                    level=logging.DEBUG
                )  # set proxy logs to debug
    except Exception as e:
        import logging

        logging.warning(f"Failed to init verbose loggers: {str(e)}")