Files
lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/proxy/hooks/litellm_skills/main.py
2026-03-26 20:06:14 +08:00

915 lines
34 KiB
Python

"""
Skills Injection Hook for LiteLLM Proxy
Main hook that orchestrates skill processing:
- Fetches skills from LiteLLM DB
- Injects SKILL.md content into system prompt
- Adds litellm_code_execution tool for automatic code execution
- Handles agentic loop internally when litellm_code_execution is called
For non-Anthropic models (e.g., Bedrock, OpenAI, etc.):
- Skills are converted to OpenAI-style tools
- Skill file content (SKILL.md) is extracted and injected into the system prompt
- litellm_code_execution tool is added - when model calls it, LiteLLM handles
execution automatically and returns final response with file_ids
Usage:
# Simple - LiteLLM handles everything automatically via proxy
# The container parameter triggers the SkillsInjectionHook
response = await litellm.acompletion(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "Create a bouncing ball GIF"}],
container={"skills": [{"skill_id": "litellm:skill_abc123"}]},
)
# Response includes file_ids for generated files
"""
import base64
import json
from typing import Any, Dict, List, Optional, Union
from litellm._logging import verbose_proxy_logger
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.litellm_proxy.skills.prompt_injection import (
SkillPromptInjectionHandler,
)
from litellm.proxy._types import LiteLLM_SkillsTable, UserAPIKeyAuth
from litellm.types.utils import CallTypes, CallTypesLiteral
class SkillsInjectionHook(CustomLogger):
"""
Pre/Post-call hook that processes skills from container.skills parameter.
Pre-call (async_pre_call_hook):
- Skills with 'litellm:' prefix are fetched from LiteLLM DB
- For Anthropic models: native skills pass through, LiteLLM skills converted to tools
- For non-Anthropic models: LiteLLM skills are converted to tools + execute_code tool
Post-call (async_post_call_success_deployment_hook):
- If response has litellm_code_execution tool call, automatically execute code
- Continue conversation loop until model gives final response
- Return response with generated files inline
This hook is called automatically by litellm during completion calls.
"""
def __init__(self, **kwargs):
from litellm.llms.litellm_proxy.skills.constants import (
DEFAULT_MAX_ITERATIONS,
DEFAULT_SANDBOX_TIMEOUT,
)
self.optional_params = kwargs
self.prompt_handler = SkillPromptInjectionHandler()
self.max_iterations = kwargs.get("max_iterations", DEFAULT_MAX_ITERATIONS)
self.sandbox_timeout = kwargs.get("sandbox_timeout", DEFAULT_SANDBOX_TIMEOUT)
super().__init__(**kwargs)
async def async_pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
cache: DualCache,
data: dict,
call_type: CallTypesLiteral,
) -> Optional[Union[Exception, str, dict]]:
"""
Process skills from container.skills before the LLM call.
1. Check if container.skills exists in request
2. Separate skills by prefix (litellm: vs native)
3. Fetch LiteLLM skills from database
4. For Anthropic: keep native skills in container
5. For non-Anthropic: convert LiteLLM skills to tools, inject content, add execute_code
"""
# Only process completion-type calls
if call_type not in ["completion", "acompletion", "anthropic_messages"]:
return data
container = data.get("container")
if not container or not isinstance(container, dict):
return data
skills = container.get("skills")
if not skills or not isinstance(skills, list):
return data
verbose_proxy_logger.debug(
f"SkillsInjectionHook: Processing {len(skills)} skills"
)
litellm_skills: List[LiteLLM_SkillsTable] = []
anthropic_skills: List[Dict[str, Any]] = []
# Separate skills by prefix
for skill in skills:
if not isinstance(skill, dict):
continue
skill_id = skill.get("skill_id", "")
if skill_id.startswith("litellm_"):
# Fetch from LiteLLM DB
db_skill = await self._fetch_skill_from_db(skill_id)
if db_skill:
litellm_skills.append(db_skill)
else:
verbose_proxy_logger.warning(
f"SkillsInjectionHook: Skill '{skill_id}' not found in LiteLLM DB"
)
else:
# Native Anthropic skill - pass through
anthropic_skills.append(skill)
# Check if using messages API spec (anthropic_messages call type)
# Messages API always uses Anthropic-style tool format
use_anthropic_format = call_type == "anthropic_messages"
if len(litellm_skills) > 0:
data = self._process_for_messages_api(
data=data,
litellm_skills=litellm_skills,
use_anthropic_format=use_anthropic_format,
)
return data
def _process_for_messages_api(
self,
data: dict,
litellm_skills: List[LiteLLM_SkillsTable],
use_anthropic_format: bool = True,
) -> dict:
"""
Process skills for messages API (Anthropic format tools).
- Converts skills to Anthropic-style tools (name, description, input_schema)
- Extracts and injects SKILL.md content into system prompt
- Adds litellm_code_execution tool for code execution
- Stores skill files in metadata for sandbox execution
"""
from litellm.llms.litellm_proxy.skills.code_execution import (
get_litellm_code_execution_tool_anthropic,
)
tools = data.get("tools", [])
skill_contents: List[str] = []
all_skill_files: Dict[str, Dict[str, bytes]] = {}
all_module_paths: List[str] = []
for skill in litellm_skills:
# Convert skill to Anthropic-style tool
tools.append(self.prompt_handler.convert_skill_to_anthropic_tool(skill))
# Extract skill content from file if available
content = self.prompt_handler.extract_skill_content(skill)
if content:
skill_contents.append(content)
# Extract all files for code execution
skill_files = self.prompt_handler.extract_all_files(skill)
if skill_files:
all_skill_files[skill.skill_id] = skill_files
for path in skill_files.keys():
if path.endswith(".py"):
all_module_paths.append(path)
if tools:
data["tools"] = tools
# Inject skill content into system prompt
# For Anthropic messages API, use top-level 'system' param instead of messages array
if skill_contents:
data = self.prompt_handler.inject_skill_content_to_messages(
data, skill_contents, use_anthropic_format=use_anthropic_format
)
# Add litellm_code_execution tool if we have skill files
if all_skill_files:
code_exec_tool = get_litellm_code_execution_tool_anthropic()
data["tools"] = data.get("tools", []) + [code_exec_tool]
# Store skill files in litellm_metadata for automatic code execution
data["litellm_metadata"] = data.get("litellm_metadata", {})
data["litellm_metadata"]["_skill_files"] = all_skill_files
data["litellm_metadata"]["_litellm_code_execution_enabled"] = True
# Remove container (not supported by underlying providers)
data.pop("container", None)
verbose_proxy_logger.debug(
f"SkillsInjectionHook: Messages API - converted {len(litellm_skills)} skills to Anthropic tools, "
f"injected {len(skill_contents)} skill contents, "
f"added litellm_code_execution tool with {len(all_module_paths)} modules"
)
return data
def _process_non_anthropic_model(
self,
data: dict,
litellm_skills: List[LiteLLM_SkillsTable],
) -> dict:
"""
Process skills for non-Anthropic models (OpenAI format tools).
- Converts skills to OpenAI-style tools
- Extracts and injects SKILL.md content
- Adds execute_code tool for code execution
- Stores skill files in metadata for sandbox execution
"""
tools = data.get("tools", [])
skill_contents: List[str] = []
all_skill_files: Dict[str, Dict[str, bytes]] = {}
all_module_paths: List[str] = []
for skill in litellm_skills:
# Convert skill to OpenAI-style tool
tools.append(self.prompt_handler.convert_skill_to_tool(skill))
# Extract skill content from file if available
content = self.prompt_handler.extract_skill_content(skill)
if content:
skill_contents.append(content)
# Extract all files for code execution
skill_files = self.prompt_handler.extract_all_files(skill)
if skill_files:
all_skill_files[skill.skill_id] = skill_files
# Collect Python module paths
for path in skill_files.keys():
if path.endswith(".py"):
all_module_paths.append(path)
if tools:
data["tools"] = tools
# Inject skill content into system prompt
if skill_contents:
data = self.prompt_handler.inject_skill_content_to_messages(
data, skill_contents
)
# Add litellm_code_execution tool if we have skill files
if all_skill_files:
from litellm.llms.litellm_proxy.skills.code_execution import (
get_litellm_code_execution_tool,
)
data["tools"] = data.get("tools", []) + [get_litellm_code_execution_tool()]
# Store skill files in litellm_metadata for automatic code execution
# Using litellm_metadata instead of metadata to avoid conflicts with user metadata
data["litellm_metadata"] = data.get("litellm_metadata", {})
data["litellm_metadata"]["_skill_files"] = all_skill_files
data["litellm_metadata"]["_litellm_code_execution_enabled"] = True
# Remove container for non-Anthropic (they don't support it)
data.pop("container", None)
verbose_proxy_logger.debug(
f"SkillsInjectionHook: Non-Anthropic model - converted {len(litellm_skills)} skills to tools, "
f"injected {len(skill_contents)} skill contents, "
f"added execute_code tool with {len(all_module_paths)} modules"
)
return data
async def _fetch_skill_from_db(
self, skill_id: str
) -> Optional[LiteLLM_SkillsTable]:
"""
Fetch a skill from the LiteLLM database.
Args:
skill_id: The skill ID (without 'litellm:' prefix)
Returns:
LiteLLM_SkillsTable or None if not found
"""
try:
from litellm.llms.litellm_proxy.skills.handler import LiteLLMSkillsHandler
return await LiteLLMSkillsHandler.fetch_skill_from_db(skill_id)
except Exception as e:
verbose_proxy_logger.warning(
f"SkillsInjectionHook: Error fetching skill {skill_id}: {e}"
)
return None
def _is_anthropic_model(self, model: str) -> bool:
"""
Check if the model is an Anthropic model using get_llm_provider.
Args:
model: The model name/identifier
Returns:
True if Anthropic model, False otherwise
"""
try:
from litellm.litellm_core_utils.get_llm_provider_logic import (
get_llm_provider,
)
_, custom_llm_provider, _, _ = get_llm_provider(model=model)
return custom_llm_provider == "anthropic"
except Exception:
# Fallback to simple check if get_llm_provider fails
return "claude" in model.lower() or model.lower().startswith("anthropic/")
async def async_post_call_success_deployment_hook(
self,
request_data: dict,
response: Any,
call_type: Optional[CallTypes],
) -> Optional[Any]:
"""
Post-call hook to handle automatic code execution.
Handles both OpenAI format (response.choices) and Anthropic/messages API
format (response["content"]).
If the response contains a tool call (litellm_code_execution or skill tool):
1. Execute the code in sandbox
2. Add result to messages
3. Make another LLM call
4. Repeat until model gives final response
5. Return modified response with generated files
"""
from litellm.llms.litellm_proxy.skills.code_execution import (
LiteLLMInternalTools,
)
# Check if code execution is enabled for this request
litellm_metadata = request_data.get("litellm_metadata") or {}
metadata = request_data.get("metadata") or {}
code_exec_enabled = litellm_metadata.get(
"_litellm_code_execution_enabled"
) or metadata.get("_litellm_code_execution_enabled")
if not code_exec_enabled:
return None
# Get skill files
skill_files_by_id = litellm_metadata.get("_skill_files") or metadata.get(
"_skill_files", {}
)
all_skill_files: Dict[str, bytes] = {}
for files_dict in skill_files_by_id.values():
all_skill_files.update(files_dict)
if not all_skill_files:
verbose_proxy_logger.warning(
"SkillsInjectionHook: No skill files found, cannot execute code"
)
return None
# Check for tool calls - handle both Anthropic and OpenAI formats
tool_calls = self._extract_tool_calls(response)
if not tool_calls:
return None
# Check if any tool call needs execution (litellm_code_execution or skill tool)
has_executable_tool = False
for tc in tool_calls:
tool_name = tc.get("name", "")
# Execute if it's litellm_code_execution OR a skill tool (skill_xxx)
if (
tool_name == LiteLLMInternalTools.CODE_EXECUTION.value
or tool_name.startswith("skill_")
):
has_executable_tool = True
break
if not has_executable_tool:
return None
verbose_proxy_logger.debug(
"SkillsInjectionHook: Detected tool call, starting execution loop"
)
# Start the agentic loop
return await self._execute_code_loop_messages_api(
data=request_data,
response=response,
skill_files=all_skill_files,
)
def _extract_tool_calls(self, response: Any) -> List[Dict[str, Any]]:
"""Extract tool calls from response, handling both formats."""
tool_calls = []
# Get content - handle both dict and object responses
content = None
if isinstance(response, dict):
content = response.get("content", [])
elif hasattr(response, "content"):
content = response.content
# Anthropic/messages API format: response has "content" list with tool_use blocks
if content:
for block in content:
if isinstance(block, dict) and block.get("type") == "tool_use":
tool_calls.append(
{
"id": block.get("id"),
"name": block.get("name"),
"input": block.get("input", {}),
}
)
elif (
hasattr(block, "type")
and getattr(block, "type", None) == "tool_use"
):
tool_calls.append(
{
"id": getattr(block, "id", None),
"name": getattr(block, "name", None),
"input": getattr(block, "input", {}),
}
)
# OpenAI format: response has choices[0].message.tool_calls
if not tool_calls and hasattr(response, "choices") and response.choices: # type: ignore[union-attr]
msg = response.choices[0].message # type: ignore[union-attr]
if hasattr(msg, "tool_calls") and msg.tool_calls:
for tc in msg.tool_calls:
tool_calls.append(
{
"id": tc.id,
"name": tc.function.name,
"input": json.loads(tc.function.arguments)
if tc.function.arguments
else {},
}
)
return tool_calls
    async def _execute_code_loop_messages_api(
        self,
        data: dict,
        response: Any,
        skill_files: Dict[str, bytes],
    ) -> Any:
        """
        Run the agentic code-execution loop for the messages API (Anthropic format).

        Each iteration: extract tool calls from the current response, execute
        them in the sandbox, append the assistant turn and tool results to the
        conversation, then make a follow-up LLM call — until the model stops
        requesting tools or self.max_iterations is reached.

        Args:
            data: Original request data (model, messages, tools, max_tokens).
            response: Initial LLM response that contained a tool call.
            skill_files: Flattened {path: file-bytes} map for the sandbox.

        Returns:
            The final response with generated files attached, or None when the
            initial response is None. On a failed/None follow-up call, the
            INITIAL response is returned with whatever files were generated.
        """
        import litellm
        from litellm.llms.litellm_proxy.skills.code_execution import (
            LiteLLMInternalTools,
        )
        from litellm.llms.litellm_proxy.skills.sandbox_executor import (
            SkillsSandboxExecutor,
        )
        # Ensure response is not None
        if response is None:
            verbose_proxy_logger.error(
                "SkillsInjectionHook: Response is None, cannot execute code loop"
            )
            return None
        model = data.get("model", "")
        messages = list(data.get("messages", []))  # copy: turns are appended below
        tools = data.get("tools", [])
        max_tokens = data.get("max_tokens", 4096)
        executor = SkillsSandboxExecutor(timeout=self.sandbox_timeout)
        generated_files: List[Dict[str, Any]] = []  # accumulated across iterations
        current_response = response
        for iteration in range(self.max_iterations):
            # Extract tool calls from current response
            tool_calls = self._extract_tool_calls(current_response)
            # stop_reason works for both dict and object responses
            stop_reason = (
                current_response.get("stop_reason")
                if isinstance(current_response, dict)
                else getattr(current_response, "stop_reason", None)
            )
            # Get content for assistant message - convert to plain dicts so
            # the conversation history is serializable for the next call
            raw_content = (
                current_response.get("content", [])
                if isinstance(current_response, dict)
                else getattr(current_response, "content", [])
            )
            content_blocks = []
            for block in raw_content or []:
                if isinstance(block, dict):
                    content_blocks.append(block)
                elif hasattr(block, "model_dump"):
                    # pydantic model block
                    content_blocks.append(block.model_dump())
                elif hasattr(block, "__dict__"):
                    content_blocks.append(dict(block.__dict__))
                else:
                    # last resort: stringify unknown block types
                    content_blocks.append({"type": "text", "text": str(block)})
            # Build assistant message for conversation history (Anthropic format)
            assistant_msg = {"role": "assistant", "content": content_blocks}
            messages.append(assistant_msg)
            # Check if we're done (no tool calls). The append above is
            # harmless on the final turn since messages is not used again.
            if stop_reason != "tool_use" or not tool_calls:
                verbose_proxy_logger.debug(
                    f"SkillsInjectionHook: Loop completed after {iteration + 1} iterations, "
                    f"{len(generated_files)} files generated"
                )
                return self._attach_files_to_response(current_response, generated_files)
            # Process tool calls
            tool_results = []
            for tc in tool_calls:
                tool_name = tc.get("name", "")
                tool_id = tc.get("id", "")
                tool_input = tc.get("input", {})
                # Execute if it's litellm_code_execution OR a skill tool
                if tool_name == LiteLLMInternalTools.CODE_EXECUTION.value:
                    code = tool_input.get("code", "")
                    result = await self._execute_code(
                        code, skill_files, executor, generated_files
                    )
                elif tool_name.startswith("skill_"):
                    # Skill tool - execute the skill's code
                    result = await self._execute_skill_tool(
                        tool_name, tool_input, skill_files, executor, generated_files
                    )
                else:
                    # Unknown tool: report back rather than failing the loop
                    result = f"Tool '{tool_name}' not handled"
                tool_results.append(
                    {
                        "type": "tool_result",
                        "tool_use_id": tool_id,
                        "content": result,
                    }
                )
            # Add tool results to messages (Anthropic format: tool results are
            # delivered in a user-role message)
            messages.append({"role": "user", "content": tool_results})
            # Make next LLM call
            verbose_proxy_logger.debug(
                f"SkillsInjectionHook: Making LLM call iteration {iteration + 2}"
            )
            try:
                # NOTE(review): relies on litellm.anthropic.acreate being the
                # messages-API entry point — confirm this attribute exists on
                # the litellm version in use.
                current_response = await litellm.anthropic.acreate(
                    model=model,
                    messages=messages,
                    tools=tools,
                    max_tokens=max_tokens,
                )
                if current_response is None:
                    verbose_proxy_logger.error(
                        "SkillsInjectionHook: LLM call returned None"
                    )
                    # Fall back to the initial response with files so far
                    return self._attach_files_to_response(response, generated_files)
            except Exception as e:
                verbose_proxy_logger.error(f"SkillsInjectionHook: LLM call failed: {e}")
                return self._attach_files_to_response(response, generated_files)
        verbose_proxy_logger.warning(
            f"SkillsInjectionHook: Max iterations ({self.max_iterations}) reached"
        )
        return self._attach_files_to_response(current_response, generated_files)
async def _execute_code(
self,
code: str,
skill_files: Dict[str, bytes],
executor: Any,
generated_files: List[Dict[str, Any]],
) -> str:
"""Execute code in sandbox and return result string."""
try:
verbose_proxy_logger.debug(
f"SkillsInjectionHook: Executing code ({len(code)} chars)"
)
exec_result = executor.execute(code=code, skill_files=skill_files)
result = exec_result.get("output", "") or ""
# Collect generated files
if exec_result.get("files"):
for f in exec_result["files"]:
generated_files.append(
{
"name": f["name"],
"mime_type": f["mime_type"],
"content_base64": f["content_base64"],
"size": len(base64.b64decode(f["content_base64"])),
}
)
result += f"\n\nGenerated file: {f['name']}"
if exec_result.get("error"):
result += f"\n\nError: {exec_result['error']}"
return result or "Code executed successfully"
except Exception as e:
return f"Code execution failed: {str(e)}"
    async def _execute_skill_tool(
        self,
        tool_name: str,
        tool_input: Dict[str, Any],
        skill_files: Dict[str, bytes],
        executor: Any,
        generated_files: List[Dict[str, Any]],
    ) -> str:
        """
        Execute a skill tool by generating and running code based on skill content.

        Heuristic: pick the skill's "main" Python module (first path containing
        builder/creator/generator, else the first non-__init__ module), then
        run a generated script that imports it, instantiates the first usable
        class, and tries common entry methods (create/build/generate/save).

        Note: tool_name and tool_input are currently unused by this heuristic;
        the generated script relies only on the skill's files.
        """
        # Generate code based on available skill modules
        # Look for Python modules in the skill
        python_modules = [
            p
            for p in skill_files.keys()
            if p.endswith(".py") and not p.endswith("__init__.py")
        ]
        # Try to find the main builder/creator module
        main_module = None
        for mod in python_modules:
            if (
                "builder" in mod.lower()
                or "creator" in mod.lower()
                or "generator" in mod.lower()
            ):
                main_module = mod
                break
        if not main_module and python_modules:
            # Use first non-init module
            main_module = python_modules[0]
        if main_module:
            # Convert path to import: "core/gif_builder.py" -> "core.gif_builder"
            import_path = main_module.replace("/", ".").replace(".py", "")
            # Generate code that imports and uses the module.
            # Doubled braces ({{...}}) survive this f-string and become
            # single-brace f-string placeholders in the sandbox script.
            code = f"""
# Auto-generated code to execute skill
import sys
sys.path.insert(0, '/sandbox')
from {import_path} import *
# Try to find and use a Builder/Creator class
import inspect
module = __import__('{import_path}', fromlist=[''])
for name, obj in inspect.getmembers(module):
    if inspect.isclass(obj) and name != 'object':
        try:
            instance = obj()
            # Try common methods
            if hasattr(instance, 'create'):
                result = instance.create()
            elif hasattr(instance, 'build'):
                result = instance.build()
            elif hasattr(instance, 'generate'):
                result = instance.generate()
            elif hasattr(instance, 'save'):
                instance.save('output.gif')
            print(f'Used {{name}} class')
            break
        except Exception as e:
            print(f'Error with {{name}}: {{e}}')
            continue
# List generated files
import os
for f in os.listdir('.'):
    if f.endswith(('.gif', '.png', '.jpg')):
        print(f'Generated: {{f}}')
"""
        else:
            # Fallback generic code
            code = """
print('No executable skill module found')
"""
        return await self._execute_code(code, skill_files, executor, generated_files)
    async def _execute_code_loop(
        self,
        data: dict,
        response: Any,
        skill_files: Dict[str, bytes],
    ) -> Any:
        """
        Execute the code execution loop until model gives final response.

        OpenAI-format variant: reads choices[0].message / finish_reason from
        each response, executes litellm_code_execution tool calls in the
        sandbox, appends assistant + tool messages, and repeats up to
        self.max_iterations times.

        Args:
            data: Original request data (model, messages, tools, extras).
            response: Initial OpenAI-format LLM response with a tool call.
            skill_files: Flattened {path: file-bytes} map for the sandbox.

        Returns:
            The final response with generated files inline.
        """
        import litellm
        from litellm.llms.litellm_proxy.skills.code_execution import (
            LiteLLMInternalTools,
        )
        from litellm.llms.litellm_proxy.skills.sandbox_executor import (
            SkillsSandboxExecutor,
        )
        model = data.get("model", "")
        messages = list(data.get("messages", []))  # copy: turns appended below
        tools = data.get("tools", [])
        # Keys to exclude when passing through to acompletion
        # These are either handled explicitly or are internal LiteLLM fields
        _EXCLUDED_ACOMPLETION_KEYS = frozenset(
            {
                "messages",
                "model",
                "tools",
                "metadata",
                "litellm_metadata",
                "container",
            }
        )
        # NOTE(review): kwargs is computed but only 'max_tokens' is read below
        # — the remaining keys are never forwarded to the follow-up call
        # despite the comment above; confirm whether pass-through is intended.
        kwargs = {k: v for k, v in data.items() if k not in _EXCLUDED_ACOMPLETION_KEYS}
        executor = SkillsSandboxExecutor(timeout=self.sandbox_timeout)
        generated_files: List[Dict[str, Any]] = []  # accumulated across iterations
        current_response: Any = response
        for iteration in range(self.max_iterations):
            # OpenAI format response has choices[0].message
            assistant_message = current_response.choices[0].message  # type: ignore[union-attr]
            stop_reason = current_response.choices[0].finish_reason  # type: ignore[union-attr]
            # Build assistant message for conversation history
            assistant_msg_dict: Dict[str, Any] = {
                "role": "assistant",
                "content": assistant_message.content,
            }
            if assistant_message.tool_calls:
                # Re-serialize tool calls into plain dicts for the history
                assistant_msg_dict["tool_calls"] = [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {
                            "name": tc.function.name,
                            "arguments": tc.function.arguments,
                        },
                    }
                    for tc in assistant_message.tool_calls
                ]
            messages.append(assistant_msg_dict)
            # Check if we're done (no tool calls)
            if stop_reason != "tool_calls" or not assistant_message.tool_calls:
                verbose_proxy_logger.debug(
                    f"SkillsInjectionHook: Code execution loop completed after "
                    f"{iteration + 1} iterations, {len(generated_files)} files generated"
                )
                # Attach generated files to response
                return self._attach_files_to_response(current_response, generated_files)
            # Process tool calls
            for tool_call in assistant_message.tool_calls:
                tool_name = tool_call.function.name
                if tool_name == LiteLLMInternalTools.CODE_EXECUTION.value:
                    tool_result = await self._execute_code_tool(
                        tool_call=tool_call,
                        skill_files=skill_files,
                        executor=executor,
                        generated_files=generated_files,
                    )
                else:
                    # Non-code-execution tool - cannot handle
                    tool_result = f"Tool '{tool_name}' not handled automatically"
                messages.append(
                    {
                        "role": "tool",
                        "tool_call_id": tool_call.id,
                        "content": tool_result,
                    }
                )
            # Make next LLM call using the messages API
            verbose_proxy_logger.debug(
                f"SkillsInjectionHook: Making LLM call iteration {iteration + 2}"
            )
            # NOTE(review): this loop parses responses as OpenAI-format
            # (choices[0]) yet the follow-up call goes through
            # litellm.anthropic.acreate — confirm the returned object exposes
            # .choices, otherwise the next iteration will fail.
            current_response = await litellm.anthropic.acreate(
                model=model,
                messages=messages,
                tools=tools,
                max_tokens=kwargs.get("max_tokens", 4096),
            )
        # Max iterations reached
        verbose_proxy_logger.warning(
            f"SkillsInjectionHook: Max iterations ({self.max_iterations}) reached"
        )
        return self._attach_files_to_response(current_response, generated_files)
async def _execute_code_tool(
self,
tool_call: Any,
skill_files: Dict[str, bytes],
executor: Any,
generated_files: List[Dict[str, Any]],
) -> str:
"""Execute a litellm_code_execution tool call and return result string."""
try:
args = json.loads(tool_call.function.arguments)
code = args.get("code", "")
verbose_proxy_logger.debug(
f"SkillsInjectionHook: Executing code ({len(code)} chars)"
)
exec_result = executor.execute(
code=code,
skill_files=skill_files,
)
# Build tool result content
tool_result = exec_result.get("output", "") or ""
# Collect generated files
if exec_result.get("files"):
tool_result += "\n\nGenerated files:"
for f in exec_result["files"]:
file_content = base64.b64decode(f["content_base64"])
generated_files.append(
{
"name": f["name"],
"mime_type": f["mime_type"],
"content_base64": f["content_base64"],
"size": len(file_content),
}
)
tool_result += f"\n- {f['name']} ({len(file_content)} bytes)"
verbose_proxy_logger.debug(
f"SkillsInjectionHook: Generated file {f['name']} "
f"({len(file_content)} bytes)"
)
if exec_result.get("error"):
tool_result += f"\n\nError:\n{exec_result['error']}"
return tool_result
except Exception as e:
verbose_proxy_logger.error(
f"SkillsInjectionHook: Code execution failed: {e}"
)
return f"Code execution failed: {str(e)}"
def _attach_files_to_response(
self,
response: Any,
generated_files: List[Dict[str, Any]],
) -> Any:
"""
Attach generated files to the response object.
Files are added to response._litellm_generated_files for easy access.
For dict responses, files are added as a key.
"""
if not generated_files:
return response
# Handle dict response (Anthropic/messages API format)
if isinstance(response, dict):
response["_litellm_generated_files"] = generated_files
verbose_proxy_logger.debug(
f"SkillsInjectionHook: Attached {len(generated_files)} files to dict response"
)
return response
# Handle object response (OpenAI format)
try:
response._litellm_generated_files = generated_files
except AttributeError:
pass
# Also add to model_extra if available (for serialization)
if hasattr(response, "model_extra"):
if response.model_extra is None:
response.model_extra = {}
response.model_extra["_litellm_generated_files"] = generated_files
verbose_proxy_logger.debug(
f"SkillsInjectionHook: Attached {len(generated_files)} files to response"
)
return response
# Global instance for registration
skills_injection_hook = SkillsInjectionHook()

# NOTE(review): imported at the bottom rather than the top — presumably to
# avoid a circular import while litellm's package init loads this module;
# confirm, and move to the top-of-file import block if no cycle exists.
import litellm

# Register the hook so litellm invokes it on every completion call.
litellm.logging_callback_manager.add_litellm_callback(skills_injection_hook)