chore: initial snapshot for gitea/github upload
This commit is contained in:
@@ -0,0 +1,275 @@
|
||||
#### OCR Endpoints #####
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, Optional, cast
|
||||
|
||||
import orjson
|
||||
from fastapi import APIRouter, Depends, Request, Response, UploadFile
|
||||
from fastapi.responses import ORJSONResponse
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.ocr.main import convert_file_document_to_url_document, get_mime_type
|
||||
from litellm.proxy._types import *
|
||||
from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth, user_api_key_auth
|
||||
from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
|
||||
|
||||
# Router on which the OCR endpoints in this module are registered.
router = APIRouter()
|
||||
|
||||
|
||||
def _build_document_from_upload(
    file_content: bytes,
    filename: Optional[str],
    content_type: Optional[str],
) -> Dict[str, str]:
    """
    Wrap uploaded bytes in a ``"file"`` document and convert it to a URL document.

    MIME type resolution:

    1. Take ``content_type`` with any parameters after ``;`` removed.
    2. If that is missing or is the generic ``application/octet-stream`` and a
       filename is available, use ``get_mime_type(filename)`` instead.
    3. If nothing could be resolved, fall back to ``application/octet-stream``.

    Args:
        file_content: Raw bytes of the uploaded file.
        filename: Name supplied with the upload, if any.
        content_type: Content-Type reported for the upload, if any.

    Returns:
        The result of ``convert_file_document_to_url_document`` for the
        assembled file document.
    """
    resolved_mime: Optional[str] = None
    if content_type:
        # Drop parameters such as "; charset=utf-8".
        resolved_mime = content_type.split(";")[0].strip()

    # A generic or absent type carries no information, so prefer the filename.
    is_generic = not resolved_mime or resolved_mime == "application/octet-stream"
    if is_generic and filename:
        resolved_mime = get_mime_type(filename)

    file_document = {
        "type": "file",
        "file": file_content,
        "mime_type": resolved_mime or "application/octet-stream",
    }
    return convert_file_document_to_url_document(file_document)
|
||||
|
||||
|
||||
async def _parse_multipart_form(request: Request) -> Dict[str, Any]:
    """
    Extract OCR data from a multipart form request.

    Awaits ``request.form()`` (per the original note, this reuses the form if
    auth middleware already parsed it), reads the uploaded ``file`` field in
    full, and turns it into a document via ``_build_document_from_upload``.
    Every other form field is copied into the result; string values that are
    valid JSON (e.g. ``pages=[0,1,2]``) are decoded, all others are kept as-is.
    Fields named ``file`` or ``document`` are never copied, so a client cannot
    override the document built from the upload.

    Args:
        request: The incoming request carrying multipart form data.

    Returns:
        A dict with 'document', 'model', and any other OCR params.

    Raises:
        ValueError: If the form cannot be parsed, the 'file' field is missing
            or not file-like, or the uploaded file is empty.
    """
    try:
        form = await request.form()
    except Exception as e:
        # Chain explicitly so the underlying parser error is kept as __cause__.
        raise ValueError(
            f"Failed to parse multipart form data: {str(e)}. "
            "When using curl with --form/-F, do NOT set the Content-Type header "
            "manually — curl will set it automatically with the required boundary."
        ) from e

    uploaded_file = form.get("file")
    # request.form() may return either a FastAPI or Starlette UploadFile
    # depending on middleware; check both via isinstance (FastAPI's UploadFile
    # is a subclass of Starlette's) and fall back to duck-type check.
    if uploaded_file is None or (
        not isinstance(uploaded_file, UploadFile) and not hasattr(uploaded_file, "read")
    ):
        raise ValueError(
            "Multipart OCR request must include a 'file' field with the document to process"
        )

    uploaded_file = cast(UploadFile, uploaded_file)

    # Seek to start in case the file was already partially read by middleware
    await uploaded_file.seek(0)
    file_content = await uploaded_file.read()
    if not file_content:
        raise ValueError("Uploaded file is empty")

    document = _build_document_from_upload(
        file_content=file_content,
        filename=uploaded_file.filename,
        content_type=uploaded_file.content_type,
    )

    data: Dict[str, Any] = {"document": document}

    for field_name, field_value in form.items():
        if field_name in ("file", "document"):
            continue
        if isinstance(field_value, str):
            # Try to parse JSON values (e.g. pages=[0,1,2]); plain strings such
            # as model names are not valid JSON and are kept verbatim.
            # json.JSONDecodeError is a ValueError subclass, so one clause suffices.
            try:
                data[field_name] = json.loads(field_value)
            except ValueError:
                data[field_name] = field_value
        else:
            data[field_name] = field_value

    verbose_proxy_logger.debug(
        f"OCR multipart form request parsed - model: {data.get('model')}, "
        f"document_type: {document['type']}, "
        f"filename: {uploaded_file.filename}"
    )

    return data
|
||||
|
||||
|
||||
async def _parse_ocr_request(request: Request) -> Dict[str, Any]:
    """
    Parse an OCR request, supporting both JSON and multipart form data.

    JSON body (existing behavior):
        {
            "model": "mistral/mistral-ocr-latest",
            "document": {"type": "document_url", "document_url": "https://..."}
        }

    Multipart form data (new):
        - file: the uploaded file
        - model: model name (form field)
        - Any other OCR params as form fields (pages, include_image_base64, etc.)

    Args:
        request: The incoming request.

    Returns:
        A dict suitable for passing to the OCR processing pipeline.

    Raises:
        ValueError: If the body is empty, is not valid JSON, is valid JSON but
            not an object, or contains a document of type "file" (which is
            only accepted via multipart upload). Errors raised by
            ``_parse_multipart_form`` propagate unchanged.
    """
    content_type = request.headers.get("content-type", "")

    if "multipart/form-data" in content_type.lower():
        return await _parse_multipart_form(request)

    # --- JSON body (existing behavior) ---
    try:
        body = await request.body()
    except RuntimeError:
        # Body stream was consumed by auth middleware (e.g., form parsing).
        body = b""

    if not body:
        # The body may be empty because the auth middleware already parsed
        # it as form data (e.g., _read_request_body called request.form()).
        # Check if form data is available.
        if getattr(request, "_form", None) is not None:
            verbose_proxy_logger.debug(
                "OCR request body is empty but form data is available from middleware — "
                "processing as multipart form."
            )
            return await _parse_multipart_form(request)

        raise ValueError(
            "Empty request body. For file uploads, use multipart/form-data content type "
            "with a file field. When using curl with --form/-F, do NOT set the Content-Type "
            "header manually."
        )

    try:
        data = orjson.loads(body)
    except orjson.JSONDecodeError as e:
        raise ValueError(
            f"Invalid JSON in request body: {e}. "
            "Ensure the request body is valid JSON with Content-Type: application/json, "
            "or use multipart/form-data for file uploads."
        ) from e

    # Valid JSON is not necessarily an object (it may be a list, string or
    # number). The declared return type is a dict, so reject anything else
    # here with a clear message instead of handing it on.
    if not isinstance(data, dict):
        raise ValueError(
            "OCR request body must be a JSON object with 'model' and 'document' fields, "
            f"got {type(data).__name__}."
        )

    # Security: reject type="file" documents received via JSON.
    # The "file" document type is designed for local SDK usage where the
    # caller and the process share a filesystem. In the proxy context the
    # caller is remote, so allowing a file-path string would let an
    # authenticated user read arbitrary files from the server's filesystem.
    # File uploads must go through multipart/form-data instead.
    doc = data.get("document")
    if isinstance(doc, dict) and doc.get("type") == "file":
        raise ValueError(
            "document type 'file' is not supported through the JSON API. "
            "To upload a local file, use multipart/form-data with a 'file' field. "
            "For JSON requests, use 'document_url' or 'image_url' document types."
        )

    return data
|
||||
|
||||
|
||||
@router.post(
    "/v1/ocr",
    dependencies=[Depends(user_api_key_auth)],
    response_class=ORJSONResponse,
    tags=["ocr"],
)
@router.post(
    "/ocr",
    dependencies=[Depends(user_api_key_auth)],
    response_class=ORJSONResponse,
    tags=["ocr"],
)
async def ocr(
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    OCR endpoint for extracting text from documents and images.

    Supports two input modes:

    **1. JSON body** (Mistral OCR API compatible):
    ```bash
    curl -X POST "http://localhost:4000/v1/ocr" \
        -H "Authorization: Bearer sk-1234" \
        -H "Content-Type: application/json" \
        -d '{
            "model": "mistral-ocr",
            "document": {
                "type": "document_url",
                "document_url": "https://arxiv.org/pdf/2201.04234"
            }
        }'
    ```

    **2. Multipart form file upload**:
    ```bash
    curl -X POST "http://localhost:4000/v1/ocr" \
        -H "Authorization: Bearer sk-1234" \
        -F "model=mistral-ocr" \
        -F "file=@document.pdf"
    ```
    """
    # Imported inside the function rather than at module level.
    from litellm.proxy.proxy_server import (
        general_settings,
        llm_router,
        proxy_config,
        proxy_logging_obj,
        select_data_generator,
        user_api_base,
        user_max_tokens,
        user_model,
        user_request_timeout,
        user_temperature,
        version,
    )

    # Starts empty so the error handler below still receives a dict when
    # request parsing itself is what failed.
    data: dict = {}
    try:
        # Parse request body (JSON or multipart form)
        data = await _parse_ocr_request(request)

        # Hand the parsed payload to the shared request-processing pipeline.
        request_processor = ProxyBaseLLMRequestProcessing(data=data)
        return await request_processor.base_process_llm_request(
            request=request,
            fastapi_response=fastapi_response,
            user_api_key_dict=user_api_key_dict,
            route_type="aocr",
            proxy_logging_obj=proxy_logging_obj,
            llm_router=llm_router,
            general_settings=general_settings,
            proxy_config=proxy_config,
            select_data_generator=select_data_generator,
            model=None,
            user_model=user_model,
            user_temperature=user_temperature,
            user_request_timeout=user_request_timeout,
            user_max_tokens=user_max_tokens,
            user_api_base=user_api_base,
            version=version,
        )
    except Exception as e:
        # Parsing and processing failures alike are routed through the shared
        # handler, and whatever it returns is raised.
        error_processor = ProxyBaseLLMRequestProcessing(data=data)
        raise await error_processor._handle_llm_api_exception(
            e=e,
            user_api_key_dict=user_api_key_dict,
            proxy_logging_obj=proxy_logging_obj,
            version=version,
        )
|
||||
Reference in New Issue
Block a user