Oliver-ai-bot_2.0/backend/cloud_run_service.py
Vadym Samoilenko fa87976517 debug: add INFO logging for chunk count in extract_and_chunk and Cloud Run service
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-05 19:11:56 +00:00

54 lines
1.7 KiB
Python

"""
Cloud Run HTTP service for document processing.
Endpoints:
  GET  /health           → Health check, returns {"status": "ok"}
  POST /process-document → Extract text + chunk → return {"status": "completed", "chunks": [...], "total_chunks": N} (stateless, no Qdrant)
"""
import logging
from typing import Optional
from fastapi import FastAPI, File, Form, UploadFile, HTTPException
# Configure the root logger at INFO so the per-request chunk-count message
# emitted by the handlers in this module is not filtered out.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the request handlers in this module.
logger = logging.getLogger(__name__)
# FastAPI application object; the route decorators in this module register on it.
app = FastAPI(title="Nexus Document Processor")
@app.get("/health")
def health():
    # Health check: always answers with the same fixed payload and does no
    # other work.
    payload = {"status": "ok"}
    return payload
@app.post("/process-document")
async def process_document(
    file: UploadFile = File(...),
    file_name: str = Form(...),
    file_type: str = Form(...),
    sharepoint_id: str = Form(...),
    department_id: Optional[str] = Form(None),
    region_code: Optional[str] = Form(None),
):
    """
    Extract text from an uploaded document and split it into chunks.

    The chunks are returned as JSON — no Qdrant, no embeddings.
    The calling backend (on GCE) handles embed + upsert to Qdrant.

    Args:
        file: Uploaded document; its whole content is read into memory.
        file_name: Name handed to the processor and included in the log line.
        file_type: Type handed to the processor together with the bytes.
        sharepoint_id: Required form field; not used by this handler.
        department_id: Optional form field; not used by this handler.
        region_code: Optional form field; not used by this handler.

    Returns:
        A dict with "status" (always "completed"), "chunks" (the processor's
        result) and "total_chunks" (its length).

    Raises:
        HTTPException: 422 when the processor raises DocumentProcessingError,
            500 for any other exception. In both cases ``detail`` is the
            text of the underlying error, which is attached as the cause.
    """
    # Kept as a function-local import, as in the original.
    # NOTE(review): presumably deferred so the service (and /health) can start
    # without loading the processor's dependencies — confirm.
    from app.core.document_processor import DocumentProcessor, DocumentProcessingError

    try:
        file_bytes = await file.read()
        processor = DocumentProcessor()
        chunks = processor.extract_and_chunk(file_bytes, file_name, file_type)
        logger.info("Extracted %d chunks from '%s' (type=%s)", len(chunks), file_name, file_type)
        return {
            "status": "completed",
            "chunks": chunks,
            "total_chunks": len(chunks),
        }
    except DocumentProcessingError as e:
        logger.error("Document processing failed: %s", e)
        # Chain explicitly so the original error stays attached as __cause__.
        raise HTTPException(status_code=422, detail=str(e)) from e
    except Exception as e:
        # Top-level boundary of the request: log the traceback, then convert
        # to an HTTP 500 while keeping the original error as the cause.
        logger.exception("Unexpected error processing document: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e