# Oliver-ai-bot_2.0/backend/cloud_run_service.py

"""
Cloud Run HTTP service for document processing.

Endpoints:
  GET  /health              → Health check
  POST /process-document    → Extract text + chunk → return {"chunks": [...]} (stateless, no Qdrant)
"""
import logging
from typing import Optional

from fastapi import FastAPI, File, Form, UploadFile, HTTPException

# Configure the root logger at INFO level at import time.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# FastAPI application object; the route decorators below register handlers on it.
app = FastAPI(title="Nexus Document Processor")


@app.get("/health")
def health():
    """Health-check endpoint: always answers with a fixed ``{"status": "ok"}`` payload."""
    payload = {"status": "ok"}
    return payload


@app.post("/process-document")
async def process_document(
    file: UploadFile = File(...),
    file_name: str = Form(...),
    file_type: str = Form(...),
    sharepoint_id: str = Form(...),
    department_id: Optional[str] = Form(None),
    region_code: Optional[str] = Form(None),
):
    """
    Extract text and chunk a document. Returns chunks as JSON — no Qdrant, no embeddings.
    The calling backend (on GCE) handles embed + upsert to Qdrant.

    Args:
        file: Uploaded document; its full content is read into memory.
        file_name: Name passed to the processor and used in log messages.
        file_type: Type identifier passed to the processor.
        sharepoint_id: Required form field; accepted but not used by this handler.
        department_id: Optional form field; accepted but not used by this handler.
        region_code: Optional form field; accepted but not used by this handler.

    Returns:
        A dict with ``"status"`` set to ``"completed"``, ``"chunks"`` holding the
        processor output, and ``"total_chunks"`` holding ``len(chunks)``.

    Raises:
        HTTPException: 422 when the processor raises ``DocumentProcessingError``;
            500 for any other exception raised while reading or processing.
    """
    from fastapi.concurrency import run_in_threadpool

    from app.core.document_processor import DocumentProcessor, DocumentProcessingError

    try:
        file_bytes = await file.read()
        processor = DocumentProcessor()
        # extract_and_chunk is a plain synchronous call. Running it directly in
        # this coroutine would block the event loop (and with it /health) for
        # the whole extraction, so hand it to the worker thread pool instead.
        chunks = await run_in_threadpool(
            processor.extract_and_chunk, file_bytes, file_name, file_type
        )
        logger.info("Extracted %d chunks from '%s' (type=%s)", len(chunks), file_name, file_type)
        return {
            "status": "completed",
            "chunks": chunks,
            "total_chunks": len(chunks),
        }
    except DocumentProcessingError as e:
        logger.error("Document processing failed: %s", e)
        # Chain the cause so the original error stays attached to the HTTP error.
        raise HTTPException(status_code=422, detail=str(e)) from e
    except Exception as e:
        logger.exception("Unexpected error processing document: %s", e)
        # NOTE(review): detail exposes the raw exception text to the caller;
        # kept as-is because the calling backend may rely on it — confirm.
        raise HTTPException(status_code=500, detail=str(e)) from e