modcomms/backend/app/main.py
Vadym Samoilenko 1982d5d76e feat(knowledge-base): smart resume for interrupted processing jobs
On server restart, stale active jobs are automatically resumed rather
than failed. Docs already parsed in a prior run are skipped (resume from
cache), docs stuck at 'parsing' are reset to 'pending' and re-parsed.

- Repository: add get_all_stale_active_jobs() and reset_stuck_parsing_docs()
- Service: skip already-parsed docs in _parse_doc(), reset stuck docs on start
- Main: recover stale jobs via asyncio.create_task() in lifespan startup

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-15 10:20:35 +01:00

166 lines
6.4 KiB
Python
Executable file

import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI, Depends
from fastapi.middleware.cors import CORSMiddleware
from app.config import settings
from app.dependencies.auth import get_current_user
from app.models.database import init_db, close_db
from app.api import router as api_router, kb_router, analysis_router
from app.services.gemini_service import GeminiService
from app.services.reference_docs import ReferenceDocsService
from app.services.analysis_service import AnalysisService
from app.services.knowledge_base_service import KnowledgeBaseService
# Configure logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
)
logger = logging.getLogger(__name__)
class HealthCheckFilter(logging.Filter):
"""Filter out health check endpoint logs from uvicorn access log."""
def filter(self, record: logging.LogRecord) -> bool:
if "GET /health" in record.getMessage():
return False
return True
logging.getLogger("uvicorn.access").addFilter(HealthCheckFilter())
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Initialize services on startup and cleanup on shutdown."""
# Validate settings
settings.validate()
# Initialize database
print("Initializing database connection...")
db_available = False
try:
await init_db()
print("Database initialized successfully")
db_available = True
except Exception as e:
logger.warning(f"Database initialization failed (may not be available): {e}")
print("Warning: Database not available - running in stateless mode")
# Initialize services
print("Loading reference documents...")
reference_docs = ReferenceDocsService(settings.REFERENCE_DOCS_PATH)
if db_available:
try:
from app.models.database import async_session_factory
async with async_session_factory() as session:
print("Loading specs from database...")
await reference_docs.load_specs_from_db(session)
except Exception as e:
logger.warning(f"Failed to load specs from DB (falling back to files): {e}")
doc_summary = reference_docs.get_context_summary()
print(f" Brand documents: {len(doc_summary['brand_files'])} files ({doc_summary['brand_context_length']} chars)")
print(f" Channel documents: {len(doc_summary['channel_files'])} files ({doc_summary['channel_context_length']} chars)")
print("Initializing Gemini service...")
gemini_service = GeminiService(settings.GEMINI_API_KEY)
print("Initializing analysis service...")
analysis_service = AnalysisService(gemini_service, reference_docs)
app.state.analysis_service = analysis_service
# Initialize Knowledge Base service (requires LlamaParse API key)
knowledge_base_service = None
if settings.LLAMA_CLOUD_API_KEY:
from app.services.llamaparse_service import LlamaParseService
print("Initializing LlamaParse service...")
llamaparse_service = LlamaParseService(settings.LLAMA_CLOUD_API_KEY, settings.LLAMA_CLOUD_BASE_URL)
knowledge_base_service = KnowledgeBaseService(llamaparse_service, gemini_service, reference_docs)
app.state.knowledge_base_service = knowledge_base_service
print("Knowledge Base pipeline ready!")
if db_available:
import asyncio as _asyncio
from app.models.database import async_session_factory as _session_factory
from app.repositories.knowledge_base_repository import KnowledgeBaseRepository as _KBRepo
try:
async with _session_factory() as _session:
_repo = _KBRepo(_session)
_stale = await _repo.get_all_stale_active_jobs()
if _stale:
print(f"Resuming {len(_stale)} interrupted processing job(s)...")
for _job in _stale:
_kb = await _repo.get_knowledge_base(_job.knowledge_base_id)
if _kb:
_asyncio.create_task(
knowledge_base_service.process_documents(
kb_id=_job.knowledge_base_id,
job_id=_job.id,
agent_key=_kb.agent_key,
user_id=_job.triggered_by_id,
user_name=_job.triggered_by_name,
)
)
print(f" Resumed: {_kb.display_name} (job {_job.id}, was {_job.status})")
except Exception as _e:
logger.warning(f"Startup job recovery failed: {_e}")
else:
print("LLAMA_CLOUD_API_KEY not set - Knowledge Base processing pipeline disabled")
print("Backend ready!")
yield
# Cleanup on shutdown
print("Shutting down...")
await close_db()
# Create FastAPI app
app = FastAPI(
title="ModComms Proof Review API",
description="AI-powered proof review backend for Barclays marketing materials",
version="1.0.0",
lifespan=lifespan,
)
# CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=settings.CORS_ORIGINS.split(","),
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Include API routes
app.include_router(api_router, prefix="/api")
app.include_router(kb_router, prefix="/api")
app.include_router(analysis_router, prefix="/api")
@app.get("/health")
async def health_check():
"""Health check endpoint."""
return {"status": "healthy", "service": "modcomms-backend"}
@app.get("/info")
async def info(user: dict = Depends(get_current_user)):
"""Get backend information. Requires authentication."""
analysis_service = getattr(app.state, "analysis_service", None)
if analysis_service:
ref_docs = analysis_service.reference_docs
doc_summary = ref_docs.get_context_summary()
return {
"status": "ready",
"user": user.get("name", "Unknown"),
"agents": ["Risk & Control Agent", "Brand Agent", "Channel Best Practices Agent", "Channel Tech Specs Agent"],
"reference_docs": doc_summary,
}
return {"status": "initializing", "user": user.get("name", "Unknown")}