On server restart, stale active jobs are automatically resumed rather than failed. Docs already parsed in a prior run are skipped (resumed from cache); docs stuck at 'parsing' are reset to 'pending' and re-parsed.

- Repository: add get_all_stale_active_jobs() and reset_stuck_parsing_docs()
- Service: skip already-parsed docs in _parse_doc(), reset stuck docs on start
- Main: recover stale jobs via asyncio.create_task() in lifespan startup

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
166 lines
6.4 KiB
Python
Executable file
166 lines
6.4 KiB
Python
Executable file
import logging
|
|
from contextlib import asynccontextmanager
|
|
|
|
from fastapi import FastAPI, Depends
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
from app.config import settings
|
|
from app.dependencies.auth import get_current_user
|
|
from app.models.database import init_db, close_db
|
|
from app.api import router as api_router, kb_router, analysis_router
|
|
from app.services.gemini_service import GeminiService
|
|
from app.services.reference_docs import ReferenceDocsService
|
|
from app.services.analysis_service import AnalysisService
|
|
from app.services.knowledge_base_service import KnowledgeBaseService
|
|
|
|
# Root logging setup: INFO level, timestamped "time - logger - level - message" lines.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)

# Module-level logger used by the startup/shutdown code below.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class HealthCheckFilter(logging.Filter):
    """Logging filter that drops records mentioning ``GET /health``."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Return False for records whose message contains ``GET /health``.

        Any other record is allowed through (True).
        """
        return "GET /health" not in record.getMessage()
|
|
|
|
|
|
# Attach the filter to the "uvicorn.access" logger so records containing
# "GET /health" are dropped from that logger's output.
logging.getLogger("uvicorn.access").addFilter(HealthCheckFilter())
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Initialize services on startup and clean up on shutdown.

    Startup, in order:
      1. Validate settings.
      2. Try to initialize the database; on failure log a warning and
         continue without it (``db_available`` stays False).
      3. Build the reference-docs service, loading specs from the database
         when it is available (failures are logged and ignored).
      4. Build the Gemini and analysis services; the analysis service is
         stored on ``app.state.analysis_service``.
      5. If ``settings.LLAMA_CLOUD_API_KEY`` is set, build the Knowledge Base
         service, store it on ``app.state.knowledge_base_service`` and, when
         the database is available, re-launch every stale active job as a
         background task.

    Shutdown: close the database.
    """
    # Strong references to the background recovery tasks. The event loop only
    # keeps weak references to tasks, so a task whose result is discarded can
    # be garbage-collected before it finishes. This set lives in the generator
    # frame, which stays alive (suspended at ``yield``) for the app's lifetime.
    recovery_tasks = set()

    def _on_recovery_done(task):
        """Release a finished recovery task and log its failure, if any."""
        recovery_tasks.discard(task)
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            logger.error(f"Resumed knowledge base job failed: {error}")

    # Validate settings
    settings.validate()

    # Initialize database
    print("Initializing database connection...")
    db_available = False
    try:
        await init_db()
        print("Database initialized successfully")
        db_available = True
    except Exception as e:
        # Best effort: the app keeps running without a database.
        logger.warning(f"Database initialization failed (may not be available): {e}")
        print("Warning: Database not available - running in stateless mode")

    # Initialize services
    print("Loading reference documents...")
    reference_docs = ReferenceDocsService(settings.REFERENCE_DOCS_PATH)

    if db_available:
        try:
            from app.models.database import async_session_factory

            async with async_session_factory() as session:
                print("Loading specs from database...")
                await reference_docs.load_specs_from_db(session)
        except Exception as e:
            logger.warning(f"Failed to load specs from DB (falling back to files): {e}")

    doc_summary = reference_docs.get_context_summary()
    print(f" Brand documents: {len(doc_summary['brand_files'])} files ({doc_summary['brand_context_length']} chars)")
    print(f" Channel documents: {len(doc_summary['channel_files'])} files ({doc_summary['channel_context_length']} chars)")

    print("Initializing Gemini service...")
    gemini_service = GeminiService(settings.GEMINI_API_KEY)

    print("Initializing analysis service...")
    analysis_service = AnalysisService(gemini_service, reference_docs)
    app.state.analysis_service = analysis_service

    # Initialize Knowledge Base service (requires LlamaParse API key)
    knowledge_base_service = None
    if settings.LLAMA_CLOUD_API_KEY:
        from app.services.llamaparse_service import LlamaParseService

        print("Initializing LlamaParse service...")
        llamaparse_service = LlamaParseService(settings.LLAMA_CLOUD_API_KEY, settings.LLAMA_CLOUD_BASE_URL)
        knowledge_base_service = KnowledgeBaseService(llamaparse_service, gemini_service, reference_docs)
        app.state.knowledge_base_service = knowledge_base_service
        print("Knowledge Base pipeline ready!")

        if db_available:
            import asyncio as _asyncio
            from app.models.database import async_session_factory as _session_factory
            from app.repositories.knowledge_base_repository import KnowledgeBaseRepository as _KBRepo

            try:
                async with _session_factory() as _session:
                    _repo = _KBRepo(_session)
                    _stale = await _repo.get_all_stale_active_jobs()
                    if _stale:
                        print(f"Resuming {len(_stale)} interrupted processing job(s)...")
                    for _job in _stale:
                        _kb = await _repo.get_knowledge_base(_job.knowledge_base_id)
                        if _kb:
                            _task = _asyncio.create_task(
                                knowledge_base_service.process_documents(
                                    kb_id=_job.knowledge_base_id,
                                    job_id=_job.id,
                                    agent_key=_kb.agent_key,
                                    user_id=_job.triggered_by_id,
                                    user_name=_job.triggered_by_name,
                                )
                            )
                            # Keep the task alive until it completes.
                            recovery_tasks.add(_task)
                            _task.add_done_callback(_on_recovery_done)
                            print(f" Resumed: {_kb.display_name} (job {_job.id}, was {_job.status})")
            except Exception as _e:
                # Recovery is best effort; a failure must not block startup.
                logger.warning(f"Startup job recovery failed: {_e}")
    else:
        print("LLAMA_CLOUD_API_KEY not set - Knowledge Base processing pipeline disabled")

    print("Backend ready!")

    yield

    # Cleanup on shutdown
    print("Shutting down...")
    await close_db()
|
|
|
|
|
|
# FastAPI application instance; startup and shutdown are driven by `lifespan`.
app = FastAPI(
    lifespan=lifespan,
    title="ModComms Proof Review API",
    description="AI-powered proof review backend for Barclays marketing materials",
    version="1.0.0",
)
|
|
|
|
# CORS middleware.
# settings.CORS_ORIGINS is a comma-separated string. Each entry is stripped of
# surrounding whitespace and empty entries are dropped, so a value such as
# "https://a.example, https://b.example" yields two usable origins instead of
# one with a leading space that would never match a request's Origin header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        origin.strip()
        for origin in settings.CORS_ORIGINS.split(",")
        if origin.strip()
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
# Mount every API router under the shared "/api" prefix, in the original order.
for _router in (api_router, kb_router, analysis_router):
    app.include_router(_router, prefix="/api")
|
|
|
|
|
|
@app.get("/health")
async def health_check():
    """Report liveness with a fixed status payload."""
    payload = {"status": "healthy", "service": "modcomms-backend"}
    return payload
|
|
|
|
|
|
@app.get("/info")
async def info(user: dict = Depends(get_current_user)):
    """Return backend information for the authenticated user.

    Responds with status "initializing" while ``app.state.analysis_service``
    is missing or falsy; otherwise with status "ready", the agent names and
    the reference-docs context summary.
    """
    user_name = user.get("name", "Unknown")
    service = getattr(app.state, "analysis_service", None)

    # Guard clause: the analysis service has not been set on app.state yet.
    if not service:
        return {"status": "initializing", "user": user_name}

    summary = service.reference_docs.get_context_summary()
    agent_names = [
        "Risk & Control Agent",
        "Brand Agent",
        "Channel Best Practices Agent",
        "Channel Tech Specs Agent",
    ]
    return {
        "status": "ready",
        "user": user_name,
        "agents": agent_names,
        "reference_docs": summary,
    }
|