Run GraphRAG background init in a thread instead of asyncio task

Neo4j operations (connect, get_triplets, upsert_nodes,
build_communities) are synchronous blocking calls. When run as an
asyncio.ensure_future() task, they block the event loop and prevent
Hypercorn from binding to the port or accepting connections.

Fix: launch GraphRAG init in a daemon thread with its own event loop
so the main event loop stays free to serve HTTP requests immediately
after Phase 1 completes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
michael 2026-02-23 17:45:52 -06:00
parent 7cd8f26c37
commit 4ee2075eab

53
main.py
View file

@ -3,6 +3,7 @@
import asyncio
import os
import sys
import threading
from flask import Flask
from flask_cors import CORS
@ -56,6 +57,32 @@ register_routes(app)
log_structured('info', "Flask routes registered.")
def _launch_graphrag_background_thread():
    """Launch GraphRAG initialization in a daemon thread with its own event loop.

    Neo4j operations (connect, get_triplets, upsert_nodes, build_communities)
    are synchronous blocking calls. Running them in an asyncio task would block
    the main event loop and prevent Hypercorn from serving requests. A separate
    thread with its own event loop avoids this entirely.

    The call returns as soon as the thread has been started. The outcome of the
    initialization is reported only through ``log_structured``; exceptions
    raised by the initialization are logged and never reach the caller.
    """
    def _run():
        # Thread entry point: drive the async initializer to completion on an
        # event loop that belongs exclusively to this thread.
        try:
            # asyncio.run() creates a fresh loop for this thread and, unlike a
            # bare loop.close(), also cancels leftover tasks and shuts down
            # async generators and the default executor before closing it.
            success = asyncio.run(initialize_graphrag_components())
            if success:
                log_structured('info', "Background GraphRAG initialization completed successfully")
            else:
                log_structured('warning', "Background GraphRAG initialization failed — vector search still works")
        except Exception as e:
            # Best-effort background work: log the failure instead of letting
            # it escape the thread.
            log_structured('error', f"Background GraphRAG initialization error: {e}")

    # daemon=True so an unfinished initialization never blocks process exit.
    t = threading.Thread(target=_run, name="graphrag-init", daemon=True)
    t.start()
# --- Startup Function ---
async def startup_event() -> bool:
"""Tasks to run when the application starts.
@ -94,22 +121,14 @@ async def startup_event() -> bool:
else:
log_structured('info', "Phase 1 complete: vector index and agent are available")
# 3. Phase 2: Launch GraphRAG initialization as a background task
# 3. Phase 2: Launch GraphRAG initialization in a background THREAD.
# This MUST run in a thread, not an asyncio task, because the Neo4j
# operations (connect, get_triplets, build_communities, etc.) are
# synchronous blocking calls. If run as an asyncio task, they block
# the event loop and prevent Hypercorn from accepting connections.
if vector_success:
log_structured('info', "Phase 2: Launching GraphRAG initialization in background...")
async def _background_graphrag_init():
try:
success = await initialize_graphrag_components()
if success:
log_structured('info', "Background GraphRAG initialization completed successfully")
else:
log_structured('warning', "Background GraphRAG initialization failed — vector search still works")
except Exception as e:
log_structured('error', f"Background GraphRAG initialization error: {e}")
# Schedule as a background task — does not block server startup
asyncio.ensure_future(_background_graphrag_init())
log_structured('info', "Phase 2: Launching GraphRAG initialization in background thread...")
_launch_graphrag_background_thread()
else:
log_structured('warning', "Skipping GraphRAG background init because vector init failed")
@ -176,8 +195,8 @@ if __name__ == '__main__':
log_structured('critical', "Emergency initialization failed. Server will run but chat will be impaired.")
else:
log_structured('info', "Emergency initialization succeeded.")
# Also try GraphRAG in background
asyncio.ensure_future(initialize_graphrag_components())
# Also try GraphRAG in background thread
_launch_graphrag_background_thread()
# Start serving — background GraphRAG init continues in its own daemon thread
await hypercorn_serve(app, config)