Fix: Use a test query ONCE for readiness; detects when docs are actually searchable
This commit is contained in:
parent
32d0548475
commit
ffbd7e2924
2 changed files with 35 additions and 33 deletions
|
|
@ -101,7 +101,7 @@ if not documents:
|
|||
# Check if pipeline is actually ready
|
||||
from background_tasks import get_notebook_processing_tasks, TaskStatus
|
||||
from document_manager import get_latest_document_summary
|
||||
from pipeline_manager import check_pipeline_status_direct
|
||||
from pipeline_manager import check_pipeline_ready_with_test
|
||||
import asyncio as aio
|
||||
import time
|
||||
|
||||
|
|
@ -138,8 +138,8 @@ if f'pipeline_confirmed_ready_{notebook.id}' not in st.session_state:
|
|||
|
||||
st.stop()
|
||||
|
||||
with st.spinner("🔍 Checking pipeline status..."):
|
||||
readiness = asyncio.run(check_pipeline_status_direct(notebook.pipeline_id))
|
||||
with st.spinner("🔍 Checking if documents are searchable..."):
|
||||
readiness = asyncio.run(check_pipeline_ready_with_test(notebook.pipeline_id, notebook.model_type))
|
||||
|
||||
# If ready, mark as confirmed and don't test again
|
||||
if readiness.get('ready', False):
|
||||
|
|
|
|||
|
|
@ -85,53 +85,55 @@ async def add_document_to_pipeline(pipeline_id: str, file_path: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
async def check_pipeline_status_direct(pipeline_id: str) -> dict:
|
||||
async def check_pipeline_ready_with_test(pipeline_id: str, model_type: str) -> dict:
|
||||
"""
|
||||
Check pipeline status directly from LlamaCloud API (no test queries)
|
||||
Check if pipeline is actually ready by doing ONE simple test query
|
||||
Only called during readiness check, not during chat
|
||||
|
||||
Returns:
|
||||
dict with 'ready' (bool), 'status' (str), 'doc_count' (int), 'indexed_count' (int)
|
||||
dict with 'ready' (bool), 'status' (str), 'message' (str)
|
||||
"""
|
||||
try:
|
||||
# First check if pipeline has documents
|
||||
client = AsyncLlamaCloud(token=os.getenv("LLAMACLOUD_API_KEY"))
|
||||
|
||||
# Get pipeline details
|
||||
pipeline = await client.pipelines.get_pipeline(pipeline_id=pipeline_id)
|
||||
|
||||
# Count documents in pipeline
|
||||
doc_count = 0
|
||||
indexed_count = 0
|
||||
if hasattr(pipeline, 'configured_transformation') and hasattr(pipeline.configured_transformation, 'data_sources'):
|
||||
doc_count = len(pipeline.configured_transformation.data_sources)
|
||||
|
||||
if hasattr(pipeline, 'configured_transformation'):
|
||||
transform = pipeline.configured_transformation
|
||||
if hasattr(transform, 'data_sources'):
|
||||
doc_count = len(transform.data_sources)
|
||||
if doc_count == 0:
|
||||
return {
|
||||
'ready': False,
|
||||
'status': 'No documents',
|
||||
'message': 'No documents in pipeline yet'
|
||||
}
|
||||
|
||||
# Check each document's status
|
||||
for source in transform.data_sources:
|
||||
# If source has been processed, it's indexed
|
||||
# LlamaCloud doesn't expose explicit "indexed" status,
|
||||
# so we assume if it's in data_sources, it's processed
|
||||
indexed_count += 1
|
||||
# Documents exist - now test if they're actually searchable
|
||||
print(f"Testing pipeline {pipeline_id} with simple query...")
|
||||
test_response = await query_notebook_pipeline(pipeline_id, "Summarize this document briefly", model_type)
|
||||
|
||||
# Pipeline is ready if it has documents
|
||||
is_ready = doc_count > 0 and indexed_count > 0
|
||||
|
||||
return {
|
||||
'ready': is_ready,
|
||||
'status': 'Ready' if is_ready else 'Indexing',
|
||||
'doc_count': doc_count,
|
||||
'indexed_count': indexed_count,
|
||||
'message': f'{indexed_count}/{doc_count} documents indexed' if doc_count > 0 else 'No documents in pipeline'
|
||||
}
|
||||
# Check if we got a real response
|
||||
if test_response and "Empty Response" not in test_response and "Error:" not in test_response and len(test_response) > 50:
|
||||
print(f"✓ Pipeline ready - got real response")
|
||||
return {
|
||||
'ready': True,
|
||||
'status': 'Ready',
|
||||
'message': f'{doc_count} document(s) indexed and searchable'
|
||||
}
|
||||
else:
|
||||
print(f"⏳ Pipeline not ready yet - got: {test_response[:100] if test_response else 'None'}")
|
||||
return {
|
||||
'ready': False,
|
||||
'status': 'Still indexing',
|
||||
'message': f'{doc_count} document(s) added but not searchable yet'
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error checking pipeline status: {e}")
|
||||
print(f"Error checking readiness: {e}")
|
||||
return {
|
||||
'ready': False,
|
||||
'status': 'Error',
|
||||
'doc_count': 0,
|
||||
'indexed_count': 0,
|
||||
'message': f'Error: {str(e)[:100]}'
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue