Replace test queries with direct pipeline status check - no more wasted API calls

This commit is contained in:
DJP 2025-10-02 13:11:26 -04:00
parent bca37e5759
commit f63e91a3c8
2 changed files with 40 additions and 25 deletions

View file

@ -101,7 +101,7 @@ if not documents:
# Check if pipeline is actually ready
from background_tasks import get_notebook_processing_tasks, TaskStatus
from document_manager import get_latest_document_summary
from pipeline_manager import check_pipeline_actually_ready
from pipeline_manager import check_pipeline_status_direct
import asyncio as aio
import time
@ -138,8 +138,8 @@ if f'pipeline_confirmed_ready_{notebook.id}' not in st.session_state:
st.stop()
with st.spinner("🔍 Testing if pipeline is ready..."):
readiness = asyncio.run(check_pipeline_actually_ready(notebook.pipeline_id, notebook.model_type))
with st.spinner("🔍 Checking pipeline status..."):
readiness = asyncio.run(check_pipeline_status_direct(notebook.pipeline_id))
# If ready, mark as confirmed and don't test again
if readiness.get('ready', False):

View file

@ -85,39 +85,54 @@ async def add_document_to_pipeline(pipeline_id: str, file_path: str) -> bool:
return False
async def check_pipeline_status_direct(pipeline_id: str) -> dict:
    """
    Check pipeline status directly from LlamaCloud API (no test queries)

    Fetches the pipeline record once and counts the data sources attached to
    its ``configured_transformation``. No query is sent to the pipeline, so
    the check does not consume any LLM/query API calls.

    Args:
        pipeline_id: ID of the LlamaCloud pipeline to inspect.

    Returns:
        dict with 'ready' (bool), 'status' (str), 'doc_count' (int),
        'indexed_count' (int), 'message' (str). On any failure the dict has
        'ready' False, 'status' 'Error', zero counts, and a truncated error
        message; the exception is not re-raised.
    """
    try:
        client = AsyncLlamaCloud(token=os.getenv("LLAMACLOUD_API_KEY"))

        # Get pipeline details
        pipeline = await client.pipelines.get_pipeline(pipeline_id=pipeline_id)

        # Missing attributes (or a None value) are treated as "no documents"
        # rather than as an error.
        transform = getattr(pipeline, 'configured_transformation', None)
        data_sources = getattr(transform, 'data_sources', None) or ()

        # Count documents in pipeline
        doc_count = len(data_sources)

        # No explicit per-document "indexed" flag is read here, so every
        # listed data source is counted as processed.
        # NOTE(review): this assumes presence in data_sources implies the
        # document is indexed - confirm against the LlamaCloud SDK, which may
        # offer a dedicated ingestion-status call.
        indexed_count = doc_count

        # Pipeline is ready if it has documents
        is_ready = doc_count > 0 and indexed_count > 0

        return {
            'ready': is_ready,
            'status': 'Ready' if is_ready else 'Indexing',
            'doc_count': doc_count,
            'indexed_count': indexed_count,
            'message': f'{indexed_count}/{doc_count} documents indexed' if doc_count > 0 else 'No documents in pipeline'
        }

    except Exception as e:
        # Best-effort status probe: callers expect a status dict, never an
        # exception, so report the failure in-band.
        print(f"Error checking pipeline status: {e}")
        return {
            'ready': False,
            'status': 'Error',
            'doc_count': 0,
            'indexed_count': 0,
            'message': f'Error: {str(e)[:100]}'
        }