Replace test queries with direct pipeline status check - no more wasted API calls
This commit is contained in:
parent
bca37e5759
commit
f63e91a3c8
2 changed files with 40 additions and 25 deletions
|
|
@ -101,7 +101,7 @@ if not documents:
|
|||
# Check if pipeline is actually ready
|
||||
from background_tasks import get_notebook_processing_tasks, TaskStatus
|
||||
from document_manager import get_latest_document_summary
|
||||
from pipeline_manager import check_pipeline_actually_ready
|
||||
from pipeline_manager import check_pipeline_status_direct
|
||||
import asyncio as aio
|
||||
import time
|
||||
|
||||
|
|
@ -138,8 +138,8 @@ if f'pipeline_confirmed_ready_{notebook.id}' not in st.session_state:
|
|||
|
||||
st.stop()
|
||||
|
||||
with st.spinner("🔍 Testing if pipeline is ready..."):
|
||||
readiness = asyncio.run(check_pipeline_actually_ready(notebook.pipeline_id, notebook.model_type))
|
||||
with st.spinner("🔍 Checking pipeline status..."):
|
||||
readiness = asyncio.run(check_pipeline_status_direct(notebook.pipeline_id))
|
||||
|
||||
# If ready, mark as confirmed and don't test again
|
||||
if readiness.get('ready', False):
|
||||
|
|
|
|||
|
|
@ -85,39 +85,54 @@ async def add_document_to_pipeline(pipeline_id: str, file_path: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
async def check_pipeline_actually_ready(pipeline_id: str, model_type: str = 'gpt4') -> dict:
|
||||
async def check_pipeline_status_direct(pipeline_id: str) -> dict:
|
||||
"""
|
||||
Actually test if pipeline is ready by attempting a simple query
|
||||
Check pipeline status directly from LlamaCloud API (no test queries)
|
||||
|
||||
Returns:
|
||||
dict with 'ready' (bool), 'message' (str), 'test_response' (str)
|
||||
dict with 'ready' (bool), 'status' (str), 'doc_count' (int), 'indexed_count' (int)
|
||||
"""
|
||||
try:
|
||||
# Try a simple test query
|
||||
test_response = await query_notebook_pipeline(pipeline_id, "Test query - what is this document about?", model_type)
|
||||
client = AsyncLlamaCloud(token=os.getenv("LLAMACLOUD_API_KEY"))
|
||||
|
||||
# Check if we got a real response (not empty or error)
|
||||
if test_response and "Empty Response" not in test_response and "Error:" not in test_response and "Sorry" not in test_response:
|
||||
return {
|
||||
'ready': True,
|
||||
'status': 'Ready',
|
||||
'message': 'Pipeline indexed and responding to queries',
|
||||
'test_response': test_response[:100]
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'ready': False,
|
||||
'status': 'Still indexing',
|
||||
'message': 'Pipeline not responding yet - documents still being indexed',
|
||||
'test_response': test_response[:100] if test_response else 'No response'
|
||||
}
|
||||
# Get pipeline details
|
||||
pipeline = await client.pipelines.get_pipeline(pipeline_id=pipeline_id)
|
||||
|
||||
# Count documents in pipeline
|
||||
doc_count = 0
|
||||
indexed_count = 0
|
||||
|
||||
if hasattr(pipeline, 'configured_transformation'):
|
||||
transform = pipeline.configured_transformation
|
||||
if hasattr(transform, 'data_sources'):
|
||||
doc_count = len(transform.data_sources)
|
||||
|
||||
# Check each document's status
|
||||
for source in transform.data_sources:
|
||||
# If source has been processed, it's indexed
|
||||
# LlamaCloud doesn't expose explicit "indexed" status,
|
||||
# so we assume if it's in data_sources, it's processed
|
||||
indexed_count += 1
|
||||
|
||||
# Pipeline is ready if it has documents
|
||||
is_ready = doc_count > 0 and indexed_count > 0
|
||||
|
||||
return {
|
||||
'ready': is_ready,
|
||||
'status': 'Ready' if is_ready else 'Indexing',
|
||||
'doc_count': doc_count,
|
||||
'indexed_count': indexed_count,
|
||||
'message': f'{indexed_count}/{doc_count} documents indexed' if doc_count > 0 else 'No documents in pipeline'
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error testing pipeline readiness: {e}")
|
||||
print(f"Error checking pipeline status: {e}")
|
||||
return {
|
||||
'ready': False,
|
||||
'status': 'Error',
|
||||
'message': f'Could not test pipeline: {str(e)[:100]}'
|
||||
'doc_count': 0,
|
||||
'indexed_count': 0,
|
||||
'message': f'Error: {str(e)[:100]}'
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue