diff --git a/src/notebookllama/pages/3_Notebook_Chat.py b/src/notebookllama/pages/3_Notebook_Chat.py index f00267c..65f55e4 100644 --- a/src/notebookllama/pages/3_Notebook_Chat.py +++ b/src/notebookllama/pages/3_Notebook_Chat.py @@ -101,7 +101,7 @@ if not documents: # Check if pipeline is actually ready from background_tasks import get_notebook_processing_tasks, TaskStatus from document_manager import get_latest_document_summary -from pipeline_manager import check_pipeline_actually_ready +from pipeline_manager import check_pipeline_status_direct import asyncio as aio import time @@ -138,8 +138,8 @@ if f'pipeline_confirmed_ready_{notebook.id}' not in st.session_state: st.stop() - with st.spinner("🔍 Testing if pipeline is ready..."): - readiness = asyncio.run(check_pipeline_actually_ready(notebook.pipeline_id, notebook.model_type)) + with st.spinner("🔍 Checking pipeline status..."): + readiness = asyncio.run(check_pipeline_status_direct(notebook.pipeline_id)) # If ready, mark as confirmed and don't test again if readiness.get('ready', False): diff --git a/src/notebookllama/pipeline_manager.py b/src/notebookllama/pipeline_manager.py index cecbdf4..ee8488d 100644 --- a/src/notebookllama/pipeline_manager.py +++ b/src/notebookllama/pipeline_manager.py @@ -85,39 +85,54 @@ async def add_document_to_pipeline(pipeline_id: str, file_path: str) -> bool: return False -async def check_pipeline_actually_ready(pipeline_id: str, model_type: str = 'gpt4') -> dict: +async def check_pipeline_status_direct(pipeline_id: str) -> dict: """ - Actually test if pipeline is ready by attempting a simple query + Check pipeline status directly from LlamaCloud API (no test queries) Returns: - dict with 'ready' (bool), 'message' (str), 'test_response' (str) + dict with 'ready' (bool), 'status' (str), 'doc_count' (int), 'indexed_count' (int) """ try: - # Try a simple test query - test_response = await query_notebook_pipeline(pipeline_id, "Test query - what is this document about?", model_type) + client = AsyncLlamaCloud(token=os.getenv("LLAMACLOUD_API_KEY")) - # Check if we got a real response (not empty or error) - if test_response and "Empty Response" not in test_response and "Error:" not in test_response and "Sorry" not in test_response: - return { - 'ready': True, - 'status': 'Ready', - 'message': 'Pipeline indexed and responding to queries', - 'test_response': test_response[:100] - } - else: - return { - 'ready': False, - 'status': 'Still indexing', - 'message': 'Pipeline not responding yet - documents still being indexed', - 'test_response': test_response[:100] if test_response else 'No response' - } + # Get pipeline details + pipeline = await client.pipelines.get_pipeline(pipeline_id=pipeline_id) + + # Count documents in pipeline + doc_count = 0 + indexed_count = 0 + + if hasattr(pipeline, 'configured_transformation'): + transform = pipeline.configured_transformation + if hasattr(transform, 'data_sources'): + doc_count = len(transform.data_sources) + + # Check each document's status + for source in transform.data_sources: + # If source has been processed, it's indexed + # LlamaCloud doesn't expose explicit "indexed" status, + # so we assume if it's in data_sources, it's processed + indexed_count += 1 + + # Pipeline is ready if it has documents + is_ready = doc_count > 0 and indexed_count > 0 + + return { + 'ready': is_ready, + 'status': 'Ready' if is_ready else 'Indexing', + 'doc_count': doc_count, + 'indexed_count': indexed_count, + 'message': f'{indexed_count}/{doc_count} documents indexed' if doc_count > 0 else 'No documents in pipeline' + } except Exception as e: - print(f"Error testing pipeline readiness: {e}") + print(f"Error checking pipeline status: {e}") return { 'ready': False, 'status': 'Error', - 'message': f'Could not test pipeline: {str(e)[:100]}' + 'doc_count': 0, + 'indexed_count': 0, + 'message': f'Error: {str(e)[:100]}' }