Fix document processing: fallback to LLM when LlamaExtract returns data=None
LlamaExtract can return a non-None response object whose data is None for certain PDFs, which caused "'NoneType' object has no attribute 'get'" when notebook_data was accessed. The task now falls back to LLM extraction instead of failing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f39ddb269f
commit
974a66288e
1 changed file with 4 additions and 4 deletions
|
|
@@ -621,13 +621,13 @@ async def execute_document_processing_task(task_id: int):
|
|||
)
|
||||
extract_duration = time.time() - extract_start
|
||||
|
||||
if extraction_output:
|
||||
if extraction_output and extraction_output.data:
|
||||
notebook_data = extraction_output.data
|
||||
logger.info(f" ✓ [LLAMAEXTRACT] aextract → Success ({extract_duration:.1f}s)")
|
||||
else:
|
||||
logger.error(f" ✗ [LLAMAEXTRACT] aextract → No data returned ({extract_duration:.1f}s)")
|
||||
update_task_status(task_id, TaskStatus.FAILED, error="LlamaExtract failed")
|
||||
return
|
||||
logger.warning(f" ⚠ [LLAMAEXTRACT] aextract → No data returned ({extract_duration:.1f}s), falling back to LLM extraction")
|
||||
from llm_extraction import extract_with_llm
|
||||
notebook_data = await extract_with_llm(text, original_filename, notebook.model_type)
|
||||
except (httpx.RemoteProtocolError, httpx.ReadTimeout, httpx.ConnectError) as e:
|
||||
# Network errors during extraction - provide helpful error message
|
||||
logger.error(f"✗ Network error during extraction: {e}")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue