Fix document processing: fallback to LLM when LlamaExtract returns data=None

For certain PDFs, LlamaExtract can return a non-None response object whose data attribute is None. Assigning that to notebook_data led to "'NoneType' object has no attribute 'get'" when notebook_data was later used. The task now falls back to LLM extraction whenever no data is returned, instead of marking the task as failed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-05 10:46:23 +01:00 · 2026-05-05 10:46:23 +01:00 · 974a66288e
commit 974a66288e
parent f39ddb269f
1 changed files with 4 additions and 4 deletions
--- a/backend/src/notebookllama/background_tasks.py
+++ b/backend/src/notebookllama/background_tasks.py
@@ -621,13 +621,13 @@ async def execute_document_processing_task(task_id: int):
                )
                extract_duration = time.time() - extract_start

-                if extraction_output:
+                if extraction_output and extraction_output.data:
                    notebook_data = extraction_output.data
                    logger.info(f"  ✓ [LLAMAEXTRACT] aextract → Success ({extract_duration:.1f}s)")
                else:
-                    logger.error(f"  ✗ [LLAMAEXTRACT] aextract → No data returned ({extract_duration:.1f}s)")
-                    update_task_status(task_id, TaskStatus.FAILED, error="LlamaExtract failed")
-                    return
+                    logger.warning(f"  ⚠ [LLAMAEXTRACT] aextract → No data returned ({extract_duration:.1f}s), falling back to LLM extraction")
+                    from llm_extraction import extract_with_llm
+                    notebook_data = await extract_with_llm(text, original_filename, notebook.model_type)
            except (httpx.RemoteProtocolError, httpx.ReadTimeout, httpx.ConnectError) as e:
                # Network errors during extraction - provide helpful error message
                logger.error(f"✗ Network error during extraction: {e}")