Log full details of failed LlamaParse pages for troubleshooting

Handle MarkdownPageFailedMarkdownPage objects gracefully by checking for
the markdown attribute with hasattr instead of assuming all pages have
it. Failed pages now log their type and all attributes so the actual
LlamaParse error is visible in logs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
michael 2026-02-12 17:45:58 -06:00
parent 8a9a24ebe6
commit 49facbe713

View file

@@ -57,9 +57,21 @@ class LlamaParseService:
 # Extract markdown from all pages
 if result.markdown and result.markdown.pages:
-pages = [page.markdown for page in result.markdown.pages if page.markdown]
+pages = []
+for i, page in enumerate(result.markdown.pages):
+if hasattr(page, "markdown") and page.markdown:
+pages.append(page.markdown)
+else:
+# Log full details of failed pages for troubleshooting
+logger.error(
+f"[LLAMAPARSE] Page {i} failed for '{filename}': "
+f"type={type(page).__name__}, attrs={vars(page) if hasattr(page, '__dict__') else repr(page)}"
+)
+if not pages:
+logger.warning(f"[LLAMAPARSE] All {len(result.markdown.pages)} pages failed for '{filename}'")
+return ""
 combined = "\n\n".join(pages)
-logger.info(f"[LLAMAPARSE] Parsed '{filename}' -> {len(combined)} chars from {len(pages)} pages")
+logger.info(f"[LLAMAPARSE] Parsed '{filename}' -> {len(combined)} chars from {len(pages)}/{len(result.markdown.pages)} pages")
 return combined
 logger.warning(f"[LLAMAPARSE] No markdown content returned for '{filename}'")