Show partial parse status in UI when some pages fail

- LlamaParse service now returns a ParseResult dataclass with markdown,
  total page count, and a list of failed pages (page number + error)
- Knowledge base service sets status to "partial" (instead of "parsed")
  when some pages failed, with a descriptive error listing which pages
  failed and why
- Frontend StatusBadge shows "partial parse" in orange for partial status
- Error details are shown inline below the document row for both partial
  and error statuses

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
michael 2026-02-12 17:51:52 -06:00
parent 49facbe713
commit de62fa1f87
3 changed files with 83 additions and 31 deletions

View file

@ -145,12 +145,36 @@ class KnowledgeBaseService:
await doc_repo.update_source_document_parse_status(doc.id, "parsing")
await doc_session.commit()
markdown = await self.llamaparse.parse_document(file_data, doc.filename)
result = await self.llamaparse.parse_document(file_data, doc.filename)
# Determine status based on failed pages
if result.failed_pages:
if not result.markdown.strip():
# All pages failed
failed_desc = "; ".join(
f"Page {fp['page']}: {fp['error']}" for fp in result.failed_pages
)
status = "error"
error_msg = f"All {result.total_pages} pages failed to parse. {failed_desc}"
else:
failed_desc = "; ".join(
f"Page {fp['page']}: {fp['error']}" for fp in result.failed_pages
)
status = "partial"
error_msg = (
f"{len(result.failed_pages)} of {result.total_pages} pages "
f"failed to parse. {failed_desc}"
)
else:
status = "parsed"
error_msg = None
async with async_session_factory() as doc_session:
doc_repo = KnowledgeBaseRepository(doc_session)
await doc_repo.update_source_document_parse_status(
doc.id, "parsed", parsed_markdown=markdown
doc.id, status,
parsed_markdown=result.markdown if result.markdown.strip() else None,
parse_error=error_msg,
)
await doc_session.commit()
@ -159,8 +183,8 @@ class KnowledgeBaseService:
await repo.update_processing_job(job_id, parsed_documents=parsed_count)
await session.commit()
if markdown.strip():
return (doc.filename, str(doc.id), markdown)
if result.markdown.strip():
return (doc.filename, str(doc.id), result.markdown)
return None
except Exception as e:

View file

@ -1,17 +1,26 @@
import logging
import tempfile
from dataclasses import dataclass, field
from pathlib import Path
logger = logging.getLogger(__name__)
@dataclass
class ParseResult:
"""Result of a LlamaParse document parse."""
markdown: str
total_pages: int = 0
failed_pages: list[dict] = field(default_factory=list)
class LlamaParseService:
"""Service for parsing documents using LlamaParse with the llama-cloud SDK."""
def __init__(self, api_key: str):
self.api_key = api_key
async def parse_document(self, file_data: bytes, filename: str) -> str:
async def parse_document(self, file_data: bytes, filename: str) -> ParseResult:
"""
Parse a document using LlamaParse Agentic Plus tier and return a ParseResult.
@ -23,7 +32,7 @@ class LlamaParseService:
filename: Original filename (used for format detection)
Returns:
Parsed markdown text
ParseResult with markdown text and any failed page info
"""
from llama_cloud import AsyncLlamaCloud
@ -57,25 +66,29 @@ class LlamaParseService:
# Extract markdown from all pages
if result.markdown and result.markdown.pages:
total_pages = len(result.markdown.pages)
pages = []
failed_pages = []
for i, page in enumerate(result.markdown.pages):
if hasattr(page, "markdown") and page.markdown:
pages.append(page.markdown)
else:
# Log the failed page's number, type, and error for troubleshooting
page_num = getattr(page, "page_number", i + 1)
error_msg = getattr(page, "error", "Unknown error")
logger.error(
f"[LLAMAPARSE] Page {i} failed for '{filename}': "
f"type={type(page).__name__}, attrs={vars(page) if hasattr(page, '__dict__') else repr(page)}"
f"[LLAMAPARSE] Page {page_num} failed for '{filename}': "
f"type={type(page).__name__}, error={error_msg}"
)
failed_pages.append({"page": page_num, "error": error_msg})
if not pages:
logger.warning(f"[LLAMAPARSE] All {len(result.markdown.pages)} pages failed for '{filename}'")
return ""
logger.warning(f"[LLAMAPARSE] All {total_pages} pages failed for '{filename}'")
return ParseResult(markdown="", total_pages=total_pages, failed_pages=failed_pages)
combined = "\n\n".join(pages)
logger.info(f"[LLAMAPARSE] Parsed '{filename}' -> {len(combined)} chars from {len(pages)}/{len(result.markdown.pages)} pages")
return combined
logger.info(f"[LLAMAPARSE] Parsed '{filename}' -> {len(combined)} chars from {len(pages)}/{total_pages} pages")
return ParseResult(markdown=combined, total_pages=total_pages, failed_pages=failed_pages)
logger.warning(f"[LLAMAPARSE] No markdown content returned for '{filename}'")
return ""
return ParseResult(markdown="", total_pages=0, failed_pages=[])
finally:
# Clean up temp file

View file

@ -16,15 +16,19 @@ const StatusBadge: React.FC<{ status: string }> = ({ status }) => {
pending: 'bg-yellow-100 text-yellow-800',
parsing: 'bg-blue-100 text-blue-800',
parsed: 'bg-green-100 text-green-800',
partial: 'bg-orange-100 text-orange-800',
error: 'bg-red-100 text-red-800',
parsing_documents: 'bg-blue-100 text-blue-800',
distilling: 'bg-purple-100 text-purple-800',
completed: 'bg-green-100 text-green-800',
failed: 'bg-red-100 text-red-800',
};
const labels: Record<string, string> = {
partial: 'partial parse',
};
return (
<span className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium ${colors[status] || 'bg-grey-100 text-grey-700'}`}>
{status}
{labels[status] || status}
</span>
);
};
@ -475,22 +479,33 @@ export const KnowledgeBase: React.FC = () => {
</thead>
<tbody className="divide-y divide-grey-200">
{selectedKb.source_documents.map(doc => (
<tr key={doc.id} className="hover:bg-grey-50 transition-colors">
<td className="px-4 py-3 font-medium text-primary-blue">{doc.filename}</td>
<td className="px-4 py-3 text-grey-700">{formatBytes(doc.file_size_bytes)}</td>
<td className="px-4 py-3 text-grey-700">{formatDate(doc.created_at)}</td>
<td className="px-4 py-3 text-grey-700">{doc.uploaded_by_name || '-'}</td>
<td className="px-4 py-3"><StatusBadge status={doc.parse_status} /></td>
<td className="px-4 py-3">
<button
onClick={() => handleRemoveDoc(doc.id)}
className="text-grey-700 hover:text-error transition-colors"
title="Remove document"
>
<TrashIcon className="h-4 w-4" />
</button>
</td>
</tr>
<React.Fragment key={doc.id}>
<tr className="hover:bg-grey-50 transition-colors">
<td className="px-4 py-3 font-medium text-primary-blue">{doc.filename}</td>
<td className="px-4 py-3 text-grey-700">{formatBytes(doc.file_size_bytes)}</td>
<td className="px-4 py-3 text-grey-700">{formatDate(doc.created_at)}</td>
<td className="px-4 py-3 text-grey-700">{doc.uploaded_by_name || '-'}</td>
<td className="px-4 py-3"><StatusBadge status={doc.parse_status} /></td>
<td className="px-4 py-3">
<button
onClick={() => handleRemoveDoc(doc.id)}
className="text-grey-700 hover:text-error transition-colors"
title="Remove document"
>
<TrashIcon className="h-4 w-4" />
</button>
</td>
</tr>
{doc.parse_error && (doc.parse_status === 'partial' || doc.parse_status === 'error') && (
<tr>
<td colSpan={6} className="px-4 pb-3 pt-0">
<div className={`text-xs px-3 py-2 rounded-lg ${doc.parse_status === 'partial' ? 'bg-orange-50 text-orange-700' : 'bg-red-50 text-red-700'}`}>
{doc.parse_error}
</div>
</td>
</tr>
)}
</React.Fragment>
))}
</tbody>
</table>