Show partial parse status in UI when some pages fail
- LlamaParse service now returns a ParseResult dataclass with markdown, total page count, and a list of failed pages (page number + error)
- Knowledge base service sets status to "partial" (instead of "parsed") when some pages failed, with a descriptive error listing which pages failed and why
- Frontend StatusBadge shows "partial parse" in orange for partial status
- Error details are shown inline below the document row for both partial and error statuses

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
49facbe713
commit
de62fa1f87
3 changed files with 83 additions and 31 deletions
|
|
@@ -145,12 +145,36 @@ class KnowledgeBaseService:
|
|||
await doc_repo.update_source_document_parse_status(doc.id, "parsing")
|
||||
await doc_session.commit()
|
||||
|
||||
markdown = await self.llamaparse.parse_document(file_data, doc.filename)
|
||||
result = await self.llamaparse.parse_document(file_data, doc.filename)
|
||||
|
||||
# Determine status based on failed pages
|
||||
if result.failed_pages:
|
||||
if not result.markdown.strip():
|
||||
# All pages failed
|
||||
failed_desc = "; ".join(
|
||||
f"Page {fp['page']}: {fp['error']}" for fp in result.failed_pages
|
||||
)
|
||||
status = "error"
|
||||
error_msg = f"All {result.total_pages} pages failed to parse. {failed_desc}"
|
||||
else:
|
||||
failed_desc = "; ".join(
|
||||
f"Page {fp['page']}: {fp['error']}" for fp in result.failed_pages
|
||||
)
|
||||
status = "partial"
|
||||
error_msg = (
|
||||
f"{len(result.failed_pages)} of {result.total_pages} pages "
|
||||
f"failed to parse. {failed_desc}"
|
||||
)
|
||||
else:
|
||||
status = "parsed"
|
||||
error_msg = None
|
||||
|
||||
async with async_session_factory() as doc_session:
|
||||
doc_repo = KnowledgeBaseRepository(doc_session)
|
||||
await doc_repo.update_source_document_parse_status(
|
||||
doc.id, "parsed", parsed_markdown=markdown
|
||||
doc.id, status,
|
||||
parsed_markdown=result.markdown if result.markdown.strip() else None,
|
||||
parse_error=error_msg,
|
||||
)
|
||||
await doc_session.commit()
|
||||
|
||||
|
|
@@ -159,8 +183,8 @@ class KnowledgeBaseService:
|
|||
await repo.update_processing_job(job_id, parsed_documents=parsed_count)
|
||||
await session.commit()
|
||||
|
||||
if markdown.strip():
|
||||
return (doc.filename, str(doc.id), markdown)
|
||||
if result.markdown.strip():
|
||||
return (doc.filename, str(doc.id), result.markdown)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@@ -1,17 +1,26 @@
|
|||
import logging
|
||||
import tempfile
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParseResult:
|
||||
"""Result of a LlamaParse document parse."""
|
||||
markdown: str
|
||||
total_pages: int = 0
|
||||
failed_pages: list[dict] = field(default_factory=list)
|
||||
|
||||
|
||||
class LlamaParseService:
|
||||
"""Service for parsing documents using LlamaParse with the llama-cloud SDK."""
|
||||
|
||||
def __init__(self, api_key: str):
|
||||
self.api_key = api_key
|
||||
|
||||
async def parse_document(self, file_data: bytes, filename: str) -> str:
|
||||
async def parse_document(self, file_data: bytes, filename: str) -> ParseResult:
|
||||
"""
|
||||
Parse a document using LlamaParse Agentic Plus tier and return markdown.
|
||||
|
||||
|
|
@@ -23,7 +32,7 @@ class LlamaParseService:
|
|||
filename: Original filename (used for format detection)
|
||||
|
||||
Returns:
|
||||
Parsed markdown text
|
||||
ParseResult with markdown text and any failed page info
|
||||
"""
|
||||
from llama_cloud import AsyncLlamaCloud
|
||||
|
||||
|
|
@@ -57,25 +66,29 @@ class LlamaParseService:
|
|||
|
||||
# Extract markdown from all pages
|
||||
if result.markdown and result.markdown.pages:
|
||||
total_pages = len(result.markdown.pages)
|
||||
pages = []
|
||||
failed_pages = []
|
||||
for i, page in enumerate(result.markdown.pages):
|
||||
if hasattr(page, "markdown") and page.markdown:
|
||||
pages.append(page.markdown)
|
||||
else:
|
||||
# Log full details of failed pages for troubleshooting
|
||||
page_num = getattr(page, "page_number", i + 1)
|
||||
error_msg = getattr(page, "error", "Unknown error")
|
||||
logger.error(
|
||||
f"[LLAMAPARSE] Page {i} failed for '{filename}': "
|
||||
f"type={type(page).__name__}, attrs={vars(page) if hasattr(page, '__dict__') else repr(page)}"
|
||||
f"[LLAMAPARSE] Page {page_num} failed for '{filename}': "
|
||||
f"type={type(page).__name__}, error={error_msg}"
|
||||
)
|
||||
failed_pages.append({"page": page_num, "error": error_msg})
|
||||
if not pages:
|
||||
logger.warning(f"[LLAMAPARSE] All {len(result.markdown.pages)} pages failed for '{filename}'")
|
||||
return ""
|
||||
logger.warning(f"[LLAMAPARSE] All {total_pages} pages failed for '{filename}'")
|
||||
return ParseResult(markdown="", total_pages=total_pages, failed_pages=failed_pages)
|
||||
combined = "\n\n".join(pages)
|
||||
logger.info(f"[LLAMAPARSE] Parsed '{filename}' -> {len(combined)} chars from {len(pages)}/{len(result.markdown.pages)} pages")
|
||||
return combined
|
||||
logger.info(f"[LLAMAPARSE] Parsed '{filename}' -> {len(combined)} chars from {len(pages)}/{total_pages} pages")
|
||||
return ParseResult(markdown=combined, total_pages=total_pages, failed_pages=failed_pages)
|
||||
|
||||
logger.warning(f"[LLAMAPARSE] No markdown content returned for '{filename}'")
|
||||
return ""
|
||||
return ParseResult(markdown="", total_pages=0, failed_pages=[])
|
||||
|
||||
finally:
|
||||
# Clean up temp file
|
||||
|
|
|
|||
|
|
@@ -16,15 +16,19 @@ const StatusBadge: React.FC<{ status: string }> = ({ status }) => {
|
|||
pending: 'bg-yellow-100 text-yellow-800',
|
||||
parsing: 'bg-blue-100 text-blue-800',
|
||||
parsed: 'bg-green-100 text-green-800',
|
||||
partial: 'bg-orange-100 text-orange-800',
|
||||
error: 'bg-red-100 text-red-800',
|
||||
parsing_documents: 'bg-blue-100 text-blue-800',
|
||||
distilling: 'bg-purple-100 text-purple-800',
|
||||
completed: 'bg-green-100 text-green-800',
|
||||
failed: 'bg-red-100 text-red-800',
|
||||
};
|
||||
const labels: Record<string, string> = {
|
||||
partial: 'partial parse',
|
||||
};
|
||||
return (
|
||||
<span className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium ${colors[status] || 'bg-grey-100 text-grey-700'}`}>
|
||||
{status}
|
||||
{labels[status] || status}
|
||||
</span>
|
||||
);
|
||||
};
|
||||
|
|
@@ -475,22 +479,33 @@ export const KnowledgeBase: React.FC = () => {
|
|||
</thead>
|
||||
<tbody className="divide-y divide-grey-200">
|
||||
{selectedKb.source_documents.map(doc => (
|
||||
<tr key={doc.id} className="hover:bg-grey-50 transition-colors">
|
||||
<td className="px-4 py-3 font-medium text-primary-blue">{doc.filename}</td>
|
||||
<td className="px-4 py-3 text-grey-700">{formatBytes(doc.file_size_bytes)}</td>
|
||||
<td className="px-4 py-3 text-grey-700">{formatDate(doc.created_at)}</td>
|
||||
<td className="px-4 py-3 text-grey-700">{doc.uploaded_by_name || '-'}</td>
|
||||
<td className="px-4 py-3"><StatusBadge status={doc.parse_status} /></td>
|
||||
<td className="px-4 py-3">
|
||||
<button
|
||||
onClick={() => handleRemoveDoc(doc.id)}
|
||||
className="text-grey-700 hover:text-error transition-colors"
|
||||
title="Remove document"
|
||||
>
|
||||
<TrashIcon className="h-4 w-4" />
|
||||
</button>
|
||||
</td>
|
||||
</tr>
|
||||
<React.Fragment key={doc.id}>
|
||||
<tr className="hover:bg-grey-50 transition-colors">
|
||||
<td className="px-4 py-3 font-medium text-primary-blue">{doc.filename}</td>
|
||||
<td className="px-4 py-3 text-grey-700">{formatBytes(doc.file_size_bytes)}</td>
|
||||
<td className="px-4 py-3 text-grey-700">{formatDate(doc.created_at)}</td>
|
||||
<td className="px-4 py-3 text-grey-700">{doc.uploaded_by_name || '-'}</td>
|
||||
<td className="px-4 py-3"><StatusBadge status={doc.parse_status} /></td>
|
||||
<td className="px-4 py-3">
|
||||
<button
|
||||
onClick={() => handleRemoveDoc(doc.id)}
|
||||
className="text-grey-700 hover:text-error transition-colors"
|
||||
title="Remove document"
|
||||
>
|
||||
<TrashIcon className="h-4 w-4" />
|
||||
</button>
|
||||
</td>
|
||||
</tr>
|
||||
{doc.parse_error && (doc.parse_status === 'partial' || doc.parse_status === 'error') && (
|
||||
<tr>
|
||||
<td colSpan={6} className="px-4 pb-3 pt-0">
|
||||
<div className={`text-xs px-3 py-2 rounded-lg ${doc.parse_status === 'partial' ? 'bg-orange-50 text-orange-700' : 'bg-red-50 text-red-700'}`}>
|
||||
{doc.parse_error}
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
)}
|
||||
</React.Fragment>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue