Backend:
- Migration 014: add file_hash (SHA-256) to knowledge_documents
- Upload endpoint: skip if identical, replace if changed, create if new
- Response includes action: "created" | "updated" | "skipped"

Frontend:
- Show SkipForward icon for skipped files
- Overall progress bar: X / Y processed with percentage
- Summary line shows uploaded / skipped / failed counts

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
60 lines
2.3 KiB
Python
60 lines
2.3 KiB
Python
"""
Knowledge Base document model for admin-uploaded RAG documents.
"""
|
|
from sqlalchemy import Column, String, Boolean, DateTime, Integer, BigInteger, Text, ForeignKey, Enum as SQLEnum
|
|
from sqlalchemy.dialects.postgresql import UUID
|
|
from sqlalchemy.orm import relationship
|
|
from app.database import Base
|
|
import uuid
|
|
from datetime import datetime
|
|
import enum
|
|
|
|
|
|
@enum.unique
class DocumentStatus(str, enum.Enum):
    """Knowledge document processing status.

    Members subclass ``str``, so each one compares equal to its plain
    string value (``DocumentStatus.PENDING == "pending"``).

    ``@enum.unique`` makes the class definition fail if two members are
    ever given the same value, so a status can never silently alias
    another one.
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
|
|
|
|
|
|
class KnowledgeDocument(Base):
    """
    Tracks documents uploaded via admin panel for RAG knowledge base.

    document_key maps to sharepoint_id in Qdrant (DocumentProcessor reuse).
    """

    __tablename__ = "knowledge_documents"

    # Primary key is generated client-side as a random UUID.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    file_name = Column(String(512), nullable=False)
    file_type = Column(String(50), nullable=False)  # pdf, docx, xlsx, txt
    file_size = Column(BigInteger, nullable=False)

    # Unique key for Qdrant mapping (maps to sharepoint_id in DocumentProcessor)
    document_key = Column(String(255), unique=True, nullable=False, index=True)

    # Content hash for deduplication (SHA-256 hex digest, 64 characters).
    # Nullable, so a row may exist without a hash.
    file_hash = Column(String(64), nullable=True, index=True)

    # Processing status. Stored as a plain string column; the default is
    # taken from DocumentStatus so the literal is defined in one place.
    status = Column(
        String(20),
        default=DocumentStatus.PENDING.value,
        nullable=False,
        index=True,
    )
    vector_count = Column(Integer, default=0, nullable=False)
    error_message = Column(Text, nullable=True)

    # Metadata
    description = Column(Text, nullable=True)
    # Deleting the referenced department nulls this column (SET NULL).
    department_id = Column(
        UUID(as_uuid=True),
        ForeignKey("departments.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    region_code = Column(String(10), nullable=True)

    # Ownership
    # Deleting the referenced user nulls this column (SET NULL).
    uploaded_by = Column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    is_active = Column(Boolean, default=True, nullable=False, index=True)

    # Timestamps: datetime.utcnow produces naive datetimes in UTC.
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; moving
    # to an aware clock would change the stored values, so it is left as is.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )
    processed_at = Column(DateTime, nullable=True)

    # Relationships
    uploader = relationship("User", foreign_keys=[uploaded_by])
    department = relationship("Department", foreign_keys=[department_id])
|