"""Pydantic models describing uploaded documents and their processing state."""

from datetime import datetime
from typing import Any, Dict, List, Optional

from bson import ObjectId
from pydantic import BaseModel, Field

from .user import PyObjectId


# NOTE: class-level documentation below is written as ``#`` comments rather
# than docstrings on purpose: Pydantic copies a model's docstring into the
# generated JSON schema ``description``, which would alter emitted schemas.


# Fields common to every document model in this module.
class DocumentBase(BaseModel):
    filename: str
    original_filename: str
    file_size: int
    content_type: str
    index_id: str
    uploaded_by: PyObjectId
    # Both timestamps are optional and default to None; nothing in this
    # module fills them in automatically.
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None


# Creation payload: exactly the fields of DocumentBase, nothing added.
class DocumentCreate(DocumentBase):
    pass


# Partial-update payload: every field is optional and defaults to None.
class DocumentUpdate(BaseModel):
    filename: Optional[str] = None
    updated_at: Optional[datetime] = None


# Document model that additionally carries ``file_path``; otherwise it
# declares the same fields as ``Document``.
class DocumentInDB(DocumentBase):
    # Exposed as ``id`` on the model, aliased to ``_id``; a fresh
    # PyObjectId is generated when none is supplied.
    id: PyObjectId = Field(alias="_id", default_factory=PyObjectId)
    file_path: str

    # Status values used: pending, processing, completed, failed.
    # The annotation is plain ``str``, so these values are not enforced here.
    processing_status: str = "pending"
    metadata: Dict[str, Any] = Field(default_factory=dict)
    parsed_text: Optional[str] = None
    text_chunks: Optional[List[str]] = None

    # Status values used: pending, processing, completed, failed.
    embedding_status: str = "pending"
    chunk_count: int = 0
    vector_ids: Optional[List[str]] = None

    contract_summary: Optional[Dict[str, Any]] = None
    # Status values used: pending, processing, completed, failed.
    summary_status: str = "pending"
    summary_created_at: Optional[datetime] = None

    class Config:
        # Accept the field name (``id``) as well as the alias (``_id``).
        populate_by_name = True
        # Permit field types Pydantic has no built-in validator for.
        arbitrary_types_allowed = True
        # Render ObjectId values as strings when encoding to JSON.
        json_encoders = {ObjectId: str}


# Document model without ``file_path``; the remaining fields and Config
# match ``DocumentInDB``.
class Document(DocumentBase):
    # Exposed as ``id`` on the model, aliased to ``_id``; a fresh
    # PyObjectId is generated when none is supplied.
    id: PyObjectId = Field(alias="_id", default_factory=PyObjectId)

    processing_status: str = "pending"
    metadata: Dict[str, Any] = Field(default_factory=dict)
    parsed_text: Optional[str] = None
    text_chunks: Optional[List[str]] = None

    embedding_status: str = "pending"
    chunk_count: int = 0
    vector_ids: Optional[List[str]] = None

    contract_summary: Optional[Dict[str, Any]] = None
    summary_status: str = "pending"
    summary_created_at: Optional[datetime] = None

    class Config:
        # Accept the field name (``id``) as well as the alias (``_id``).
        populate_by_name = True
        # Permit field types Pydantic has no built-in validator for.
        arbitrary_types_allowed = True
        # Render ObjectId values as strings when encoding to JSON.
        json_encoders = {ObjectId: str}