contract-query/backend/app/models/document.py
2025-08-14 15:03:33 -05:00

59 lines
No EOL
2 KiB
Python

from pydantic import BaseModel, Field
from typing import Optional, List, Dict, Any
from datetime import datetime
from bson import ObjectId
from .user import PyObjectId
class DocumentBase(BaseModel):
    """Common fields shared by the document models in this module.

    Six fields are required (the file descriptors, ``index_id`` and
    ``uploaded_by``); the two timestamps are optional and default to
    ``None`` when not supplied.
    """

    # --- file descriptors (all required) ---
    filename: str
    original_filename: str
    file_size: int
    content_type: str

    # --- placement / ownership (required) ---
    index_id: str
    # Typed with the PyObjectId helper imported from the sibling ``user``
    # module; presumably the uploading user's id -- confirm against callers.
    uploaded_by: PyObjectId

    # --- timestamps (optional; nothing here fills them in automatically) ---
    created_at: Optional[datetime] = Field(default=None)
    updated_at: Optional[datetime] = Field(default=None)
class DocumentCreate(DocumentBase):
    """Creation model; declares no fields beyond those of ``DocumentBase``."""
class DocumentUpdate(BaseModel):
    """Update model in which every field is optional.

    Both fields default to ``None``, so an instance can be built with any
    subset of them.
    """

    filename: Optional[str] = Field(default=None)
    updated_at: Optional[datetime] = Field(default=None)
class DocumentInDB(DocumentBase):
    """``DocumentBase`` extended with an id, a file path and pipeline state.

    ``id`` is populated from / serialized under the alias ``_id`` and gets a
    fresh ``PyObjectId`` when omitted. ``file_path`` is the only additional
    required field; everything else has a default.
    """

    # Identity and storage location.
    id: PyObjectId = Field(alias="_id", default_factory=PyObjectId)
    file_path: str

    # Parsing state.
    # Status values used: pending, processing, completed, failed.
    processing_status: str = Field(default="pending")
    metadata: Dict[str, Any] = Field(default_factory=dict)
    parsed_text: Optional[str] = Field(default=None)
    text_chunks: Optional[List[str]] = Field(default=None)

    # Embedding state.
    # Status values used: pending, processing, completed, failed.
    embedding_status: str = Field(default="pending")
    chunk_count: int = Field(default=0)
    vector_ids: Optional[List[str]] = Field(default=None)

    # Contract-summary state.
    contract_summary: Optional[Dict[str, Any]] = Field(default=None)
    # Status values used: pending, processing, completed, failed.
    summary_status: str = Field(default="pending")
    summary_created_at: Optional[datetime] = Field(default=None)

    # NOTE(review): ``populate_by_name`` is the Pydantic v2 option name, while
    # a nested ``Config`` class and ``json_encoders`` are the v1-style API --
    # confirm the targeted Pydantic version before modernizing this.
    class Config:
        # Render ObjectId values as strings when encoding to JSON.
        json_encoders = {ObjectId: str}
        # Permit field types Pydantic has no built-in validator for.
        arbitrary_types_allowed = True
        # Accept the field name ``id`` as well as its alias ``_id``.
        populate_by_name = True
class Document(DocumentBase):
    """``DocumentBase`` extended with an id and pipeline state.

    Declares the same extra fields as ``DocumentInDB`` except ``file_path``,
    which this model does not have. ``id`` uses the alias ``_id`` and gets a
    fresh ``PyObjectId`` when omitted.
    """

    # Identity.
    id: PyObjectId = Field(alias="_id", default_factory=PyObjectId)

    # Parsing state.
    processing_status: str = Field(default="pending")
    metadata: Dict[str, Any] = Field(default_factory=dict)
    parsed_text: Optional[str] = Field(default=None)
    text_chunks: Optional[List[str]] = Field(default=None)

    # Embedding state.
    embedding_status: str = Field(default="pending")
    chunk_count: int = Field(default=0)
    vector_ids: Optional[List[str]] = Field(default=None)

    # Contract-summary state.
    contract_summary: Optional[Dict[str, Any]] = Field(default=None)
    summary_status: str = Field(default="pending")
    summary_created_at: Optional[datetime] = Field(default=None)

    # NOTE(review): ``populate_by_name`` is the Pydantic v2 option name, while
    # a nested ``Config`` class and ``json_encoders`` are the v1-style API --
    # confirm the targeted Pydantic version before modernizing this.
    class Config:
        # Render ObjectId values as strings when encoding to JSON.
        json_encoders = {ObjectId: str}
        # Permit field types Pydantic has no built-in validator for.
        arbitrary_types_allowed = True
        # Accept the field name ``id`` as well as its alias ``_id``.
        populate_by_name = True