- Step 10: Extended file upload for Excel/CSV/images/URLs (openpyxl, trafilatura)
- Step 11: Content intelligence service with rule-based + LLM classification
- Step 12: Slide mapping engine mapping content blocks to master deck layouts
- Step 13: Chart data extractor, native PPTX chart service (bar/line/pie/gantt/waterfall), ChartDataEditor skeleton

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
35 lines
924 B
Python
35 lines
924 B
Python
"""Content classification models for the content intelligence pipeline."""
|
|
from enum import Enum
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from pydantic import BaseModel
|
|
|
|
from services.attachment_parser_service import ImageInfo, TableData
|
|
|
|
|
|
class ContentBlockType(str, Enum):
    """Closed set of categories a content block can be labelled with.

    The ``str`` mixin makes every member a real string, so a member
    compares equal to its value (``ContentBlockType.quote == "quote"``)
    and can be looked up from that value (``ContentBlockType("quote")``).
    """

    # Text-oriented blocks.
    narrative = "narrative"
    quote = "quote"

    # Data-oriented blocks.
    metric = "metric"
    table = "table"
    timeline = "timeline"
    comparison = "comparison"
    list_items = "list_items"

    # Blocks that refer to an image or ask for an action.
    image_reference = "image_reference"
    call_to_action = "call_to_action"
|
|
|
|
|
|
class ContentBlock(BaseModel):
    """A single classified block of content.

    Only ``type`` and ``raw_text`` are required; the remaining fields
    have defaults.
    """

    # Category assigned to this block.
    type: ContentBlockType
    # Text of the block (presumably as found in the source; confirm with
    # the producer of these models).
    raw_text: str
    # Optional free-form mapping of data pulled out of the block; its
    # schema is not defined in this module.
    extracted_data: Optional[Dict[str, Any]] = None
    # Optional label of the section the block came from.
    source_section: Optional[str] = None
    # Intended range is 1-10; NOTE(review): no validator enforces that
    # range here, so out-of-range integers are accepted.
    priority: int = 5  # 1-10
|
|
|
|
|
|
class ClassifiedContent(BaseModel):
    """Container for classified content: blocks plus tables and images.

    Only ``blocks`` is required. The list defaults below are written as
    ``[]`` literals; pydantic copies field defaults for each instance, so
    they are not shared between models.
    """

    # Optional title for the content as a whole.
    title: Optional[str] = None
    # The classified blocks (required).
    blocks: List[ContentBlock]
    # Tables as parsed by the attachment parser service; empty by default.
    tables: List[TableData] = []
    # Image descriptors from the attachment parser service; empty by default.
    images: List[ImageInfo] = []
    # Free-text summary; empty string when none is provided.
    summary: str = ""
|