presenton/servers/fastapi/utils/outline_utils.py
sudipnext c7860127f2 feat: add support for optional embedded Ollama and enhance database migration handling
- Updated docker-compose.yml to allow disabling embedded Ollama via environment variable.
- Refactored Dockerfile and Dockerfile.dev for improved dependency management and installation process.
- Enhanced FastAPI migration scripts to handle orphaned Alembic revisions and added new database migration logic.
- Improved error handling in background tasks and Codex authentication endpoints.
- Added support for font file uploads with better validation and extraction of font names.
- Introduced new image search functionality with support for Pexels and Pixabay APIs.
2026-04-15 15:39:35 +05:45

205 lines
5.7 KiB
Python

import math
import re
from typing import Iterable, List, Optional
from models.presentation_outline_model import (
PresentationOutlineModel,
SlideOutlineModel,
)
# Markdown heading line: up to three leading whitespace characters, one or
# more "#" characters, optional whitespace, then the heading text (group 1).
# MULTILINE makes "^"/"$" anchor at every line, so a heading anywhere in the
# text can match. Note that "\s" also matches newline characters.
HEADING_PATTERN = re.compile(r"^\s{0,3}#+\s*(.+)$", re.MULTILINE)
# First sentence of a text: optional leading whitespace, then the shortest run
# of characters up to and including the first ".", "?" or "!" (group 1).
# The negated character class also matches newlines, so the captured sentence
# may span several lines.
FIRST_SENTENCE_PATTERN = re.compile(r"^\s*([^.?!]+?[.?!])", re.DOTALL)
# http(s) URL matched through its first .jpg/.jpeg/.png/.webp extension,
# optionally followed by a "?query" part. Only non-capturing groups are used,
# so findall() returns the whole matched URL. IGNORECASE makes the match
# case-insensitive (e.g. ".PNG" is accepted).
IMAGE_URL_PATTERN = re.compile(
r"https?://[-\w./%~:!$&'()*+,;=]+?\.(?:jpe?g|png|webp)(?:\?[^\s\"\'\\]*)?",
re.IGNORECASE | re.UNICODE,
)
def get_presentation_title_from_presentation_outline(
    presentation_outline: PresentationOutlineModel,
) -> str:
    """Derive a presentation title from the first slide of an outline.

    The title is built from the first 100 characters of the first slide's
    content after an optional leading ``# Page <n>`` marker has been removed.
    ``#``, ``/`` and ``\\`` characters are dropped, newlines become spaces and
    surrounding whitespace is trimmed.

    Args:
        presentation_outline: Outline whose first slide supplies the title.

    Returns:
        The cleaned title, or ``"Untitled Presentation"`` when the outline has
        no slides or the first slide yields no usable text.
    """
    fallback_title = "Untitled Presentation"
    if not presentation_outline.slides:
        return fallback_title

    first_content = presentation_outline.slides[0].content or ""

    # The pattern is anchored at the start of the string, so this is a no-op
    # when the content does not begin with a "Page <n>" heading marker.
    first_content = re.sub(
        r"^\s*#{1,6}\s*Page\s+\d+\b[\s,:\-]*",
        "",
        first_content,
        count=1,
    )

    title = (
        first_content[:100]
        .replace("#", "")
        .replace("/", "")
        .replace("\\", "")
        .replace("\n", " ")
        # Removing "#" from "# Title" leaves a leading space; trim it.
        .strip()
    )

    # Empty or marker-only content gets the same fallback as "no slides".
    return title or fallback_title
def _get_toc_count_for_total_slides(total_slides: int, title_slide: bool) -> int:
if total_slides <= 0:
return 0
first_pass = math.ceil(((total_slides - 1) if title_slide else total_slides) / 10)
return math.ceil((total_slides - first_pass) / 10)
def get_no_of_toc_required_for_n_outlines(
    *,
    n_outlines: int,
    title_slide: bool,
    target_total_slides: Optional[int] = None,
) -> int:
    """Return how many table-of-contents slides a set of outlines needs.

    Args:
        n_outlines: Number of outlines available.
        title_slide: Whether the first outline is a title slide (it is then
            excluded from the per-ten count).
        target_total_slides: Optional total deck size. When given, the count
            is derived from the larger of this value and ``n_outlines`` via
            ``_get_toc_count_for_total_slides``.

    Returns:
        The number of TOC slides; 0 when there is nothing to list.
    """
    if target_total_slides is None:
        if n_outlines <= 0:
            return 0
        listed_outlines = n_outlines - 1 if title_slide else n_outlines
        # One TOC slide per ten listed outlines, rounded up.
        return math.ceil(listed_outlines / 10)

    effective_total = max(target_total_slides, n_outlines)
    return _get_toc_count_for_total_slides(effective_total, title_slide)
def get_no_of_outlines_to_generate_for_n_slides(
    *,
    n_slides: int,
    toc: bool,
    title_slide: bool,
) -> int:
    """Return how many outlines to generate for a deck of ``n_slides`` slides.

    Without a table of contents every slide needs an outline. With one, the
    TOC slide count (two-pass, ten slides per TOC slide) is subtracted from
    the total.

    Args:
        n_slides: Total number of slides requested.
        toc: Whether table-of-contents slides will be added.
        title_slide: Whether the deck has a title slide.

    Returns:
        The number of outlines to generate.
    """
    if not toc:
        return n_slides

    listed_slides = n_slides - 1 if title_slide else n_slides
    toc_estimate = math.ceil(listed_slides / 10)
    toc_count = math.ceil((n_slides - toc_estimate) / 10)
    return n_slides - toc_count
def get_presentation_outline_model_with_toc(
    *,
    outline: PresentationOutlineModel,
    n_toc_slides: int,
    title_slide: bool,
) -> PresentationOutlineModel:
    """Return an outline with table-of-contents slides inserted.

    The slides after the optional title slide are split into ``n_toc_slides``
    contiguous sections. One TOC slide is built per section, listing each
    slide's title and its page number in the final deck. The TOC slides are
    inserted directly after the title slide, or at the front when there is
    no title slide.

    Args:
        outline: Source outline; it is not mutated.
        n_toc_slides: Number of TOC slides to create.
        title_slide: Whether the first slide is a title slide.

    Returns:
        ``outline`` itself when ``n_toc_slides`` is not positive; otherwise a
        deep copy, with TOC slides added when there are slides to list.
    """
    if n_toc_slides <= 0:
        return outline

    result = outline.model_copy(deep=True)
    slides = result.slides
    toc_position = 1 if title_slide else 0

    content_slides = slides[toc_position:]
    if not content_slides:
        return result

    sections = _split_outlines_evenly(content_slides, n_toc_slides)
    if not sections:
        return result

    # Pages are 1-based: the first listed slide comes after the optional
    # title slide and after every TOC slide.
    next_page = toc_position + len(sections) + 1

    toc_slides: List[SlideOutlineModel] = []
    for section in sections:
        lines = ["## Table of Contents", ""]
        for slide in section:
            title = _extract_outline_title(slide.content)
            lines.append(f"- Page number: {next_page}, Title: {title}")
            next_page += 1
        toc_slides.append(SlideOutlineModel(content="\n".join(lines).strip()))

    # Splice all TOC slides in at once, keeping their order.
    slides[toc_position:toc_position] = toc_slides
    return result
def _split_outlines_evenly(
outlines: Iterable[SlideOutlineModel], n_sections: int
) -> List[List[SlideOutlineModel]]:
"""Split outlines into n contiguous sections with near-equal sizes."""
outlines_list = list(outlines)
if n_sections <= 0 or not outlines_list:
return []
total = len(outlines_list)
n_sections = max(1, n_sections)
base_size = total // n_sections
remainder = total % n_sections
sections: List[List[SlideOutlineModel]] = []
start = 0
for section_index in range(n_sections):
current_size = base_size + (1 if section_index < remainder else 0)
end = start + current_size
sections.append(outlines_list[start:end])
start = end
return sections
def _extract_outline_title(content: str) -> str:
    """Get a human-friendly, single-line title from an outline's markdown content.

    Resolution order:
      1. the text of the first markdown heading, if it is not blank;
      2. the first sentence of the content, with whitespace collapsed;
      3. the first non-blank line;
      4. the literal ``"Slide"``.

    Args:
        content: Markdown content of a slide outline; may be empty or None.

    Returns:
        The extracted title.
    """
    text = content or ""

    heading_match = HEADING_PATTERN.search(text)
    if heading_match:
        heading = heading_match.group(1).strip()
        # The capture can be whitespace-only (e.g. content "## "); fall
        # through to the other strategies rather than return an empty title.
        if heading:
            return heading

    sentence_match = FIRST_SENTENCE_PATTERN.search(text.strip())
    if sentence_match:
        # A sentence may span several lines; collapse whitespace runs so the
        # title stays on one line when embedded in a TOC bullet.
        return " ".join(sentence_match.group(1).split())

    for line in text.splitlines():
        stripped_line = line.strip()
        if stripped_line:
            return stripped_line

    return "Slide"
def get_images_for_slides_from_outline(
    slides: List[SlideOutlineModel],
) -> List[List[str]]:
    """
    Collect the image URLs (png, jpg, jpeg, webp) found in each slide's content.

    Args:
        slides: Slide outlines whose content is scanned for image URLs
    Returns:
        List of lists of image URLs, one list per slide, each de-duplicated
        in first-seen order
    """
    return [
        # dict.fromkeys drops repeated URLs while keeping first-seen order.
        list(dict.fromkeys(IMAGE_URL_PATTERN.findall(slide.content or "")))
        for slide in slides
    ]