- Updated docker-compose.yml to allow disabling embedded Ollama via environment variable. - Refactored Dockerfile and Dockerfile.dev for improved dependency management and installation process. - Enhanced FastAPI migration scripts to handle orphaned Alembic revisions and added new database migration logic. - Improved error handling in background tasks and Codex authentication endpoints. - Added support for font file uploads with better validation and extraction of font names. - Introduced new image search functionality with support for Pexels and Pixabay APIs.
205 lines
5.7 KiB
Python
205 lines
5.7 KiB
Python
import math
|
|
import re
|
|
from typing import Iterable, List, Optional
|
|
|
|
from models.presentation_outline_model import (
|
|
PresentationOutlineModel,
|
|
SlideOutlineModel,
|
|
)
|
|
|
|
|
|
# Markdown-style heading: at most three leading whitespace characters, one or
# more "#" characters, then the heading text (group 1) up to the end of the
# line. MULTILINE lets the heading sit on any line of the content.
HEADING_PATTERN = re.compile(r"^\s{0,3}#+\s*(.+)$", flags=re.MULTILINE)

# Shortest leading run of text that ends in ".", "?" or "!" (group 1).
FIRST_SENTENCE_PATTERN = re.compile(r"^\s*([^.?!]+?[.?!])", flags=re.DOTALL)

# http(s) URL whose path ends in .jpg/.jpeg/.png/.webp (case-insensitive),
# optionally followed by a query string that stops at whitespace, a quote
# character or a backslash.
IMAGE_URL_PATTERN = re.compile(
    r"https?://[-\w./%~:!$&'()*+,;=]+?\.(?:jpe?g|png|webp)(?:\?[^\s\"\'\\]*)?",
    flags=re.IGNORECASE | re.UNICODE,
)
|
|
|
|
|
|
def get_presentation_title_from_presentation_outline(
    presentation_outline: PresentationOutlineModel,
) -> str:
    """Derive a presentation title from the first slide of an outline.

    Args:
        presentation_outline: Outline whose ``slides`` are inspected; only
            the first slide's ``content`` is used.

    Returns:
        ``"Untitled Presentation"`` when the outline has no slides.
        Otherwise the first 100 characters of the first slide's content
        (after dropping a leading "Page N" heading, if any) with ``#``,
        ``/`` and backslash characters removed and newlines turned into
        spaces. Surrounding whitespace is not trimmed.
    """
    slides = presentation_outline.slides
    if not slides:
        return "Untitled Presentation"

    opening_text = slides[0].content or ""

    # Drop a leading "## Page 3:"-style heading together with the separator
    # characters that follow it. The pattern is anchored at the start of the
    # string, so content without such a heading is left untouched.
    page_heading = re.match(
        r"^\s*#{1,6}\s*Page\s+\d+\b[\s,:\-]*",
        opening_text,
    )
    if page_heading is not None:
        opening_text = opening_text[page_heading.end():]

    # Truncation happens before the cleanup, so the result may end up
    # shorter than 100 characters.
    cleanup_table = str.maketrans({"#": None, "/": None, "\\": None, "\n": " "})
    return opening_text[:100].translate(cleanup_table)
|
|
|
|
|
|
def _get_toc_count_for_total_slides(total_slides: int, title_slide: bool) -> int:
    """Return the number of table-of-contents slides for a deck size.

    The count is computed in two passes, each allowing one TOC slide per
    ten slides (rounded up).

    Args:
        total_slides: Total number of slides in the deck.
        title_slide: Whether the deck starts with a title slide; if so it
            is excluded from the first-pass estimate.

    Returns:
        The TOC slide count, or 0 when ``total_slides`` is not positive.
    """
    if total_slides <= 0:
        return 0

    # First pass: estimate from every slide except the title slide.
    if title_slide:
        slides_to_index = total_slides - 1
    else:
        slides_to_index = total_slides
    estimated_toc = math.ceil(slides_to_index / 10)

    # Second pass: take the estimated TOC slides out of the total and
    # recompute how many TOC slides the remainder needs.
    return math.ceil((total_slides - estimated_toc) / 10)
|
|
|
|
|
|
def get_no_of_toc_required_for_n_outlines(
    *,
    n_outlines: int,
    title_slide: bool,
    target_total_slides: Optional[int] = None,
) -> int:
    """Return how many table-of-contents slides a set of outlines needs.

    Args:
        n_outlines: Number of outlines (slides without TOC slides).
        title_slide: Whether the first outline is a title slide, which is
            not counted when no target total is given.
        target_total_slides: Optional total deck size. When given, the
            count is derived from the larger of this value and
            ``n_outlines`` via ``_get_toc_count_for_total_slides``.

    Returns:
        The number of TOC slides; 0 when there is nothing to index.
    """
    if target_total_slides is None:
        if n_outlines <= 0:
            return 0
        # One TOC slide per ten outlines, rounded up, title slide excluded.
        outlines_to_index = n_outlines - 1 if title_slide else n_outlines
        return math.ceil(outlines_to_index / 10)

    # The target can never be smaller than the outlines that already exist.
    effective_total = max(target_total_slides, n_outlines)
    return _get_toc_count_for_total_slides(effective_total, title_slide)
|
|
|
|
|
|
def get_no_of_outlines_to_generate_for_n_slides(
    *,
    n_slides: int,
    toc: bool,
    title_slide: bool,
) -> int:
    """Return how many outlines to generate for a deck of ``n_slides``.

    Args:
        n_slides: Requested total number of slides.
        toc: Whether table-of-contents slides will be part of the deck.
        title_slide: Whether the deck starts with a title slide; it is
            excluded from the first TOC estimate.

    Returns:
        ``n_slides`` unchanged when ``toc`` is falsy; otherwise
        ``n_slides`` minus the number of slides reserved for the TOC.
    """
    if not toc:
        return n_slides

    # First estimate: one TOC slide per ten slides, title slide excluded.
    slides_to_index = n_slides - 1 if title_slide else n_slides
    toc_estimate = math.ceil(slides_to_index / 10)

    # Refine the estimate against the slides left once that many TOC
    # slides have been taken out of the total.
    toc_reserved = math.ceil((n_slides - toc_estimate) / 10)

    return n_slides - toc_reserved
|
|
|
|
|
|
def get_presentation_outline_model_with_toc(
    *,
    outline: PresentationOutlineModel,
    n_toc_slides: int,
    title_slide: bool,
) -> PresentationOutlineModel:
    """Return a copy of ``outline`` with table-of-contents slides inserted.

    The slides after the optional title slide are split into
    ``n_toc_slides`` contiguous groups; one TOC slide is generated per
    group, listing the page number and title of every slide in it. The TOC
    slides are inserted right after the title slide (or at the very start
    when there is none).

    Args:
        outline: Source outline. It is deep-copied, not modified.
        n_toc_slides: Number of TOC slides to create.
        title_slide: Whether the first slide is a title slide that must
            stay in front of the TOC and is not listed in it.

    Returns:
        The original ``outline`` object when ``n_toc_slides`` is not
        positive; otherwise a deep copy, with TOC slides added whenever
        there is at least one slide to list.
    """
    if n_toc_slides <= 0:
        return outline

    result = outline.model_copy(deep=True)
    all_slides = result.slides
    toc_position = 1 if title_slide else 0

    listed_slides = all_slides[toc_position:]
    if not listed_slides:
        return result

    groups = _split_outlines_evenly(listed_slides, n_toc_slides)
    if not groups:
        return result

    # Page numbers are 1-based positions in the final deck: the first listed
    # slide comes after the title slide (if any) and after every TOC slide.
    page_number = toc_position + len(groups) + 1

    toc_slides: List[SlideOutlineModel] = []
    for group in groups:
        toc_lines = ["## Table of Contents", ""]
        for listed_slide in group:
            outline_title = _extract_outline_title(listed_slide.content)
            toc_lines.append(
                f"- Page number: {page_number}, Title: {outline_title}"
            )
            page_number += 1
        toc_slides.append(SlideOutlineModel(content="\n".join(toc_lines).strip()))

    # Splice all TOC slides in, in order, at the insertion point.
    all_slides[toc_position:toc_position] = toc_slides

    return result
|
|
|
|
|
|
def _split_outlines_evenly(
    outlines: Iterable[SlideOutlineModel], n_sections: int
) -> List[List[SlideOutlineModel]]:
    """Split outlines into ``n_sections`` contiguous, near-equal sections.

    Section sizes differ by at most one, with the larger sections first.
    When ``n_sections`` exceeds the number of outlines, the trailing
    sections are empty lists.

    Args:
        outlines: Outlines to split; consumed once and kept in order.
        n_sections: Number of sections to produce.

    Returns:
        A list of ``n_sections`` lists, or an empty list when
        ``n_sections`` is not positive or there are no outlines.
    """
    items = list(outlines)
    if n_sections <= 0 or not items:
        return []

    # Every section gets `quotient` items; the first `extra` get one more.
    quotient, extra = divmod(len(items), n_sections)

    chunks: List[List[SlideOutlineModel]] = []
    cursor = 0
    for position in range(n_sections):
        size = quotient + 1 if position < extra else quotient
        chunks.append(items[cursor:cursor + size])
        cursor += size

    return chunks
|
|
|
|
|
|
def _extract_outline_title(content: str) -> str:
    """Pick a human-friendly title out of an outline's markdown content.

    Candidates are tried in order: the text of the first markdown heading,
    the first sentence (text up to the first ".", "?" or "!"), and the
    first non-blank line.

    Args:
        content: Markdown content of the outline; a falsy value is treated
            as an empty string.

    Returns:
        The first candidate found, stripped of surrounding whitespace, or
        ``"Slide"`` when the content offers none.
    """
    text = content or ""

    heading = HEADING_PATTERN.search(text)
    if heading is not None:
        return heading.group(1).strip()

    sentence = FIRST_SENTENCE_PATTERN.search(text.strip())
    if sentence is not None:
        return sentence.group(1).strip()

    # Fall back to the first line with visible text, else a generic label.
    stripped_lines = (raw_line.strip() for raw_line in text.splitlines())
    return next((candidate for candidate in stripped_lines if candidate), "Slide")
|
|
|
|
|
|
def get_images_for_slides_from_outline(
    slides: List[SlideOutlineModel],
) -> List[List[str]]:
    """Collect the image URLs mentioned in each slide's content.

    Only URLs matched by ``IMAGE_URL_PATTERN`` (png, jpg, jpeg, webp) are
    returned.

    Args:
        slides: Slide outlines whose ``content`` is scanned; missing
            content is treated as an empty string.

    Returns:
        One list of URLs per slide, in slide order. Within a slide, URLs
        keep their order of first appearance and duplicates are dropped.
    """
    return [
        # dict.fromkeys de-duplicates while preserving insertion order.
        list(dict.fromkeys(IMAGE_URL_PATTERN.findall(slide.content or "")))
        for slide in slides
    ]
|