Merge pull request #91 from presenton/feat/presentation_export

feat/presentation export
This commit is contained in:
Saurav Niraula 2025-07-18 17:41:49 +05:45 committed by GitHub
commit 9bb985101f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 1397 additions and 36 deletions

View file

@ -37,9 +37,9 @@ async def stream_outlines(presentation_id: str):
).to_string()
presentation_content_text += chunk
presentation_content = PresentationOutlineModel.model_validate_json(
presentation_content_text
)
presentation_content_json = json.loads(presentation_content_text)
presentation_content = PresentationOutlineModel(**presentation_content_json)
presentation_content.slides = presentation_content.slides[
: presentation.n_slides
]

View file

@ -1,5 +1,6 @@
import asyncio
import json
import os
import random
from typing import Annotated, List, Optional
import uuid
@ -8,6 +9,7 @@ from fastapi.responses import StreamingResponse
from sqlalchemy import delete
from sqlmodel import select
from models.pptx_models import PptxPresentationModel
from models.presentation_outline_model import SlideOutlineModel
from models.presentation_layout import PresentationLayoutModel
from models.presentation_structure_model import PresentationStructureModel
@ -18,6 +20,8 @@ from services import TEMP_FILE_SERVICE
from services.database import get_sql_session
from services.documents_loader import DocumentsLoader
from models.sql.presentation import PresentationModel
from services.pptx_presentation_creator import PptxPresentationCreator
from utils.asset_directory_utils import get_export_directory
from utils.llm_calls.generate_document_summary import generate_document_summary
from utils.llm_calls.generate_presentation_structure import (
generate_presentation_structure,
@ -26,6 +30,7 @@ from utils.llm_calls.generate_slide_content import (
get_slide_content_from_type_and_outline,
)
from utils.process_slides import process_slide_and_fetch_assets
from utils.randomizers import get_random_uuid
# Router for the presentation endpoints; every route registered on it is
# served under the "/presentation" prefix and tagged "Presentation".
PRESENTATION_ROUTER = APIRouter(prefix="/presentation", tags=["Presentation"])
@ -272,3 +277,17 @@ def update_presentation(
**presentation.model_dump(),
slides=updated_slides,
)
@PRESENTATION_ROUTER.post("/export/pptx", response_model=str)
async def create_pptx(pptx_model: Annotated[PptxPresentationModel, Body()]):
    """Render the posted presentation model to a .pptx file.

    The file is written into the export directory and named after
    ``pptx_model.name`` (a random UUID when no usable name is given).

    Returns:
        The path of the written .pptx file.
    """
    import tempfile  # local import: only this endpoint needs it

    # The name comes from the request body, so reduce it to a bare file name;
    # otherwise something like "../../x" would escape the export directory.
    requested_name = (pptx_model.name or "").replace("\\", "/")
    file_stem = os.path.basename(requested_name).strip()
    if file_stem in ("", ".", ".."):
        file_stem = get_random_uuid()

    pptx_path = os.path.join(get_export_directory(), f"{file_stem}.pptx")

    # The creator needs a scratch directory for downloaded/processed images.
    # It is only needed until the deck is saved, so remove it afterwards.
    with tempfile.TemporaryDirectory() as temp_dir:
        pptx_creator = PptxPresentationCreator(pptx_model, temp_dir)
        await pptx_creator.create_ppt()
        pptx_creator.save(pptx_path)

    return pptx_path

View file

@ -0,0 +1,158 @@
from enum import Enum
from typing import Annotated, List, Optional
from annotated_types import Len
from pydantic import BaseModel
from pptx.util import Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE, MSO_CONNECTOR_TYPE
class PptxBoxShapeEnum(Enum):
    """Outline a picture box can be masked to."""

    # Only CIRCLE triggers extra processing in the creator (the image is
    # passed through create_circle_image); RECTANGLE adds no masking there.
    RECTANGLE = "rectangle"
    CIRCLE = "circle"
class PptxObjectFitEnum(Enum):
    """How an image is fitted into its box (interpreted by ``fit_image``)."""

    # CONTAIN: scale to fit inside the box, keeping the aspect ratio.
    CONTAIN = "contain"
    # COVER: scale to cover the whole box, keeping the aspect ratio, then crop.
    COVER = "cover"
    # FILL: stretch to exactly the box size, ignoring the aspect ratio.
    FILL = "fill"
class PptxSpacingModel(BaseModel):
    """Per-side spacing (margin / paragraph spacing); values are wrapped in ``Pt`` by the creator."""

    top: int = 0
    bottom: int = 0
    left: int = 0
    right: int = 0

    @classmethod
    def all(cls, num: int):
        """Return a spacing with the same value ``num`` on all four sides."""
        # Build through ``cls`` (like PptxPositionModel.for_textbox) so that a
        # subclass gets an instance of itself rather than the base model.
        return cls(top=num, left=num, bottom=num, right=num)
class PptxPositionModel(BaseModel):
    """Bounding box of a shape on a slide; values are converted with ``Pt`` on output."""

    left: int = 0
    top: int = 0
    width: int = 0
    height: int = 0

    @classmethod
    def for_textbox(cls, left: int, top: int, width: int):
        """Build a position for a text box, using a fixed height of 100."""
        textbox_height = 100
        return cls(left=left, top=top, width=width, height=textbox_height)

    def to_pt_list(self) -> List[int]:
        """Return ``[left, top, width, height]`` as ``Pt`` lengths."""
        box = (self.left, self.top, self.width, self.height)
        return [Pt(value) for value in box]

    def to_pt_xyxy(self) -> List[int]:
        """Return ``[left, top, right, bottom]`` as ``Pt`` lengths."""
        right_edge = self.left + self.width
        bottom_edge = self.top + self.height
        corners = (self.left, self.top, right_edge, bottom_edge)
        return [Pt(value) for value in corners]
class PptxFontModel(BaseModel):
    """Font settings applied to a paragraph or a text run."""

    name: str = "Inter"
    # Font size; the creator converts it with Pt().
    size: int = 16
    bold: bool = False
    italic: bool = False
    # Hex colour string without a leading "#"; parsed with RGBColor.from_string.
    color: str = "000000"
class PptxFillModel(BaseModel):
    """Solid fill of a shape."""

    # Hex colour string; the creator parses it with RGBColor.from_string.
    color: str
class PptxStrokeModel(BaseModel):
    """Outline (line) of a shape."""

    # Hex colour string; the creator parses it with RGBColor.from_string.
    color: str
    # Line width, converted with Pt(); a thickness of 0 is rendered as no outline.
    thickness: float
class PptxShadowModel(BaseModel):
    """Outer shadow of an auto shape, written by the creator as an ``a:outerShdw`` element."""

    # Written as the "blurRad" attribute after conversion with Pt().
    radius: int
    # Written as the "dist" attribute after conversion with Pt().
    offset: int = 0
    # Written as the "val" of the shadow's srgbClr child.
    color: str = "000000"
    # Multiplied by 100000 and written as the colour's alpha value.
    opacity: float = 0.5
    # Multiplied by 1000 and written as the "dir" attribute.
    # NOTE(review): the unit the client sends is not visible here - confirm.
    angle: int = 0
class PptxTextRunModel(BaseModel):
    """A run of text inside a paragraph, optionally with its own font."""

    text: str
    # When None the creator leaves the run's font untouched.
    font: Optional[PptxFontModel] = None
class PptxParagraphModel(BaseModel):
    """One paragraph of a text frame."""

    # Only top/bottom are used by the creator (space before / space after).
    spacing: Optional[PptxSpacingModel] = None
    alignment: Optional[PP_ALIGN] = None
    font: Optional[PptxFontModel] = None
    # Text with inline emphasis markers (***, **, __) that the creator splits
    # into runs. When non-empty it takes precedence over ``text_runs``.
    text: Optional[str] = None
    # Explicit runs, used only when ``text`` is empty or None.
    text_runs: Optional[List[PptxTextRunModel]] = None
class PptxObjectFitModel(BaseModel):
    """Fit mode and focus point for an image inside its box."""

    # When None, fit_image returns the image unchanged.
    fit: Optional[PptxObjectFitEnum] = None
    # Exactly two entries (x, y); fit_image divides them by 100, i.e. treats
    # them as percentages. Individual entries are allowed to be None.
    focus: Optional[
        Annotated[List[Optional[float]], Len(min_length=2, max_length=2)]
    ] = None
class PptxPictureModel(BaseModel):
    """Source of a picture."""

    # Set to False by the creator once a remote image has been downloaded.
    # NOTE(review): the creator decides what to download from
    # ``path.startswith("http")``, not from this flag.
    is_network: bool
    # URL or local file path; replaced by the local path after download.
    path: str
class PptxShapeModel(BaseModel):
    """Common base class of all shape models; declares no fields itself."""

    pass
class PptxTextBoxModel(PptxShapeModel):
    """A text box holding one or more paragraphs."""

    # Applied as the text frame's inner margins (0 on every side when None).
    margin: Optional[PptxSpacingModel] = None
    # When None the shape is rendered with no fill.
    fill: Optional[PptxFillModel] = None
    position: PptxPositionModel
    # Copied to the text frame's word_wrap setting.
    text_wrap: bool = True
    paragraphs: List[PptxParagraphModel]
class PptxAutoShapeBoxModel(PptxShapeModel):
    """An auto shape (rectangle by default) with optional styling and text."""

    type: MSO_AUTO_SHAPE_TYPE = MSO_AUTO_SHAPE_TYPE.RECTANGLE
    # Used twice by the creator: to shrink the shape's position and as the
    # text frame's inner margins.
    margin: Optional[PptxSpacingModel] = None
    # When None the shape is rendered with no fill.
    fill: Optional[PptxFillModel] = None
    # When None (or thickness 0) the shape is rendered with no outline.
    stroke: Optional[PptxStrokeModel] = None
    shadow: Optional[PptxShadowModel] = None
    position: PptxPositionModel
    # Copied to the text frame's word_wrap setting.
    text_wrap: bool = True
    # Converted with Pt() and divided by the shape's shorter side to set the
    # shape's first adjustment value; ignored when None or 0.
    border_radius: Optional[int] = None
    paragraphs: Optional[List[PptxParagraphModel]] = None
class PptxPictureBoxModel(PptxShapeModel):
    """A picture placed on a slide, with optional image processing."""

    position: PptxPositionModel
    # Shrinks the position the picture is placed at.
    margin: Optional[PptxSpacingModel] = None
    # Crop the image to the box; only used when ``object_fit`` is not set.
    clip: bool = True
    # Hex colour (a leading "#" is accepted) that replaces the colour of every
    # non-transparent pixel.
    overlay: Optional[str] = None
    # One radius per corner; round_image_corners requires exactly four values
    # (top-left, top-right, bottom-right, bottom-left).
    border_radius: Optional[List[int]] = None
    shape: Optional[PptxBoxShapeEnum] = None
    object_fit: Optional[PptxObjectFitModel] = None
    picture: PptxPictureModel
class PptxConnectorModel(PptxShapeModel):
    """A connector line drawn across the corners of its position box."""

    type: MSO_CONNECTOR_TYPE = MSO_CONNECTOR_TYPE.STRAIGHT
    # The line runs from (left, top) to (left + width, top + height).
    position: PptxPositionModel
    # Line width, converted with Pt(); a thickness of 0 means the connector is
    # not drawn at all.
    thickness: float = 0.5
    # Hex colour string; the creator parses it with RGBColor.from_string.
    color: str = "000000"
class PptxSlideModel(BaseModel):
    """One slide: an ordered list of shapes, rendered in list order."""

    # NOTE(review): the union members carry no discriminator field, so which
    # model an incoming dict is parsed as depends on pydantic's union
    # resolution - confirm picture/connector payloads resolve as intended.
    shapes: List[
        PptxTextBoxModel
        | PptxAutoShapeBoxModel
        | PptxConnectorModel
        | PptxPictureBoxModel
    ]
class PptxPresentationModel(BaseModel):
    """A whole presentation: optional file name, global shapes and slides."""

    # Used as the exported file's name; a random name is used when missing.
    name: Optional[str] = None
    # Shapes added to every slide. Typed with the same concrete union as
    # PptxSlideModel.shapes: validating against the field-less base class
    # PptxShapeModel would drop every field, so the creator could never
    # render these shapes.
    shapes: Optional[
        List[
            PptxTextBoxModel
            | PptxAutoShapeBoxModel
            | PptxConnectorModel
            | PptxPictureBoxModel
        ]
    ] = None
    slides: List[PptxSlideModel]

View file

@ -0,0 +1,439 @@
import os
from typing import List, Optional
import uuid
from lxml import etree
from pptx import Presentation
from pptx.shapes.autoshape import Shape
from pptx.slide import Slide
from pptx.text.text import _Paragraph, TextFrame, Font, _Run
from pptx.opc.constants import RELATIONSHIP_TYPE as RT
from lxml.etree import fromstring, tostring
from PIL import Image
from pptx.util import Pt
from pptx.dml.color import RGBColor
from models.pptx_models import (
PptxAutoShapeBoxModel,
PptxBoxShapeEnum,
PptxConnectorModel,
PptxFillModel,
PptxFontModel,
PptxParagraphModel,
PptxPictureBoxModel,
PptxPositionModel,
PptxPresentationModel,
PptxShadowModel,
PptxSlideModel,
PptxSpacingModel,
PptxStrokeModel,
PptxTextBoxModel,
PptxTextRunModel,
)
from utils.download_helpers import download_files
from utils.image_utils import (
change_image_color,
clip_image,
create_circle_image,
fit_image,
round_image_corners,
)
# Index into Presentation.slide_layouts used for every slide that is added.
# Presumably the "Blank" layout of python-pptx's default template - confirm.
BLANK_SLIDE_LAYOUT = 6
class PptxPresentationCreator:
    """Render a ``PptxPresentationModel`` into a python-pptx presentation.

    Typical use: construct, ``await create_ppt()``, then ``save(path)``.
    The deck is 1280pt x 720pt and every slide is based on the layout at
    index ``BLANK_SLIDE_LAYOUT``.
    """

    # DrawingML main namespace (the "a" prefix in presentation XML).
    _DRAWINGML_NS = "http://schemas.openxmlformats.org/drawingml/2006/main"

    # Inline emphasis markers and the font overrides they imply. Ordered
    # longest first so that "***" is tried before "**".
    _MARKDOWN_STYLES = (
        ("***", {"bold": True, "italic": True}),
        ("**", {"bold": True}),
        ("__", {"italic": True}),
    )

    def __init__(
        self, ppt_model: PptxPresentationModel, temp_dir: Optional[str] = None
    ):
        """Create an empty deck for ``ppt_model``.

        Args:
            ppt_model: The presentation to render.
            temp_dir: Directory for downloaded and processed images. When
                omitted a fresh temporary directory is created (the caller
                owns its cleanup).
        """
        if temp_dir is None:
            import tempfile

            temp_dir = tempfile.mkdtemp(prefix="pptx_export_")
        self._temp_dir = temp_dir

        self._ppt_model = ppt_model
        self._slide_models = ppt_model.slides

        self._ppt = Presentation()
        self._ppt.slide_width = Pt(1280)
        self._ppt.slide_height = Pt(720)

    async def fetch_network_assets(self):
        """Download every remote picture and point its model at the local copy.

        A picture counts as remote when its path starts with "http".
        """
        # Collect shapes and URLs together, across ALL slides (and the global
        # shapes), so the download results can be paired back one-to-one.
        candidate_shapes = list(self._ppt_model.shapes or [])
        for each_slide in self._slide_models:
            candidate_shapes.extend(each_slide.shapes)

        network_pictures: List[PptxPictureBoxModel] = [
            shape
            for shape in candidate_shapes
            if isinstance(shape, PptxPictureBoxModel)
            and shape.picture.path.startswith("http")
        ]
        if not network_pictures:
            return

        image_urls = [shape.picture.path for shape in network_pictures]
        # Assumes download_files returns one local path per URL, in order.
        image_paths = await download_files(image_urls, self._temp_dir)

        for each_shape, each_image_path in zip(network_pictures, image_paths):
            if not each_image_path:
                # Presumably a failed download; keep the URL so add_picture
                # reports it instead of failing on an empty path.
                continue
            each_shape.picture.path = each_image_path
            each_shape.picture.is_network = False

    async def create_ppt(self):
        """Fetch remote assets, then add and populate one slide per slide model."""
        await self.fetch_network_assets()

        global_shapes = list(self._ppt_model.shapes or [])
        for slide_model in self._slide_models:
            if global_shapes:
                # Global shapes are drawn after (on top of) the slide's own
                # shapes. Work on a copy so the input model is not mutated and
                # a second call does not duplicate the shapes.
                slide_model = slide_model.model_copy(
                    update={"shapes": [*slide_model.shapes, *global_shapes]}
                )
            self.add_and_populate_slide(slide_model)

    def set_presentation_theme(self):
        """Overwrite the theme colours of the slide master.

        NOTE(review): the colours are read from ``self._theme``, which nothing
        in this class sets; until it is provided this method does nothing.
        """
        theme_model = getattr(self, "_theme", None)
        if theme_model is None:
            return

        slide_master_part = self._ppt.slide_master.part
        theme_part = slide_master_part.part_related_by(RT.THEME)
        theme = fromstring(theme_part.blob)

        theme_colors = theme_model.colors.theme_color_mapping
        nsmap = {"a": self._DRAWINGML_NS}

        for color_name, hex_value in theme_colors.items():
            if not color_name:
                continue
            matches = theme.xpath(
                f"a:themeElements/a:clrScheme/a:{color_name}/a:srgbClr",
                namespaces=nsmap,
            )
            if not matches:
                # The scheme entry is missing or not an srgbClr; nothing to set.
                continue
            matches[0].set("val", hex_value)

        theme_part._blob = tostring(theme)

    def add_and_populate_slide(self, slide_model: PptxSlideModel):
        """Add a slide and render each of its shapes in list order."""
        slide = self._ppt.slides.add_slide(self._ppt.slide_layouts[BLANK_SLIDE_LAYOUT])

        renderers = {
            PptxPictureBoxModel: self.add_picture,
            PptxAutoShapeBoxModel: self.add_autoshape,
            PptxTextBoxModel: self.add_textbox,
            PptxConnectorModel: self.add_connector,
        }
        for shape_model in slide_model.shapes:
            # Exact-type lookup; shapes of any other type are skipped.
            render = renderers.get(type(shape_model))
            if render is not None:
                render(slide, shape_model)

    def add_connector(self, slide: Slide, connector_model: PptxConnectorModel):
        """Draw a connector line; a thickness of 0 draws nothing."""
        if connector_model.thickness == 0:
            return
        connector_shape = slide.shapes.add_connector(
            connector_model.type, *connector_model.position.to_pt_xyxy()
        )
        connector_shape.line.width = Pt(connector_model.thickness)
        connector_shape.line.color.rgb = RGBColor.from_string(connector_model.color)

    def add_picture(self, slide: Slide, picture_model: PptxPictureBoxModel):
        """Place a picture on the slide, pre-processing the image when requested.

        When any of clip / border_radius / overlay / object_fit / shape is set
        the image is processed with Pillow and saved as a PNG in the temp
        directory; otherwise the original file is inserted as-is. An image
        that cannot be opened is reported and skipped.
        """
        image_path = picture_model.picture.path
        needs_processing = (
            picture_model.clip
            or picture_model.border_radius
            or picture_model.overlay
            or picture_model.object_fit
            or picture_model.shape
        )

        if needs_processing:
            try:
                # The context manager closes the file handle; convert() loads
                # the pixel data into a new, independent image first.
                with Image.open(image_path) as source_image:
                    image = source_image.convert("RGBA")
            except Exception:
                print(f"Could not open image: {image_path}")
                return

            box = picture_model.position
            # Fitting/clipping divides by the box size, so skip it for an
            # empty box (width and height default to 0).
            has_box = box.width > 0 and box.height > 0

            # ? Applying border radius twice to support both clip and object fit
            if picture_model.border_radius:
                image = round_image_corners(image, picture_model.border_radius)
            if has_box:
                if picture_model.object_fit:
                    image = fit_image(
                        image, box.width, box.height, picture_model.object_fit
                    )
                elif picture_model.clip:
                    image = clip_image(image, box.width, box.height)
            if picture_model.border_radius:
                image = round_image_corners(image, picture_model.border_radius)
            if picture_model.shape == PptxBoxShapeEnum.CIRCLE:
                image = create_circle_image(image)
            if picture_model.overlay:
                image = change_image_color(image, picture_model.overlay)

            image_path = os.path.join(self._temp_dir, f"{str(uuid.uuid4())}.png")
            image.save(image_path)

        margined_position = self.get_margined_position(
            picture_model.position, picture_model.margin
        )
        slide.shapes.add_picture(image_path, *margined_position.to_pt_list())

    def add_autoshape(self, slide: Slide, autoshape_box_model: PptxAutoShapeBoxModel):
        """Add an auto shape with its fill, stroke, shadow, radius and text."""
        position = self.get_margined_position(
            autoshape_box_model.position, autoshape_box_model.margin
        )
        autoshape = slide.shapes.add_shape(
            autoshape_box_model.type, *position.to_pt_list()
        )

        textbox = autoshape.text_frame
        textbox.word_wrap = autoshape_box_model.text_wrap

        self.apply_fill_to_shape(autoshape, autoshape_box_model.fill)
        self.apply_margin_to_text_box(textbox, autoshape_box_model.margin)
        self.apply_stroke_to_shape(autoshape, autoshape_box_model.stroke)
        self.apply_shadow_to_shape(autoshape, autoshape_box_model.shadow)
        self.apply_border_radius_to_shape(autoshape, autoshape_box_model.border_radius)

        if autoshape_box_model.paragraphs:
            self.add_paragraphs(textbox, autoshape_box_model.paragraphs)

    def add_textbox(self, slide: Slide, textbox_model: PptxTextBoxModel):
        """Add a text box (widened by 2pt) and fill it with its paragraphs."""
        textbox_shape = slide.shapes.add_textbox(*textbox_model.position.to_pt_list())
        textbox_shape.width += Pt(2)

        textbox = textbox_shape.text_frame
        textbox.word_wrap = textbox_model.text_wrap

        self.apply_fill_to_shape(textbox_shape, textbox_model.fill)
        self.apply_margin_to_text_box(textbox, textbox_model.margin)
        self.add_paragraphs(textbox, textbox_model.paragraphs)

    def add_paragraphs(
        self, textbox: TextFrame, paragraph_models: List[PptxParagraphModel]
    ):
        """Populate a text frame, reusing its first (pre-existing) paragraph."""
        for index, paragraph_model in enumerate(paragraph_models):
            paragraph = textbox.paragraphs[0] if index == 0 else textbox.add_paragraph()
            self.populate_paragraph(paragraph, paragraph_model)

    def populate_paragraph(
        self, paragraph: _Paragraph, paragraph_model: PptxParagraphModel
    ):
        """Apply spacing, alignment and font, then add the paragraph's runs.

        A non-empty ``text`` is parsed for emphasis markers and takes
        precedence over ``text_runs``.
        """
        if paragraph_model.spacing:
            self.apply_spacing_to_paragraph(paragraph, paragraph_model.spacing)
        if paragraph_model.alignment is not None:
            paragraph.alignment = paragraph_model.alignment
        if paragraph_model.font:
            self.apply_font_to_paragraph(paragraph, paragraph_model.font)

        text_runs = []
        if paragraph_model.text:
            text_runs = self.parse_markdown_text_to_text_runs(
                paragraph_model.font, paragraph_model.text
            )
        elif paragraph_model.text_runs:
            text_runs = paragraph_model.text_runs

        for text_run_model in text_runs:
            self.populate_text_run(paragraph.add_run(), text_run_model)

    def parse_markdown_text_to_text_runs(
        self, font: Optional[PptxFontModel], text: str
    ):
        """Split text with inline emphasis markers into text-run models.

        Recognised markers: ``***bold italic***``, ``**bold**`` and
        ``__italic__``. A marker without a closing partner on the same line
        is kept as literal text. Lines are separated by a "\\n" run.

        Args:
            font: Base font for the runs; may be None, in which case emphasised
                runs start from the default ``PptxFontModel``.
            text: The text to parse.

        Returns:
            The list of ``PptxTextRunModel`` in reading order.
        """
        base_font = font.model_dump() if font else {}
        text_runs = []
        lines = text.split("\n")

        for line_index, line in enumerate(lines):
            current_pos = 0
            while current_pos < len(line):
                for marker, overrides in self._MARKDOWN_STYLES:
                    if not line.startswith(marker, current_pos):
                        continue
                    content_start = current_pos + len(marker)
                    closing_pos = line.find(marker, content_start)
                    if closing_pos == -1:
                        continue
                    text_runs.append(
                        PptxTextRunModel(
                            text=line[content_start:closing_pos],
                            font=PptxFontModel(**{**base_font, **overrides}),
                        )
                    )
                    current_pos = closing_pos + len(marker)
                    break
                else:
                    # Plain text up to the next marker or the end of the line.
                    # If an unclosed marker sits right here, search beyond it;
                    # otherwise the scan would find it again at this position
                    # and never advance.
                    search_from = current_pos
                    for marker, _ in self._MARKDOWN_STYLES:
                        if line.startswith(marker, current_pos):
                            search_from = current_pos + len(marker)
                            break
                    marker_hits = [
                        line.find(marker, search_from)
                        for marker, _ in self._MARKDOWN_STYLES
                    ]
                    end_pos = min(
                        (hit for hit in marker_hits if hit != -1), default=len(line)
                    )
                    text_runs.append(
                        PptxTextRunModel(text=line[current_pos:end_pos], font=font)
                    )
                    current_pos = end_pos

            # Separate lines by position, not by comparing line text: an
            # earlier line may have the same text as the last one.
            if line_index != len(lines) - 1:
                text_runs.append(PptxTextRunModel(text="\n"))

        return text_runs

    def populate_text_run(self, text_run: _Run, text_run_model: PptxTextRunModel):
        """Set a run's text and, when given, its font."""
        text_run.text = text_run_model.text
        if text_run_model.font:
            self.apply_font(text_run.font, text_run_model.font)

    def apply_border_radius_to_shape(self, shape: Shape, border_radius: Optional[int]):
        """Set the shape's first adjustment to radius / shorter side (best effort).

        Failures are reported and ignored.
        NOTE(review): presumably a shape without adjustment values (such as
        the default RECTANGLE type) cannot be rounded this way - confirm.
        """
        if not border_radius:
            return
        try:
            shape.adjustments[0] = Pt(border_radius) / min(shape.width, shape.height)
        except Exception:
            print("Could not apply border radius.")

    def apply_fill_to_shape(self, shape: Shape, fill: Optional[PptxFillModel] = None):
        """Give the shape a solid fill, or no fill when ``fill`` is missing."""
        if not fill:
            shape.fill.background()
            return
        shape.fill.solid()
        shape.fill.fore_color.rgb = RGBColor.from_string(fill.color)

    def apply_stroke_to_shape(
        self, shape: Shape, stroke: Optional[PptxStrokeModel] = None
    ):
        """Give the shape a solid outline, or none when missing or 0 thick."""
        if not stroke or stroke.thickness == 0:
            shape.line.fill.background()
            return
        shape.line.fill.solid()
        shape.line.fill.fore_color.rgb = RGBColor.from_string(stroke.color)
        shape.line.width = Pt(stroke.thickness)

    def apply_shadow_to_shape(
        self, shape: Shape, shadow: Optional[PptxShadowModel] = None
    ):
        """Replace the shape's outer shadow by editing its XML directly.

        Any existing ``a:outerShdw`` is removed; when ``shadow`` is given a
        new one is written into the shape's ``a:effectLst``.
        """
        sp_pr = shape._element.xpath("p:spPr")[0]  # Shape properties XML element
        nsmap = sp_pr.nsmap
        a_ns = nsmap.get("a", self._DRAWINGML_NS)

        # lxml elements are falsy when they have no children, so existence
        # must be tested with "is not None", never with truthiness.
        effect_list = sp_pr.find(f"{{{a_ns}}}effectLst")
        if effect_list is not None:
            old_shadow = effect_list.find(f"{{{a_ns}}}outerShdw")
            if old_shadow is not None:
                effect_list.remove(old_shadow)

        if not shadow:
            return

        if effect_list is None:
            effect_list = etree.SubElement(sp_pr, f"{{{a_ns}}}effectLst", nsmap=nsmap)

        # NOTE(review): DrawingML angles are expressed in 60000ths of a
        # degree; confirm the unit of shadow.angle matches the 1000 factor.
        outer_shadow = etree.SubElement(
            effect_list,
            f"{{{a_ns}}}outerShdw",
            {
                "blurRad": f"{Pt(shadow.radius)}",
                "dir": f"{shadow.angle * 1000}",
                "dist": f"{Pt(shadow.offset)}",
                "rotWithShape": "0",
            },
            nsmap=nsmap,
        )
        color_element = etree.SubElement(
            outer_shadow,
            f"{{{a_ns}}}srgbClr",
            {"val": f"{shadow.color}"},
            nsmap=nsmap,
        )
        etree.SubElement(
            color_element,
            f"{{{a_ns}}}alpha",
            {"val": f"{int(shadow.opacity * 100000)}"},
            nsmap=nsmap,
        )

    def get_margined_position(
        self, position: PptxPositionModel, margin: Optional[PptxSpacingModel]
    ) -> PptxPositionModel:
        """Return ``position`` shrunk by ``margin`` (unchanged when no margin).

        Width and height never go below 0.
        """
        if not margin:
            return position

        return PptxPositionModel(
            left=position.left + margin.left,
            top=position.top + margin.top,
            width=max(position.width - margin.left - margin.right, 0),
            height=max(position.height - margin.top - margin.bottom, 0),
        )

    def apply_margin_to_text_box(
        self, text_frame: TextFrame, margin: Optional[PptxSpacingModel]
    ) -> None:
        """Set the text frame's inner margins (0 on every side when no margin)."""
        left, right, top, bottom = (
            (margin.left, margin.right, margin.top, margin.bottom)
            if margin
            else (0, 0, 0, 0)
        )
        text_frame.margin_left = Pt(left)
        text_frame.margin_right = Pt(right)
        text_frame.margin_top = Pt(top)
        text_frame.margin_bottom = Pt(bottom)

    def apply_spacing_to_paragraph(
        self, paragraph: _Paragraph, spacing: PptxSpacingModel
    ):
        """Use the spacing's top/bottom as space before/after the paragraph."""
        paragraph.space_before = Pt(spacing.top)
        paragraph.space_after = Pt(spacing.bottom)

    def apply_font_to_paragraph(self, paragraph: _Paragraph, font: PptxFontModel):
        """Apply ``font`` to the paragraph's default font."""
        self.apply_font(paragraph.font, font)

    def apply_font(self, font: Font, font_model: PptxFontModel):
        """Copy name, colour, bold, italic and size from the model to the font."""
        font.name = font_model.name
        font.color.rgb = RGBColor.from_string(font_model.color)
        font.bold = font_model.bold
        font.italic = font_model.italic
        font.size = Pt(font_model.size)

    def save(self, path: str):
        """Write the presentation to ``path``."""
        self._ppt.save(path)

View file

@ -6,3 +6,9 @@ def get_images_directory():
images_directory = os.path.join(get_app_data_directory_env(), "images")
os.makedirs(images_directory, exist_ok=True)
return images_directory
def get_export_directory():
    """Return the "exports" directory under the app data directory, creating it if needed."""
    app_data_root = get_app_data_directory_env()
    exports_path = os.path.join(app_data_root, "exports")
    os.makedirs(exports_path, exist_ok=True)
    return exports_path

View file

@ -0,0 +1,241 @@
from typing import List
from PIL import Image, ImageDraw
from models.pptx_models import PptxObjectFitEnum, PptxObjectFitModel
def clip_image(
    image: Image.Image,
    width: int,
    height: int,
    focus_x: float = 50.0,
    focus_y: float = 50.0,
) -> Image.Image:
    """Scale ``image`` to cover a ``width`` x ``height`` box and crop it to that box.

    Args:
        image: Source image.
        width: Box width in pixels.
        height: Box height in pixels.
        focus_x: Horizontal focus as a percentage (clamped to 0-100); 0 keeps
            the left edge, 100 the right edge.
        focus_y: Vertical focus as a percentage (clamped to 0-100).

    Returns:
        The cropped image, or ``image`` unchanged when the box or the image
        has no area (nothing sensible can be computed then).
    """
    img_width, img_height = image.size
    if min(width, height, img_width, img_height) <= 0:
        return image

    img_aspect = img_width / img_height
    box_aspect = width / height

    # Scale so the image covers the box. max() guards against float/int
    # truncation leaving the scaled side a pixel short of the box.
    if img_aspect > box_aspect:
        new_height = height
        new_width = max(width, int(new_height * img_aspect))
    else:
        new_width = width
        new_height = max(height, int(new_width / img_aspect))

    resized_image = image.resize((new_width, new_height), Image.LANCZOS)

    # Clamp the focus percentages to the 0-100 range.
    focus_x = max(0.0, min(100.0, focus_x))
    focus_y = max(0.0, min(100.0, focus_y))

    # The focus decides how the surplus pixels are split between the two sides.
    left = int((new_width - width) * (focus_x / 100.0))
    top = int((new_height - height) * (focus_y / 100.0))

    return resized_image.crop((left, top, left + width, top + height))
def round_image_corners(image: Image.Image, radii: List[int]) -> Image.Image:
    """Make the four corners of ``image`` transparent along quarter circles.

    Args:
        image: Source image (converted to RGBA when necessary).
        radii: Exactly four radii in the order top-left, top-right,
            bottom-right, bottom-left; non-positive radii leave that corner
            untouched.

    Raises:
        ValueError: If ``radii`` does not hold exactly four values.
    """
    if len(radii) != 4:
        raise ValueError(
            "Image Border Radius - radii must contain exactly 4 values for each corner"
        )

    w, h = image.size

    if image.mode != "RGBA":
        image = image.convert("RGBA")

    # rounded_mask collects the quarter discs (opaque inside the arc);
    # rectangular_mask is opaque everywhere except the corner squares.
    rounded_mask = Image.new("L", image.size, 0)
    rectangular_mask = Image.new("L", image.size, 255)

    for corner, radius in enumerate(radii):
        if not radius > 0:
            continue

        diameter = radius * 2
        disc = Image.new("L", (diameter, diameter), 0)
        ImageDraw.Draw(disc).ellipse((0, 0, diameter - 1, diameter - 1), fill=255)

        # Corner order: 0 top-left, 1 top-right, 2 bottom-right, other bottom-left.
        on_right = corner in (1, 2)
        on_bottom = corner not in (0, 1)

        # Quadrant of the disc that belongs to this corner ...
        crop_x = radius if on_right else 0
        crop_y = radius if on_bottom else 0
        # ... and where that quadrant sits in the image.
        dest_x = w - radius if on_right else 0
        dest_y = h - radius if on_bottom else 0

        quadrant = disc.crop((crop_x, crop_y, crop_x + radius, crop_y + radius))
        rounded_mask.paste(quadrant, (dest_x, dest_y))
        rectangular_mask.paste(0, (dest_x, dest_y, dest_x + radius, dest_y + radius))

    # Merge both masks, then cut the image's own alpha channel with the result.
    corner_mask = Image.composite(rounded_mask, rectangular_mask, rounded_mask)
    fully_transparent = Image.new("L", image.size, 0)
    final_alpha = Image.composite(
        image.getchannel("A"), fully_transparent, corner_mask
    )

    rounded = Image.new("RGBA", image.size)
    rounded.paste(image.convert("RGB"), (0, 0))
    rounded.putalpha(final_alpha)
    return rounded
def change_image_color(img: Image.Image, color: str) -> Image.Image:
    """Recolour every non-transparent pixel of ``img`` with ``color``.

    Each pixel keeps its alpha value; fully transparent pixels become
    (0, 0, 0, 0).

    Args:
        img: Source image (converted to RGBA when necessary).
        color: Hex colour "RRGGBB", optionally prefixed with "#". Only the
            first six hex digits are used.

    Raises:
        ValueError: If ``color`` is not valid hex or is too short.
    """
    hex_color = color[1:] if color.startswith("#") else color
    r_new, g_new, b_new = (int(hex_color[i : i + 2], 16) for i in (0, 2, 4))

    rgba = img if img.mode == "RGBA" else img.convert("RGBA")
    alpha = rgba.getchannel("A")

    # Solid colour layer carrying the original alpha channel.
    tinted = Image.new("RGBA", rgba.size, (r_new, g_new, b_new, 0))
    tinted.putalpha(alpha)

    # Pick the tinted pixel wherever alpha is non-zero, else a blank pixel.
    # This is done with image operations instead of a per-pixel Python loop.
    blank = Image.new("RGBA", rgba.size, (0, 0, 0, 0))
    visible = alpha.point(lambda value: 255 if value else 0)
    return Image.composite(tinted, blank, visible)
def create_circle_image(
    image: Image.Image,
) -> Image.Image:
    """Mask ``image`` to a centred circle whose diameter is the shorter side.

    Everything outside the circle becomes fully transparent; the canvas size
    is unchanged.
    """
    rgba_image = image.convert("RGBA")
    canvas_width, canvas_height = rgba_image.size

    # The circle is centred on the canvas and limited by the shorter side.
    mid_x, mid_y = canvas_width // 2, canvas_height // 2
    radius = min(rgba_image.size) // 2
    circle_box = (mid_x - radius, mid_y - radius, mid_x + radius, mid_y + radius)

    # Transparent canvas with an opaque white disc; used both as the mask and
    # as the (transparent) background outside the disc.
    disc_layer = Image.new("RGBA", rgba_image.size, color=(0, 0, 0, 0))
    ImageDraw.Draw(disc_layer).ellipse(circle_box, fill=(255, 255, 255, 255))

    return Image.composite(rgba_image, disc_layer, disc_layer)
def _resolve_focus(focus) -> tuple:
    """Return the (x, y) focus percentages: None entries become 50, values are clamped to 0-100."""
    focus_x, focus_y = 50.0, 50.0
    if focus and len(focus) == 2:
        if focus[0] is not None:
            focus_x = focus[0]
        if focus[1] is not None:
            focus_y = focus[1]
    return max(0.0, min(100.0, focus_x)), max(0.0, min(100.0, focus_y))


def fit_image(
    image: Image.Image, width: int, height: int, object_fit: PptxObjectFitModel
) -> Image.Image:
    """Fit ``image`` into a ``width`` x ``height`` box according to ``object_fit``.

    * CONTAIN: scale to fit inside the box and place it on a transparent
      canvas of the box size, positioned by the focus point.
    * COVER: scale to cover the box and crop to it around the focus point.
    * FILL: stretch to exactly the box size.

    Returns:
        The fitted image, or ``image`` unchanged when no fit mode is set or
        the box or the image has no area.
    """
    if not object_fit.fit:
        return image

    img_width, img_height = image.size
    if min(width, height, img_width, img_height) <= 0:
        # Aspect ratios are undefined; avoid dividing by zero.
        return image

    # Focus entries may be None per the model; fall back to the centre.
    focus_x, focus_y = _resolve_focus(object_fit.focus)

    if object_fit.fit == PptxObjectFitEnum.CONTAIN:
        img_aspect = img_width / img_height
        box_aspect = width / height

        # Scale image to fit within the box while maintaining aspect ratio.
        # max(1, ...) keeps extreme aspect ratios from producing a 0-pixel side.
        if img_aspect > box_aspect:
            new_width = width
            new_height = max(1, int(width / img_aspect))
        else:
            new_height = height
            new_width = max(1, int(height * img_aspect))

        resized_image = image.resize((new_width, new_height), Image.LANCZOS)

        # The focus decides how the free space is split around the image.
        paste_x = int((width - new_width) * (focus_x / 100.0))
        paste_y = int((height - new_height) * (focus_y / 100.0))

        result = Image.new("RGBA", (width, height), (0, 0, 0, 0))
        result.paste(resized_image, (paste_x, paste_y))
        return result

    if object_fit.fit == PptxObjectFitEnum.COVER:
        # Same scale-to-cover-then-crop operation that clip_image implements.
        return clip_image(image, width, height, focus_x, focus_y)

    if object_fit.fit == PptxObjectFitEnum.FILL:
        # Stretch image to fill the box exactly.
        return image.resize((width, height), Image.LANCZOS)

    return image

View file

@ -11,28 +11,138 @@ from utils.llm_provider import (
is_google_selected,
)
# system_prompt = """
# Create a presentation based on the provided prompt, number of slides, output language, and additional informational details.
# Format the output in the specified JSON schema with structured markdown content.
# # Steps
# 1. Identify key points from the provided prompt, including the topic, number of slides, output language, and additional content directions.
# 2. Create a concise and descriptive title reflecting the main topic, adhering to the specified language.
# 3. Generate a clear title for each slide.
# 4. Develop comprehensive content using markdown structure:
# * Use bullet points (- or *) for lists.
# * Use **bold** for emphasis, *italic* for secondary emphasis, and `code` for technical terms.
# 5. Provide important points from prompt as notes.
# # Notes
# - Content must be generated for every slide.
# - Images or Icons information provided in **Input** must be included in the **notes**.
# - Notes should clearly state whether they apply to a specific slide or to the whole presentation.
# - Slide **body** should not contain slide **title**.
# - Slide **title** should not contain "Slide 1", "Slide 2", etc.
# - Slide **title** should not be in markdown format.
# - There must be exact **Number of Slides** as specified.
# """
# System prompt text for presentation generation: it instructs the model on
# how to structure titles, slide bodies (markdown) and speaker notes.
# NOTE(review): how this string is passed to the LLM is not visible here.
system_prompt = """
Create a presentation based on the provided prompt, number of slides, output language, and additional informational details.
Format the output in the specified JSON schema with structured markdown content.
You are an expert presentation creator. Generate structured presentations based on user requirements and format them according to the specified JSON schema with markdown content.
# Steps
## Core Requirements
1. Identify key points from the provided prompt, including the topic, number of slides, output language, and additional content directions.
2. Create a concise and descriptive title reflecting the main topic, adhering to the specified language.
3. Generate a clear title for each slide.
4. Develop comprehensive content using markdown structure:
* Use bullet points (- or *) for lists.
* Use **bold** for emphasis, *italic* for secondary emphasis, and `code` for technical terms.
5. Provide important points from prompt as notes.
# Notes
- Content must be generated for every slide.
- Images or Icons information provided in **Input** must be included in the **notes**.
- Notes should cleary define if it is for specific slide or for the presentation.
- Slide **body** should not contain slide **title**.
- Slide **title** should not contain "Slide 1", "Slide 2", etc.
- Slide **title** should not be in markdown format.
- There must be exact **Number of Slides** as specified.
### Input Processing
1. **Extract key information** from the user's prompt:
- Main topic/subject matter
- Required number of slides
- Target language for output
- Specific content requirements or focus areas
- Target audience (if specified)
- Presentation style or tone preferences
## Content Generation Guidelines
### Presentation Title
- Create a **concise, descriptive title** that captures the essence of the topic
- Use **plain text format** (no markdown formatting)
- Make it **engaging and professional**
- Ensure it reflects the main theme and target audience
### Slide Titles
- Generate **clear, specific titles** for each slide
- Use **plain text format** (no markdown, no "Slide 1", "Slide 2" prefixes)
- Make each title **descriptive and informative**
- Ensure titles create a **logical flow** through the presentation
- Keep titles **concise but meaningful**
### Slide Body Content
- Use **full markdown formatting** for rich content structure
- Apply consistent formatting:
- `**bold**` for key concepts and emphasis
- `*italic*` for secondary emphasis or definitions
- `- or *` for bullet points and lists
- `> ` for quotes or callouts
- `### ` for subsections within slides
- ``` for code blocks (when applicable)
- `inline code` for technical terms or specific terminology
### Content Structure Per Slide
- **Opening/Hook**: Start with engaging content
- **Main Points**: 3-5 key points maximum per slide
- **Supporting Details**: Brief explanations or examples
- **Visual Cues**: Suggest where charts, images, or diagrams would be beneficial
- **Transitions**: Natural flow to next slide topic
### Speaker Notes
- Include **comprehensive speaker notes** for each slide
- Provide **additional context** not covered in slide content
- Add **timing suggestions** and **delivery tips**
- Include **visual element descriptions** (charts, images, icons)
- Specify if notes apply to **specific slides** or **entire presentation**
- Add **interaction opportunities** (questions, polls, discussions)
## Quality Standards
### Content Quality
- Ensure **factual accuracy** and **current information**
- Maintain **consistent tone** throughout presentation
- Create **logical progression** between slides
- Include **actionable insights** where appropriate
- Balance **depth and accessibility** for target audience
### Formatting Consistency
- Use **uniform markdown styling** across all slides
- Maintain **consistent bullet point structure**
- Apply **appropriate heading levels**
- Ensure **readable content density**
### Language and Tone
- Generate content in the **specified language**
- Adapt **tone and complexity** to target audience
- Use **active voice** and **clear, direct language**
- Include **engaging elements** (questions, scenarios, examples)
## Special Considerations
### Slide Count Compliance
- Generate **exactly** the number of slides requested
- Distribute content **evenly** across slides
- Ensure **no slide is significantly longer** than others
- Create **balanced information flow**
### Visual Integration
- Suggest **relevant visual elements** in notes
- Indicate **optimal placement** for charts, graphs, images
- Recommend **slide layouts** for different content types
- Specify **color schemes** or **design elements** when relevant
### Interactivity Elements
- Include **audience engagement opportunities**
- Suggest **discussion points** or **questions**
- Recommend **interactive elements** (polls, breakout sessions)
- Provide **transition phrases** between sections
## Validation Checklist
Before finalizing, ensure:
- [ ] Exact number of slides generated
- [ ] All titles are plain text (no markdown)
- [ ] All slide bodies use proper markdown formatting
- [ ] Comprehensive notes provided for each slide
- [ ] Logical flow between slides
- [ ] Consistent formatting throughout
- [ ] Content appropriate for specified language
- [ ] No slide title appears in slide body
- [ ] Speaker notes clearly indicate scope (slide-specific or presentation-wide)
"""

View file

@ -144,7 +144,7 @@ const PresentationPage: React.FC<PresentationPageProps> = ({ presentation_id })
/>
<div className="flex-1 h-[calc(100vh-100px)] overflow-y-auto">
<div className="mx-auto flex flex-col items-center overflow-hidden justify-center p-2 sm:p-6 pt-0">
<div id="presentation-slides-wrapper" className="mx-auto flex flex-col items-center overflow-hidden justify-center p-2 sm:p-6 pt-0">
{!presentationData ||
loading ||
!presentationData?.slides ||

View file

@ -1,30 +1,207 @@
import { ApiError } from "@/models/errors";
import { NextRequest, NextResponse } from "next/server";
import puppeteer from "puppeteer";
import puppeteer, { ElementHandle } from "puppeteer";
import { ElementAttributes } from "@/types/element_attibutes";
// NOTE(review): this span is rendered from a diff hunk ("@ -1,30 +1,207 @"), so lines of
// a GET handler and of a POST handler appear interleaved and the text is not compilable
// as shown. The comments below describe each statement on its own terms.
export async function GET(request: NextRequest) {
export async function POST(request: NextRequest) {
let id: string;
try {
// Variant that reads the presentation id from the JSON request body.
const body = await request.json();
id = body.id;
} catch (error) {
return NextResponse.json({ detail: "Invalid request body" }, { status: 400 });
// Variant that reads the presentation id from the query string via getPresentationId.
const id = await getPresentationId(request);
const slides = await getSlides(id);
// Only the first slide is inspected.
const slide = slides[0];
const attributes = await getAllChildElementsAttributes(slide);
console.log(attributes);
// Temporary: respond with the raw attribute list of the first slide's descendants.
return NextResponse.json({
attributes: attributes,
});
} catch (error: any) {
console.error(error);
// ApiError instances are sent back as the response body with status 400.
if (error instanceof ApiError) {
return NextResponse.json(error, { status: 400 });
}
// Anything else is reported as a 500 carrying the error's message.
return NextResponse.json({ detail: `Internal server error: ${error.message}` }, { status: 500 });
}
return NextResponse.json({ message: "Hello, world!" });
}
/**
 * Read the presentation id from the `id` query-string parameter of the request.
 *
 * @param request Incoming request whose URL carries the `id` parameter.
 * @returns The id value.
 * @throws ApiError when the parameter is missing or empty.
 */
async function getPresentationId(request: NextRequest) {
  const presentationId = request.nextUrl.searchParams.get("id");
  if (presentationId) {
    return presentationId;
  }
  throw new ApiError("Presentation ID not found");
}
// NOTE(review): this span is rendered from a diff, so two signature lines, two
// setViewport lines and two goto lines appear back to back; it is not compilable as shown.
//
// Launches a headless browser, opens a new page, sets a fixed viewport, navigates to a
// localhost URL built from `id`, and returns the page.
// NOTE(review): the browser handle is neither returned nor closed in this function, so
// the caller can only reach it through the returned page.
// NOTE(review): `id` is interpolated into the URL without encoding; confirm callers
// only pass safe values.
async function get_presentation_page(id: string) {
async function getPresentationPage(id: string) {
const browser = await puppeteer.launch({
headless: true,
// Sandbox is disabled through these launch flags.
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
const page = await browser.newPage();
await page.setViewport({ width: 1440, height: 900, deviceScaleFactor: 1 });
await page.goto(`http://localhost/pdf-maker?id=${id}`, {
await page.setViewport({ width: 1640, height: 720, deviceScaleFactor: 1 });
await page.goto(`http://localhost/presentation?id=${id}`, {
// Navigation waits for the "networkidle0" condition, for at most 60000 ms.
waitUntil: "networkidle0",
timeout: 60000,
});
return page;
}
/**
 * Open the presentation page and return the element that wraps all slides.
 *
 * @param id Presentation id forwarded to getPresentationPage.
 * @returns Handle to the `#presentation-slides-wrapper` element.
 * @throws ApiError when the page contains no such element.
 */
async function getSlidesWrapper(id: string): Promise<ElementHandle<Element>> {
  const page = await getPresentationPage(id);
  const slidesWrapper = await page.$("#presentation-slides-wrapper");
  if (!slidesWrapper) {
    // Once we throw, nothing holds a reference to the page any more, so the browser
    // would keep running forever. Closing is best effort: a failure to close must not
    // mask the ApiError the caller is waiting for.
    await page.browser().close().catch(() => undefined);
    throw new ApiError("Presentation slides not found");
  }
  return slidesWrapper;
}
/**
 * Return the slide elements of a presentation.
 *
 * Slides are selected as the grandchildren (`:scope > div > div`) of the slides wrapper
 * element obtained from getSlidesWrapper.
 *
 * @param id Presentation id forwarded to getSlidesWrapper.
 * @returns Element handles matching the selector, in document order.
 */
async function getSlides(id: string) {
  const wrapper = await getSlidesWrapper(id);
  return await wrapper.$$(":scope > div > div");
}
/**
 * Collect tag, text, geometry and computed styling of a single DOM element.
 *
 * The whole extraction runs inside the page through `element.evaluate`, so every helper
 * it needs is declared inside the callback.
 *
 * @param element Handle of the element to inspect.
 * @returns The element's attributes; `position` is in viewport coordinates
 *          (taken from `getBoundingClientRect`).
 */
async function getElementAttributes(element: ElementHandle<Element>): Promise<ElementAttributes> {
  const attributes = await element.evaluate((el) => {
    // Normalise a CSS colour by round-tripping it through a canvas fillStyle.
    // Opaque colours come back as "#rrggbb"; colours with alpha come back as an
    // "rgba(...)" string. Fully transparent input yields undefined.
    function colorToHex(color: string): string | undefined {
      if (!color || color === 'transparent' || color === 'rgba(0, 0, 0, 0)') {
        return undefined;
      }
      const canvas = document.createElement('canvas');
      const ctx = canvas.getContext('2d');
      if (!ctx) return color;
      ctx.fillStyle = color;
      return ctx.fillStyle;
    }

    // NaN (e.g. from "auto" or an empty string) is reported as undefined.
    function numberOrUndefined(value: number): number | undefined {
      return isNaN(value) ? undefined : value;
    }

    const computedStyles = window.getComputedStyle(el);

    // Position and size, copied into a plain object so it serialises cleanly.
    const rect = el.getBoundingClientRect();
    const position = {
      left: rect.left,
      top: rect.top,
      width: rect.width,
      height: rect.height,
    };

    // Background colour; `opacity` is the element's computed opacity.
    const background = {
      color: colorToHex(computedStyles.backgroundColor),
      opacity: numberOrUndefined(parseFloat(computedStyles.opacity)),
    };

    // Border: omitted entirely when the width is exactly 0.
    const borderColor = colorToHex(computedStyles.borderColor);
    const borderWidth = parseFloat(computedStyles.borderWidth);
    const border = borderWidth === 0 ? undefined : {
      color: borderColor,
      width: numberOrUndefined(borderWidth),
    };

    // Shadow (box-shadow).
    const boxShadow = computedStyles.boxShadow;
    let shadow = {
      offset: undefined as [number, number] | undefined,
      color: undefined as string | undefined,
      opacity: undefined as number | undefined,
    };
    if (boxShadow && boxShadow !== 'none') {
      // The computed value is serialised colour-first, e.g.
      // "rgba(0, 0, 0, 0.1) 0px 4px 6px -1px", and the colour function itself contains
      // spaces and commas. A plain split(' ') therefore never finds the offsets, so the
      // colour token is isolated first and the remaining tokens are read as lengths.
      // Only the first layer of a multi-layer shadow is considered; layers are split on
      // commas that are not inside parentheses.
      const firstLayer = boxShadow.split(/,(?![^(]*\))/)[0].trim();
      const colorMatch = firstLayer.match(/[a-z-]+\([^)]*\)|#[0-9a-f]{3,8}\b/i);
      const colorToken = colorMatch ? colorMatch[0] : undefined;
      const lengthTokens = (colorToken ? firstLayer.replace(colorToken, ' ') : firstLayer)
        .split(/\s+/)
        .filter((token) => token !== '' && token !== 'inset');
      if (lengthTokens.length >= 2) {
        const offsetX = parseFloat(lengthTokens[0]);
        const offsetY = parseFloat(lengthTokens[1]);
        shadow = {
          offset: (!isNaN(offsetX) && !isNaN(offsetY)) ? [offsetX, offsetY] as [number, number] : undefined,
          color: colorToken ? colorToHex(colorToken) : undefined,
          // Always 1; any alpha stays inside the colour string.
          opacity: 1,
        };
      }
    }

    // Font
    const font = {
      size: numberOrUndefined(parseFloat(computedStyles.fontSize)),
      weight: numberOrUndefined(parseInt(computedStyles.fontWeight)),
      color: colorToHex(computedStyles.color),
    };

    // Margin
    const margin = {
      top: numberOrUndefined(parseFloat(computedStyles.marginTop)),
      bottom: numberOrUndefined(parseFloat(computedStyles.marginBottom)),
      left: numberOrUndefined(parseFloat(computedStyles.marginLeft)),
      right: numberOrUndefined(parseFloat(computedStyles.marginRight)),
    };

    // Padding
    const padding = {
      top: numberOrUndefined(parseFloat(computedStyles.paddingTop)),
      bottom: numberOrUndefined(parseFloat(computedStyles.paddingBottom)),
      left: numberOrUndefined(parseFloat(computedStyles.paddingLeft)),
      right: numberOrUndefined(parseFloat(computedStyles.paddingRight)),
    };

    return {
      tagName: el.tagName.toLowerCase(),
      id: el.id || undefined,
      // Read the attribute instead of `el.className`: on SVG elements `className` is an
      // SVGAnimatedString object, not a string. For HTML elements both are identical.
      className: el.getAttribute('class') || undefined,
      // Named innerText in the result, but sourced from textContent.
      innerText: el.textContent || undefined,
      background,
      border,
      shadow,
      font,
      position,
      margin,
      padding,
    };
  });
  return attributes;
}
/**
 * Collect the attributes of every descendant of `element`, with positions expressed
 * relative to the top-left corner of `element` itself.
 *
 * @param element Root element (for example one slide).
 * @returns One attribute record per descendant matched by `:scope *`, in the order
 *          the selector returns them. The root itself is not included.
 */
async function getAllChildElementsAttributes(element: ElementHandle<Element>): Promise<ElementAttributes[]> {
  // Copy the two numbers we need into a plain object. A DOMRect exposes its fields as
  // prototype accessors, which are not guaranteed to survive the by-value
  // serialisation of an evaluate() result; an empty object here would turn every
  // relative position into NaN.
  const rootOrigin = await element.evaluate((el) => {
    const { left, top } = el.getBoundingClientRect();
    return { left, top };
  });

  const descendantHandles = await element.$$(':scope *');

  return Promise.all(
    descendantHandles.map(async (handle) => {
      const attributes = await getElementAttributes(handle);
      const position = attributes.position;
      // Shift viewport coordinates so they are relative to the root element.
      if (position && position.left !== undefined && position.top !== undefined) {
        attributes.position = {
          left: position.left - rootOrigin.left,
          top: position.top - rootOrigin.top,
          width: position.width,
          height: position.height,
        };
      }
      return attributes;
    })
  );
}

View file

@ -0,0 +1,7 @@
/**
 * Plain error object carrying a human-readable `detail` message.
 *
 * It does not extend `Error`; its only own property is `detail`, so serialising an
 * instance as JSON yields `{"detail": "..."}`.
 */
export class ApiError {
  /** Message describing what went wrong. */
  public detail: string;

  /**
   * @param detail Message describing what went wrong.
   */
  public constructor(detail: string) {
    this.detail = detail;
  }
}

View file

@ -0,0 +1,42 @@
/**
 * Styling and geometry captured for one DOM element.
 *
 * Every group except `tagName` is optional, and every field inside a group is optional
 * as well, because a value that cannot be parsed is reported as undefined.
 */
export interface ElementAttributes {
  /** Tag name of the element. */
  tagName: string;
  /** Element id, when it has one. */
  id?: string;
  /** Class attribute of the element, when it has one. */
  className?: string;
  /** Text of the element. */
  innerText?: string;
  /** Background colour and opacity. */
  background?: {
    color?: string;
    opacity?: number;
  };
  /** Border colour and width. */
  border?: {
    color?: string;
    width?: number;
  };
  /** Shadow described by an [x, y] offset pair, a colour and an opacity. */
  shadow?: {
    offset?: [number, number];
    color?: string;
    opacity?: number;
  };
  /** Font size, numeric weight and text colour. */
  font?: {
    size?: number;
    weight?: number;
    color?: string;
  };
  /** Bounding box of the element. */
  position?: {
    left?: number;
    top?: number;
    width?: number;
    height?: number;
  };
  /** Margin on each side. */
  margin?: {
    top?: number;
    bottom?: number;
    left?: number;
    right?: number;
  };
  /** Padding on each side. */
  padding?: {
    top?: number;
    bottom?: number;
    left?: number;
    right?: number;
  };
}

View file

@ -0,0 +1,150 @@
/** Outline shapes a picture box can take (see `PptxPictureBoxModel.shape`). */
export enum PptxBoxShapeEnum {
RECTANGLE = "rectangle",
CIRCLE = "circle"
}
/**
 * Fitting modes referenced by `PptxObjectFitModel.fit`.
 * NOTE(review): the values mirror the CSS `object-fit` keywords; confirm the consumer
 * interprets them the same way.
 */
export enum PptxObjectFitEnum {
CONTAIN = "contain",
COVER = "cover",
FILL = "fill"
}
/**
 * Per-side spacing values; every side is optional.
 * Used in this file for shape margins and for paragraph spacing.
 */
export interface PptxSpacingModel {
top?: number;
bottom?: number;
left?: number;
right?: number;
}
/**
 * Box described by its left/top corner plus width and height; every field is optional.
 * NOTE(review): the helper names `positionToPtList` / `positionToPtXyxy` suggest the
 * unit is points; confirm against the consumer.
 */
export interface PptxPositionModel {
left?: number;
top?: number;
width?: number;
height?: number;
}
/** Font settings for a paragraph or a text run; every field is optional. */
export interface PptxFontModel {
name?: string;
size?: number;
bold?: boolean;
italic?: boolean;
// NOTE(review): colour string format is not fixed here; confirm what the consumer expects.
color?: string;
}
/** Solid fill described by a single required colour. */
export interface PptxFillModel {
color: string;
}
/** Outline stroke; both colour and thickness are required. */
export interface PptxStrokeModel {
color: string;
thickness: number;
}
/**
 * Shadow settings; only `radius` is required.
 * NOTE(review): units of `radius`, `offset` and `angle` are not stated here; confirm
 * against the consumer.
 */
export interface PptxShadowModel {
radius: number;
// A single number here, unlike the [x, y] pair used by ElementAttributes.shadow.offset.
offset?: number;
color?: string;
opacity?: number;
angle?: number;
}
/** A piece of text with an optional font of its own. */
export interface PptxTextRunModel {
text: string;
font?: PptxFontModel;
}
/**
 * One paragraph of a text-bearing shape; every field is optional.
 * The content can be given as plain `text` or as a list of `text_runs`.
 * NOTE(review): precedence when both are set is not defined here; confirm.
 */
export interface PptxParagraphModel {
spacing?: PptxSpacingModel;
// Untyped (`any`); the accepted alignment values are not declared in this file.
alignment?: any;
font?: PptxFontModel;
text?: string;
text_runs?: PptxTextRunModel[];
}
/** How a picture is fitted into its box. */
export interface PptxObjectFitModel {
fit?: PptxObjectFitEnum;
// Pair of numbers, each of which may be null.
// NOTE(review): presumably an x/y focal point; confirm its meaning and range.
focus?: [number | null, number | null];
}
/**
 * Source of a picture.
 * NOTE(review): presumably `is_network` tells whether `path` is a remote URL rather
 * than a local file path; confirm against the consumer.
 */
export interface PptxPictureModel {
is_network: boolean;
path: string;
}
/**
 * Empty base interface extended by the text box, auto shape, picture box and
 * connector models in this file.
 */
export interface PptxShapeModel {
}
/** Text box shape: a required position and list of paragraphs, with optional margin, fill and wrapping. */
export interface PptxTextBoxModel extends PptxShapeModel {
margin?: PptxSpacingModel;
fill?: PptxFillModel;
position: PptxPositionModel;
text_wrap?: boolean;
paragraphs: PptxParagraphModel[];
}
/**
 * Auto shape: a required position, with optional fill, stroke, shadow, corner radius
 * and paragraphs.
 */
export interface PptxAutoShapeBoxModel extends PptxShapeModel {
// Untyped (`any`); the accepted shape types are not declared in this file.
type?: any;
margin?: PptxSpacingModel;
fill?: PptxFillModel;
stroke?: PptxStrokeModel;
shadow?: PptxShadowModel;
position: PptxPositionModel;
text_wrap?: boolean;
// A single number here, whereas PptxPictureBoxModel.border_radius is a number array.
border_radius?: number;
paragraphs?: PptxParagraphModel[];
}
/** Picture shape: a required position and picture source, with optional presentation settings. */
export interface PptxPictureBoxModel extends PptxShapeModel {
position: PptxPositionModel;
margin?: PptxSpacingModel;
clip?: boolean;
// NOTE(review): presumably an overlay colour; the format is not stated here.
overlay?: string;
// An array here, whereas PptxAutoShapeBoxModel.border_radius is a single number.
// NOTE(review): presumably one radius per corner; confirm length and order.
border_radius?: number[];
shape?: PptxBoxShapeEnum;
object_fit?: PptxObjectFitModel;
picture: PptxPictureModel;
}
/** Connector (line) shape: a required position, with optional type, thickness and colour. */
export interface PptxConnectorModel extends PptxShapeModel {
// Untyped (`any`); the accepted connector types are not declared in this file.
type?: any;
position: PptxPositionModel;
thickness?: number;
color?: string;
}
/** One slide: a list of shapes, each being one of the four concrete shape models. */
export interface PptxSlideModel {
shapes: (PptxTextBoxModel | PptxAutoShapeBoxModel | PptxConnectorModel | PptxPictureBoxModel)[];
}
/** A whole presentation: a background colour, optional presentation-level shapes, and the slides. */
export interface PptxPresentationModel {
background_color: string;
// NOTE(review): presumably shapes shared by every slide; confirm against the consumer.
shapes?: PptxShapeModel[];
slides: PptxSlideModel[];
}
/**
 * Build a spacing model that uses the same value on all four sides.
 *
 * @param num Value applied to top, left, bottom and right.
 * @returns A new spacing object.
 */
export const createPptxSpacingAll = (num: number): PptxSpacingModel => {
  const uniform: PptxSpacingModel = { top: num, left: num, bottom: num, right: num };
  return uniform;
};
/**
 * Build the position of a text box.
 *
 * @param left Left edge.
 * @param top Top edge.
 * @param width Box width.
 * @param height Box height; defaults to 100, the value that used to be hard-coded, so
 *               existing three-argument calls keep their result.
 * @returns A new position object.
 */
export const createPptxPositionForTextbox = (
  left: number,
  top: number,
  width: number,
  height: number = 100
): PptxPositionModel => ({
  left,
  top,
  width,
  height
});
/**
 * Flatten a position into `[left, top, width, height]`.
 *
 * @param position Position whose fields may be missing.
 * @returns Four numbers; any missing (or otherwise falsy) field becomes 0.
 */
export const positionToPtList = (position: PptxPositionModel): number[] => {
  const { left, top, width, height } = position;
  return [left, top, width, height].map((value) => value || 0);
};
/**
 * Convert a position into corner form `[x1, y1, x2, y2]`.
 *
 * @param position Position whose fields may be missing.
 * @returns `[left, top, left + width, top + height]`, with any missing (or otherwise
 *          falsy) field treated as 0.
 */
export const positionToPtXyxy = (position: PptxPositionModel): number[] => {
  const x1 = position.left || 0;
  const y1 = position.top || 0;
  const x2 = x1 + (position.width || 0);
  const y2 = y1 + (position.height || 0);
  return [x1, y1, x2, y2];
};

View file

@ -0,0 +1,12 @@
import { ApiError } from "@/models/errors";
/**
 * Call `func` and make sure that whatever it fails with surfaces as an ApiError.
 *
 * - An ApiError is passed through unchanged.
 * - Any other failure becomes `ApiError("Internal server error: <message>")`.
 * - If `func` returns a promise (e.g. it is an async function), rejections are
 *   converted the same way; a plain try/catch alone never sees them.
 *
 * @param func Zero-argument function to invoke.
 * @returns Whatever `func` returns; for a promise-returning `func`, a promise that
 *          resolves to the same value.
 * @throws ApiError on any failure (as a rejection for promise-returning `func`).
 */
export function wrap_errors(func: any) {
  const toApiError = (error: any): ApiError => {
    if (error instanceof ApiError) {
      return error;
    }
    // Thrown values are not always Error objects (strings, null, ...); fall back to
    // their string form instead of reporting "undefined" or crashing on null.
    const reason = error?.message ?? String(error);
    return new ApiError(`Internal server error: ${reason}`);
  };

  try {
    const result = func();
    if (result && typeof result.then === "function") {
      return Promise.resolve(result).catch((error: any) => {
        throw toApiError(error);
      });
    }
    return result;
  } catch (error: any) {
    throw toApiError(error);
  }
}