From a7516ee208a2dab9bc3b7514973274711ce0bb89 Mon Sep 17 00:00:00 2001 From: sauravniraula Date: Fri, 18 Jul 2025 08:16:35 +0545 Subject: [PATCH] feat(nextjs): adds basic slide scraper endpoint, feat(fastapi): adds presentation export endpoint and pptx creator service --- .../fastapi/api/v1/ppt/endpoints/outlines.py | 6 +- .../api/v1/ppt/endpoints/presentation.py | 9 + servers/fastapi/models/pptx_models.py | 158 +++++++ .../services/pptx_presentation_creator.py | 417 ++++++++++++++++++ servers/fastapi/utils/image_utils.py | 241 ++++++++++ .../generate_presentation_outlines.py | 148 ++++++- .../components/PresentationPage.tsx | 2 +- .../api/presentation_to_pptx_model/route.ts | 203 ++++++++- servers/nextjs/models/errors.ts | 7 + servers/nextjs/types/element_attibutes.ts | 42 ++ servers/nextjs/types/pptx_models.ts | 150 +++++++ servers/nextjs/utils/error_helpers.ts | 12 + 12 files changed, 1359 insertions(+), 36 deletions(-) create mode 100644 servers/fastapi/models/pptx_models.py create mode 100644 servers/fastapi/services/pptx_presentation_creator.py create mode 100644 servers/fastapi/utils/image_utils.py create mode 100644 servers/nextjs/models/errors.ts create mode 100644 servers/nextjs/types/element_attibutes.ts create mode 100644 servers/nextjs/types/pptx_models.ts create mode 100644 servers/nextjs/utils/error_helpers.ts diff --git a/servers/fastapi/api/v1/ppt/endpoints/outlines.py b/servers/fastapi/api/v1/ppt/endpoints/outlines.py index dc47bb11..6041bb7d 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/outlines.py +++ b/servers/fastapi/api/v1/ppt/endpoints/outlines.py @@ -37,9 +37,9 @@ async def stream_outlines(presentation_id: str): ).to_string() presentation_content_text += chunk - presentation_content = PresentationOutlineModel.model_validate_json( - presentation_content_text - ) + presentation_content_json = json.loads(presentation_content_text) + + presentation_content = PresentationOutlineModel(**presentation_content_json) presentation_content.slides = presentation_content.slides[ : presentation.n_slides ] diff --git a/servers/fastapi/api/v1/ppt/endpoints/presentation.py b/servers/fastapi/api/v1/ppt/endpoints/presentation.py index 42eb3c0e..b09a103e 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/presentation.py +++ b/servers/fastapi/api/v1/ppt/endpoints/presentation.py @@ -8,6 +8,7 @@ from fastapi.responses import StreamingResponse from sqlalchemy import delete from sqlmodel import select +from models.pptx_models import PptxPresentationModel from models.presentation_outline_model import SlideOutlineModel from models.presentation_layout import PresentationLayoutModel from models.presentation_structure_model import PresentationStructureModel @@ -18,6 +19,7 @@ from services import TEMP_FILE_SERVICE from services.database import get_sql_session from services.documents_loader import DocumentsLoader from models.sql.presentation import PresentationModel +from services.pptx_presentation_creator import PptxPresentationCreator from utils.llm_calls.generate_document_summary import generate_document_summary from utils.llm_calls.generate_presentation_structure import ( generate_presentation_structure, @@ -272,3 +274,10 @@ def update_presentation( **presentation.model_dump(), slides=updated_slides, ) + + +@PRESENTATION_ROUTER.post("/export/pptx") +def create_pptx(pptx_model: Annotated[PptxPresentationModel, Body()]): + pptx_creator = PptxPresentationCreator(pptx_model) + pptx_creator.create_ppt() + pptx_creator.save(pptx_model.id) diff --git a/servers/fastapi/models/pptx_models.py b/servers/fastapi/models/pptx_models.py new file mode 100644 index 00000000..073576c7 --- /dev/null +++ b/servers/fastapi/models/pptx_models.py @@ -0,0 +1,158 @@ +from enum import Enum +from typing import Annotated, List, Optional +from annotated_types import Len +from pydantic import BaseModel +from pptx.util import Pt +from pptx.enum.text import PP_ALIGN +from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE, MSO_CONNECTOR_TYPE + + +class PptxBoxShapeEnum(Enum): + RECTANGLE = "rectangle" + CIRCLE = "circle" + + +class PptxObjectFitEnum(Enum): + CONTAIN = "contain" + COVER = "cover" + FILL = "fill" + + +class PptxSpacingModel(BaseModel): + top: int = 0 + bottom: int = 0 + left: int = 0 + right: int = 0 + + @classmethod + def all(cls, num: int): + return PptxSpacingModel(top=num, left=num, bottom=num, right=num) + + +class PptxPositionModel(BaseModel): + left: int = 0 + top: int = 0 + width: int = 0 + height: int = 0 + + @classmethod + def for_textbox(cls, left: int, top: int, width: int): + return cls(left=left, top=top, width=width, height=100) + + def to_pt_list(self) -> List[int]: + return [Pt(self.left), Pt(self.top), Pt(self.width), Pt(self.height)] + + def to_pt_xyxy(self) -> List[int]: + return [ + Pt(self.left), + Pt(self.top), + Pt(self.left + self.width), + Pt(self.top + self.height), + ] + + +class PptxFontModel(BaseModel): + name: str = "Inter" + size: int = 16 + bold: bool = False + italic: bool = False + color: str = "000000" + + +class PptxFillModel(BaseModel): + color: str + + +class PptxStrokeModel(BaseModel): + color: str + thickness: float + + +class PptxShadowModel(BaseModel): + radius: int + offset: int = 0 + color: str = "000000" + opacity: float = 0.5 + angle: int = 0 + + +class PptxTextRunModel(BaseModel): + text: str + font: Optional[PptxFontModel] = None + + +class PptxParagraphModel(BaseModel): + spacing: Optional[PptxSpacingModel] = None + alignment: Optional[PP_ALIGN] = None + font: Optional[PptxFontModel] = None + text: Optional[str] = None + text_runs: Optional[List[PptxTextRunModel]] = None + + +class PptxObjectFitModel(BaseModel): + fit: Optional[PptxObjectFitEnum] = None + focus: Optional[ + Annotated[List[Optional[float]], Len(min_length=2, max_length=2)] + ] = None + + +class PptxPictureModel(BaseModel): + is_network: bool + path: str + + +class PptxShapeModel(BaseModel): + pass + + +class PptxTextBoxModel(PptxShapeModel): + margin: Optional[PptxSpacingModel] = None + fill: Optional[PptxFillModel] = None + position: PptxPositionModel + text_wrap: bool = True + paragraphs: List[PptxParagraphModel] + + +class PptxAutoShapeBoxModel(PptxShapeModel): + type: MSO_AUTO_SHAPE_TYPE = MSO_AUTO_SHAPE_TYPE.RECTANGLE + margin: Optional[PptxSpacingModel] = None + fill: Optional[PptxFillModel] = None + stroke: Optional[PptxStrokeModel] = None + shadow: Optional[PptxShadowModel] = None + position: PptxPositionModel + text_wrap: bool = True + border_radius: Optional[int] = None + paragraphs: Optional[List[PptxParagraphModel]] = None + + +class PptxPictureBoxModel(PptxShapeModel): + position: PptxPositionModel + margin: Optional[PptxSpacingModel] = None + clip: bool = True + overlay: Optional[str] = None + border_radius: Optional[List[int]] = None + shape: Optional[PptxBoxShapeEnum] = None + object_fit: Optional[PptxObjectFitModel] = None + picture: PptxPictureModel + + +class PptxConnectorModel(PptxShapeModel): + type: MSO_CONNECTOR_TYPE = MSO_CONNECTOR_TYPE.STRAIGHT + position: PptxPositionModel + thickness: float = 0.5 + color: str = "000000" + + +class PptxSlideModel(BaseModel): + shapes: List[ + PptxTextBoxModel + | PptxAutoShapeBoxModel + | PptxConnectorModel + | PptxPictureBoxModel + ] + + +class PptxPresentationModel(BaseModel): + name: str + shapes: Optional[List[PptxShapeModel]] = None + slides: List[PptxSlideModel] diff --git a/servers/fastapi/services/pptx_presentation_creator.py b/servers/fastapi/services/pptx_presentation_creator.py new file mode 100644 index 00000000..9d9b3e41 --- /dev/null +++ b/servers/fastapi/services/pptx_presentation_creator.py @@ -0,0 +1,417 @@ +import os +from typing import List, Optional +import uuid +from lxml import etree + +from pptx import Presentation +from pptx.shapes.autoshape import Shape +from pptx.slide import Slide +from pptx.text.text import _Paragraph, TextFrame, Font, _Run +from pptx.opc.constants import RELATIONSHIP_TYPE as RT +from lxml.etree import fromstring, tostring +from PIL import Image + +from pptx.util import Pt +from pptx.dml.color import RGBColor + +from models.pptx_models import ( + PptxAutoShapeBoxModel, + PptxBoxShapeEnum, + PptxConnectorModel, + PptxFillModel, + PptxFontModel, + PptxParagraphModel, + PptxPictureBoxModel, + PptxPositionModel, + PptxPresentationModel, + PptxShadowModel, + PptxSlideModel, + PptxSpacingModel, + PptxStrokeModel, + PptxTextBoxModel, + PptxTextRunModel, +) +from utils.image_utils import ( + change_image_color, + clip_image, + create_circle_image, + fit_image, + round_image_corners, +) + +BLANK_SLIDE_LAYOUT = 6 + + +class PptxPresentationCreator: + + def __init__(self, ppt_model: PptxPresentationModel, temp_dir: str): + self._temp_dir = temp_dir + + self._ppt_model = ppt_model + self._slide_models = ppt_model.slides + + self._ppt = Presentation() + self._ppt.slide_width = Pt(1280) + self._ppt.slide_height = Pt(720) + + def create_ppt(self): + + for slide_model in self._slide_models: + # Adding global shapes to slide + if self._ppt_model.shapes: + slide_model.shapes.append(self._ppt_model.shapes) + + self.add_and_populate_slide(slide_model) + + def set_presentation_theme(self): + slide_master = self._ppt.slide_master + slide_master_part = slide_master.part + + theme_part = slide_master_part.part_related_by(RT.THEME) + theme = fromstring(theme_part.blob) + + theme_colors = self._theme.colors.theme_color_mapping + nsmap = {"a": "http://schemas.openxmlformats.org/drawingml/2006/main"} + + for color_name, hex_value in theme_colors.items(): + if color_name: + color_element = theme.xpath( + f"a:themeElements/a:clrScheme/a:{color_name}/a:srgbClr", + namespaces=nsmap, + )[0] + color_element.set("val", hex_value.encode("utf-8")) + + theme_part._blob = tostring(theme) + + def add_and_populate_slide(self, slide_model: PptxSlideModel): + slide = self._ppt.slides.add_slide(self._ppt.slide_layouts[BLANK_SLIDE_LAYOUT]) + + for shape_model in slide_model.shapes: + model_type = type(shape_model) + + if model_type is PptxPictureBoxModel: + self.add_picture(slide, shape_model) + + elif model_type is PptxAutoShapeBoxModel: + self.add_autoshape(slide, shape_model) + + elif model_type is PptxTextBoxModel: + self.add_textbox(slide, shape_model) + + elif model_type is PptxConnectorModel: + self.add_connector(slide, shape_model) + + def add_connector(self, slide: Slide, connector_model: PptxConnectorModel): + if connector_model.thickness == 0: + return + connector_shape = slide.shapes.add_connector( + connector_model.type, *connector_model.position.to_pt_xyxy() + ) + connector_shape.line.width = Pt(connector_model.thickness) + connector_shape.line.color.rgb = RGBColor.from_string(connector_model.color) + + def add_picture(self, slide: Slide, picture_model: PptxPictureBoxModel): + image_path = picture_model.picture.path + if ( + picture_model.clip + or picture_model.border_radius + or picture_model.overlay + or picture_model.object_fit + or picture_model.shape + ): + try: + image = Image.open(image_path) + except: + print(f"Could not open image: {image_path}") + return + + image = image.convert("RGBA") + # ? Applying border radius twice to support both clip and object fit + if picture_model.border_radius: + image = round_image_corners(image, picture_model.border_radius) + if picture_model.object_fit: + image = fit_image( + image, + picture_model.position.width, + picture_model.position.height, + picture_model.object_fit, + ) + elif picture_model.clip: + image = clip_image( + image, + picture_model.position.width, + picture_model.position.height, + ) + if picture_model.border_radius: + image = round_image_corners(image, picture_model.border_radius) + if picture_model.shape == PptxBoxShapeEnum.CIRCLE: + image = create_circle_image(image) + if picture_model.overlay: + image = change_image_color(image, picture_model.overlay) + image_path = os.path.join(self._temp_dir, f"{str(uuid.uuid4())}.png") + image.save(image_path) + + margined_position = self.get_margined_position( + picture_model.position, picture_model.margin + ) + + slide.shapes.add_picture(image_path, *margined_position.to_pt_list()) + + def add_autoshape(self, slide: Slide, autoshape_box_model: PptxAutoShapeBoxModel): + position = autoshape_box_model.position + if autoshape_box_model.margin: + position = self.get_margined_position(position, autoshape_box_model.margin) + + autoshape = slide.shapes.add_shape( + autoshape_box_model.type, *position.to_pt_list() + ) + + textbox = autoshape.text_frame + textbox.word_wrap = autoshape_box_model.text_wrap + + self.apply_fill_to_shape(autoshape, autoshape_box_model.fill) + self.apply_margin_to_text_box(textbox, autoshape_box_model.margin) + self.apply_stroke_to_shape(autoshape, autoshape_box_model.stroke) + self.apply_shadow_to_shape(autoshape, autoshape_box_model.shadow) + self.apply_border_radius_to_shape(autoshape, autoshape_box_model.border_radius) + + if autoshape_box_model.paragraphs: + self.add_paragraphs(textbox, autoshape_box_model.paragraphs) + + def add_textbox(self, slide: Slide, textbox_model: PptxTextBoxModel): + position = textbox_model.position + textbox_shape = slide.shapes.add_textbox(*position.to_pt_list()) + textbox_shape.width += Pt(2) + + textbox = textbox_shape.text_frame + textbox.word_wrap = textbox_model.text_wrap + + self.apply_fill_to_shape(textbox_shape, textbox_model.fill) + self.apply_margin_to_text_box(textbox, textbox_model.margin) + self.add_paragraphs(textbox, textbox_model.paragraphs) + + def add_paragraphs( + self, textbox: TextFrame, paragraph_models: List[PptxParagraphModel] + ): + for index, paragraph_model in enumerate(paragraph_models): + paragraph = textbox.add_paragraph() if index > 0 else textbox.paragraphs[0] + self.populate_paragraph(paragraph, paragraph_model) + + def populate_paragraph( + self, paragraph: _Paragraph, paragraph_model: PptxParagraphModel + ): + if paragraph_model.spacing: + self.apply_spacing_to_paragraph(paragraph, paragraph_model.spacing) + + if paragraph_model.alignment: + paragraph.alignment = paragraph_model.alignment + + if paragraph_model.font: + self.apply_font_to_paragraph(paragraph, paragraph_model.font) + + text_runs = [] + if paragraph_model.text: + text_runs = self.parse_markdown_text_to_text_runs( + paragraph_model.font, paragraph_model.text + ) + elif paragraph_model.text_runs: + text_runs = paragraph_model.text_runs + + for text_run_model in text_runs: + text_run = paragraph.add_run() + self.populate_text_run(text_run, text_run_model) + + def parse_markdown_text_to_text_runs(self, font: PptxFontModel, text: str): + text_runs = [] + for line in text.split("\n"): + current_pos = 0 + while current_pos < len(line): + # Check for bold and italic (***text***) + if ( + line[current_pos:].startswith("***") + and "***" in line[current_pos + 3 :] + ): + end_pos = line.find("***", current_pos + 3) + text_content = line[current_pos + 3 : end_pos] + font_json = font.model_dump() + font_json["bold"] = True + font_json["italic"] = True + text_runs.append( + PptxTextRunModel( + text=text_content, font=PptxFontModel(**font_json) + ) + ) + current_pos = end_pos + 3 + # Check for bold (**text**) + elif ( + line[current_pos:].startswith("**") + and "**" in line[current_pos + 2 :] + ): + end_pos = line.find("**", current_pos + 2) + text_content = line[current_pos + 2 : end_pos] + font_json = font.model_dump() + font_json["bold"] = True + text_runs.append( + PptxTextRunModel( + text=text_content, font=PptxFontModel(**font_json) + ) + ) + current_pos = end_pos + 2 + # Check for italic (*text*) + elif ( + line[current_pos:].startswith("__") + and "__" in line[current_pos + 2 :] + ): + end_pos = line.find("__", current_pos + 2) + text_content = line[current_pos + 2 : end_pos] + font_json = font.model_dump() + font_json["italic"] = True + text_runs.append( + PptxTextRunModel( + text=text_content, font=PptxFontModel(**font_json) + ) + ) + current_pos = end_pos + 2 + else: + # Find the next formatting marker or end of line + next_marker = float("inf") + for marker in ["***", "**", "__"]: + pos = line.find(marker, current_pos) + if pos != -1: + next_marker = min(next_marker, pos) + + end_pos = next_marker if next_marker != float("inf") else len(line) + text_content = line[current_pos:end_pos] + if text_content: # Only add non-empty text + text_runs.append(PptxTextRunModel(text=text_content, font=font)) + current_pos = end_pos + + # Add newline if not the last line + if line != text.split("\n")[-1]: + text_runs.append(PptxTextRunModel(text="\n")) + + return text_runs + + def populate_text_run(self, text_run: _Run, text_run_model: PptxTextRunModel): + text_run.text = text_run_model.text + if text_run_model.font: + self.apply_font(text_run.font, text_run_model.font) + + def apply_border_radius_to_shape(self, shape: Shape, border_radius: Optional[int]): + if not border_radius: + return + try: + normalized_border_radius = Pt(border_radius) / min( + shape.width, shape.height + ) + shape.adjustments[0] = normalized_border_radius + except: + print("Could not apply border radius.") + + def apply_fill_to_shape(self, shape: Shape, fill: Optional[PptxFillModel] = None): + if not fill: + shape.fill.background() + else: + shape.fill.solid() + shape.fill.fore_color.rgb = RGBColor.from_string(fill.color) + + def apply_stroke_to_shape( + self, shape: Shape, stroke: Optional[PptxStrokeModel] = None + ): + if not stroke or stroke.thickness == 0: + shape.line.fill.background() + else: + shape.line.fill.solid() + shape.line.fill.fore_color.rgb = RGBColor.from_string(stroke.color) + shape.line.width = Pt(stroke.thickness) + + def apply_shadow_to_shape( + self, shape: Shape, shadow: Optional[PptxShadowModel] = None + ): + + # Access the XML for the shape + sp_element = shape._element + sp_pr = sp_element.xpath("p:spPr")[0] # Shape properties XML element + + nsmap = sp_pr.nsmap + + # # Remove existing shadow effects if present + effect_list = sp_pr.find("a:effectLst", namespaces=nsmap) + if effect_list: + old_shadow = effect_list.find("a:outerShdw") + if old_shadow: + effect_list.remove( + old_shadow, namespaces=nsmap + ) # Remove the old shadow + + if not shadow: + return + + if not effect_list: + effect_list = etree.SubElement( + sp_pr, f"{{{nsmap['a']}}}effectLst", nsmap=nsmap + ) + + outer_shadow = etree.SubElement( + effect_list, + f"{{{nsmap['a']}}}outerShdw", + { + "blurRad": f"{Pt(shadow.radius)}", + "dir": f"{shadow.angle * 1000}", + "dist": f"{Pt(shadow.offset)}", + "rotWithShape": "0", + }, + nsmap=nsmap, + ) + color_element = etree.SubElement( + outer_shadow, + f"{{{nsmap['a']}}}srgbClr", + {"val": f"{shadow.color}"}, + nsmap=nsmap, + ) + etree.SubElement( + color_element, + f"{{{nsmap['a']}}}alpha", + {"val": f"{int(shadow.opacity * 100000)}"}, + nsmap=nsmap, + ) + + def get_margined_position( + self, position: PptxPositionModel, margin: Optional[PptxSpacingModel] + ) -> PptxPositionModel: + if not margin: + return position + + left = position.left + margin.left + top = position.top + margin.top + width = max(position.width - margin.left - margin.right, 0) + height = max(position.height - margin.top - margin.bottom, 0) + + return PptxPositionModel(left=left, top=top, width=width, height=height) + + def apply_margin_to_text_box( + self, text_frame: TextFrame, margin: Optional[PptxSpacingModel] + ) -> PptxPositionModel: + text_frame.margin_left = Pt(margin.left if margin else 0) + text_frame.margin_right = Pt(margin.right if margin else 0) + text_frame.margin_top = Pt(margin.top if margin else 0) + text_frame.margin_bottom = Pt(margin.bottom if margin else 0) + + def apply_spacing_to_paragraph( + self, paragraph: _Paragraph, spacing: PptxSpacingModel + ): + paragraph.space_before = Pt(spacing.top) + paragraph.space_after = Pt(spacing.bottom) + + def apply_font_to_paragraph(self, paragraph: _Paragraph, font: PptxFontModel): + self.apply_font(paragraph.font, font) + + def apply_font(self, font: Font, font_model: PptxFontModel): + font.name = font_model.name + font.color.rgb = RGBColor.from_string(font_model.color) + font.bold = font_model.bold + font.italic = font_model.italic + font.size = Pt(font_model.size) + + def save(self, path: str): + self._ppt.save(path) diff --git a/servers/fastapi/utils/image_utils.py b/servers/fastapi/utils/image_utils.py new file mode 100644 index 00000000..b0d26057 --- /dev/null +++ b/servers/fastapi/utils/image_utils.py @@ -0,0 +1,241 @@ +from typing import List + +from PIL import Image, ImageDraw + +from models.pptx_models import PptxObjectFitEnum, PptxObjectFitModel + + +def clip_image( + image: Image.Image, + width: int, + height: int, + focus_x: float = 50.0, + focus_y: float = 50.0, +) -> Image.Image: + img_width, img_height = image.size + + img_aspect = img_width / img_height + box_aspect = width / height + + if img_aspect > box_aspect: + new_height = height + new_width = int(new_height * img_aspect) + else: + new_width = width + new_height = int(new_width / img_aspect) + + resized_image = image.resize((new_width, new_height), Image.LANCZOS) + + # Calculate clipping position based on focus + # Convert focus percentages (0-100) to position in the resized image + focus_x = max(0.0, min(100.0, focus_x)) # Clamp to 0-100 range + focus_y = max(0.0, min(100.0, focus_y)) # Clamp to 0-100 range + + # Calculate the center point based on focus + center_x = int((new_width - width) * (focus_x / 100.0)) + center_y = int((new_height - height) * (focus_y / 100.0)) + + # Calculate clipping box + left = center_x + top = center_y + right = left + width + bottom = top + height + + clipped_image = resized_image.crop((left, top, right, bottom)) + + return clipped_image + + +def round_image_corners(image: Image.Image, radii: List[int]) -> Image.Image: + if len(radii) != 4: + raise ValueError( + "Image Border Radius - radii must contain exactly 4 values for each corner" + ) + + w, h = image.size + # Ensure the image has an alpha channel (RGBA) + if image.mode != "RGBA": + image = image.convert("RGBA") + + # Create a mask for the rounded corners (start with fully transparent) + rounded_mask = Image.new("L", image.size, 0) + + # Create a rectangular mask (fully opaque) + rectangular_mask = Image.new("L", image.size, 255) + + # Process each corner + for i, radius in enumerate(radii): + if radius > 0: # Only process if radius is positive + # Create a circle for this radius + circle = Image.new("L", (radius * 2, radius * 2), 0) + draw = ImageDraw.Draw(circle) + draw.ellipse((0, 0, radius * 2 - 1, radius * 2 - 1), fill=255) + + # Calculate position based on corner index + if i == 0: # top-left + rounded_mask.paste(circle.crop((0, 0, radius, radius)), (0, 0)) + rectangular_mask.paste(0, (0, 0, radius, radius)) + elif i == 1: # top-right + rounded_mask.paste( + circle.crop((radius, 0, radius * 2, radius)), (w - radius, 0) + ) + rectangular_mask.paste(0, (w - radius, 0, w, radius)) + elif i == 2: # bottom-right + rounded_mask.paste( + circle.crop((radius, radius, radius * 2, radius * 2)), + (w - radius, h - radius), + ) + rectangular_mask.paste(0, (w - radius, h - radius, w, h)) + else: # bottom-left + rounded_mask.paste( + circle.crop((0, radius, radius, radius * 2)), (0, h - radius) + ) + rectangular_mask.paste(0, (0, h - radius, radius, h)) + + # Get the original alpha channel + original_alpha = image.getchannel("A") + + # Combine the rectangular mask with the rounded corners + corner_mask = Image.composite(rounded_mask, rectangular_mask, rounded_mask) + + # Combine the corner mask with the original alpha channel + final_alpha = Image.composite( + original_alpha, Image.new("L", image.size, 0), corner_mask + ) + + # Create a new image with the modified alpha channel + result = Image.new("RGBA", image.size) + result.paste(image.convert("RGB"), (0, 0)) + result.putalpha(final_alpha) + + return result + + +def change_image_color(img: Image.Image, color: str) -> Image.Image: + # r, g, b, alpha = img.split() + + # color_overlay = Image.new("RGBA", img.size, color=f"#{color}") + # return Image.composite(color_overlay, img, alpha) + if color.startswith("#"): + color = color[1:] + r_new = int(color[:2], 16) + g_new = int(color[2:4], 16) + b_new = int(color[4:], 16) + + # Get image data + data = img.getdata() + + # Process each pixel + new_data = [] + for item in data: + # Get current pixel values + r, g, b, a = item + + # Apply new color while preserving transparency + if a != 0: # Skip fully transparent pixels + new_data.append((r_new, g_new, b_new, a)) + else: + new_data.append((0, 0, 0, 0)) + + # Create new image with modified data + new_img = Image.new("RGBA", img.size) + new_img.putdata(new_data) + return new_img + + +def create_circle_image( + image: Image.Image, +) -> Image.Image: + # Convert to RGBA if not already + img = image.convert("RGBA") + # Get the original image size + size = img.size + # Use the smaller dimension for the circle + circle_size = min(size) + # Create a transparent image of the same size as original + mask = Image.new("RGBA", size, color=(0, 0, 0, 0)) + draw = ImageDraw.Draw(mask) + + # Calculate center position + center_x = size[0] // 2 + center_y = size[1] // 2 + radius = circle_size // 2 + + # Create a circular mask + draw.ellipse( + ( + center_x - radius, + center_y - radius, + center_x + radius, + center_y + radius, + ), + fill=(255, 255, 255, 255), + ) + + # Apply the circular mask + result = Image.composite(img, mask, mask) + return result + + +def fit_image( + image: Image.Image, width: int, height: int, object_fit: PptxObjectFitModel +) -> Image.Image: + if not object_fit.fit: + return image + + img_width, img_height = image.size + img_aspect = img_width / img_height + box_aspect = width / height + + if object_fit.fit == PptxObjectFitEnum.CONTAIN: + # Scale image to fit within the box while maintaining aspect ratio + if img_aspect > box_aspect: + new_width = width + new_height = int(width / img_aspect) + else: + new_height = height + new_width = int(height * img_aspect) + resized_image = image.resize((new_width, new_height), Image.LANCZOS) + + # Use focus point for positioning if available + focus_x = 50.0 + focus_y = 50.0 + if object_fit.focus and len(object_fit.focus) == 2: + focus_x, focus_y = object_fit.focus[0], object_fit.focus[1] + + # Calculate paste position based on focus + paste_x = int((width - new_width) * (focus_x / 100.0)) + paste_y = int((height - new_height) * (focus_y / 100.0)) + + result = Image.new("RGBA", (width, height), (0, 0, 0, 0)) + result.paste(resized_image, (paste_x, paste_y)) + return result + + elif object_fit.fit == PptxObjectFitEnum.COVER: + # Scale image to cover the box while maintaining aspect ratio + if img_aspect > box_aspect: + new_height = height + new_width = int(height * img_aspect) + else: + new_width = width + new_height = int(width / img_aspect) + resized_image = image.resize((new_width, new_height), Image.LANCZOS) + + # Use focus point for positioning if available + focus_x = 50.0 + focus_y = 50.0 + if object_fit.focus and len(object_fit.focus) == 2: + focus_x, focus_y = object_fit.focus[0], object_fit.focus[1] + + # Calculate paste position based on focus + paste_x = int((new_width - width) * (focus_x / 100.0)) + paste_y = int((new_height - height) * (focus_y / 100.0)) + + # Clip the image to the box size + return resized_image.crop((paste_x, paste_y, paste_x + width, paste_y + height)) + + elif object_fit.fit == PptxObjectFitEnum.FILL: + # Stretch image to fill the box exactly + return image.resize((width, height), Image.LANCZOS) + + return image diff --git a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py index 2b772bc5..f3fac493 100644 --- a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py +++ b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py @@ -11,28 +11,138 @@ from utils.llm_provider import ( is_google_selected, ) +# system_prompt = """ +# Create a presentation based on the provided prompt, number of slides, output language, and additional informational details. +# Format the output in the specified JSON schema with structured markdown content. + +# # Steps + +# 1. Identify key points from the provided prompt, including the topic, number of slides, output language, and additional content directions. +# 2. Create a concise and descriptive title reflecting the main topic, adhering to the specified language. +# 3. Generate a clear title for each slide. +# 4. Develop comprehensive content using markdown structure: +# * Use bullet points (- or *) for lists. +# * Use **bold** for emphasis, *italic* for secondary emphasis, and `code` for technical terms. +# 5. Provide important points from prompt as notes. + +# # Notes +# - Content must be generated for every slide. +# - Images or Icons information provided in **Input** must be included in the **notes**. +# - Notes should cleary define if it is for specific slide or for the presentation. +# - Slide **body** should not contain slide **title**. +# - Slide **title** should not contain "Slide 1", "Slide 2", etc. +# - Slide **title** should not be in markdown format. +# - There must be exact **Number of Slides** as specified. +# """ system_prompt = """ - Create a presentation based on the provided prompt, number of slides, output language, and additional informational details. - Format the output in the specified JSON schema with structured markdown content. +You are an expert presentation creator. Generate structured presentations based on user requirements and format them according to the specified JSON schema with markdown content. - # Steps +## Core Requirements - 1. Identify key points from the provided prompt, including the topic, number of slides, output language, and additional content directions. - 2. Create a concise and descriptive title reflecting the main topic, adhering to the specified language. - 3. Generate a clear title for each slide. - 4. Develop comprehensive content using markdown structure: - * Use bullet points (- or *) for lists. - * Use **bold** for emphasis, *italic* for secondary emphasis, and `code` for technical terms. - 5. Provide important points from prompt as notes. - - # Notes - - Content must be generated for every slide. - - Images or Icons information provided in **Input** must be included in the **notes**. - - Notes should cleary define if it is for specific slide or for the presentation. - - Slide **body** should not contain slide **title**. - - Slide **title** should not contain "Slide 1", "Slide 2", etc. - - Slide **title** should not be in markdown format. - - There must be exact **Number of Slides** as specified. +### Input Processing +1. **Extract key information** from the user's prompt: + - Main topic/subject matter + - Required number of slides + - Target language for output + - Specific content requirements or focus areas + - Target audience (if specified) + - Presentation style or tone preferences + + +## Content Generation Guidelines + +### Presentation Title +- Create a **concise, descriptive title** that captures the essence of the topic +- Use **plain text format** (no markdown formatting) +- Make it **engaging and professional** +- Ensure it reflects the main theme and target audience + +### Slide Titles +- Generate **clear, specific titles** for each slide +- Use **plain text format** (no markdown, no "Slide 1", "Slide 2" prefixes) +- Make each title **descriptive and informative** +- Ensure titles create a **logical flow** through the presentation +- Keep titles **concise but meaningful** + +### Slide Body Content +- Use **full markdown formatting** for rich content structure +- Apply consistent formatting: + - `**bold**` for key concepts and emphasis + - `*italic*` for secondary emphasis or definitions + - `- or *` for bullet points and lists + - `> ` for quotes or callouts + - `### ` for subsections within slides + - ``` for code blocks (when applicable) + - `inline code` for technical terms or specific terminology + +### Content Structure Per Slide +- **Opening/Hook**: Start with engaging content +- **Main Points**: 3-5 key points maximum per slide +- **Supporting Details**: Brief explanations or examples +- **Visual Cues**: Suggest where charts, images, or diagrams would be beneficial +- **Transitions**: Natural flow to next slide topic + +### Speaker Notes +- Include **comprehensive speaker notes** for each slide +- Provide **additional context** not covered in slide content +- Add **timing suggestions** and **delivery tips** +- Include **visual element descriptions** (charts, images, icons) +- Specify if notes apply to **specific slides** or **entire presentation** +- Add **interaction opportunities** (questions, polls, discussions) + +## Quality Standards + +### Content Quality +- Ensure **factual accuracy** and **current information** +- Maintain **consistent tone** throughout presentation +- Create **logical progression** between slides +- Include **actionable insights** where appropriate +- Balance **depth and accessibility** for target audience + +### Formatting Consistency +- Use **uniform markdown styling** across all slides +- Maintain **consistent bullet point structure** +- Apply **appropriate heading levels** +- Ensure **readable content density** + +### Language and Tone +- Generate content in the **specified language** +- Adapt **tone and complexity** to target audience +- Use **active voice** and **clear, direct language** +- Include **engaging elements** (questions, scenarios, examples) + +## Special Considerations + +### Slide Count Compliance +- Generate **exactly** the number of slides requested +- Distribute content **evenly** across slides +- Ensure **no slide is significantly longer** than others +- Create **balanced information flow** + +### Visual Integration +- Suggest **relevant visual elements** in notes +- Indicate **optimal placement** for charts, graphs, images +- Recommend **slide layouts** for different content types +- Specify **color schemes** or **design elements** when relevant + +### Interactivity Elements +- Include **audience engagement opportunities** +- Suggest **discussion points** or **questions** +- Recommend **interactive elements** (polls, breakout sessions) +- Provide **transition phrases** between sections + +## Validation Checklist + +Before finalizing, ensure: +- [ ] Exact number of slides generated +- [ ] All titles are plain text (no markdown) +- [ ] All slide bodies use proper markdown formatting +- [ ] Comprehensive notes provided for each slide +- [ ] Logical flow between slides +- [ ] Consistent formatting throughout +- [ ] Content appropriate for specified language +- [ ] No slide title appears in slide body +- [ ] Speaker notes clearly indicate scope (slide-specific or presentation-wide) """ diff --git a/servers/nextjs/app/(presentation-generator)/presentation/components/PresentationPage.tsx b/servers/nextjs/app/(presentation-generator)/presentation/components/PresentationPage.tsx index 87f34916..18ec7f59 100644 --- a/servers/nextjs/app/(presentation-generator)/presentation/components/PresentationPage.tsx +++ b/servers/nextjs/app/(presentation-generator)/presentation/components/PresentationPage.tsx @@ -144,7 +144,7 @@ const PresentationPage: React.FC = ({ presentation_id }) />
-
+
{!presentationData || loading || !presentationData?.slides || diff --git a/servers/nextjs/app/api/presentation_to_pptx_model/route.ts b/servers/nextjs/app/api/presentation_to_pptx_model/route.ts index 2adde0fa..716e07e2 100644 --- a/servers/nextjs/app/api/presentation_to_pptx_model/route.ts +++ b/servers/nextjs/app/api/presentation_to_pptx_model/route.ts @@ -1,30 +1,207 @@ +import { ApiError } from "@/models/errors"; import { NextRequest, NextResponse } from "next/server"; -import puppeteer from "puppeteer"; +import puppeteer, { ElementHandle } from "puppeteer"; +import { ElementAttributes } from "@/types/element_attibutes"; + + +export async function GET(request: NextRequest) { -export async function POST(request: NextRequest) { - let id: string; try { - const body = await request.json(); - id = body.id; - } catch (error) { - return NextResponse.json({ detail: "Invalid request body" }, { status: 400 }); + const id = await getPresentationId(request); + const slides = await getSlides(id); + const slide = slides[0]; + const attributes = await getAllChildElementsAttributes(slide); + console.log(attributes); + + // Temporary + return NextResponse.json({ + attributes: attributes, + }); + } catch (error: any) { + console.error(error); + if (error instanceof ApiError) { + return NextResponse.json(error, { status: 400 }); + } + return NextResponse.json({ detail: `Internal server error: ${error.message}` }, { status: 500 }); } - - - return NextResponse.json({ message: "Hello, world!" }); } +async function getPresentationId(request: NextRequest) { + const id = request.nextUrl.searchParams.get("id"); + if (!id) { + throw new ApiError("Presentation ID not found"); + } + return id; +} -async function get_presentation_page(id: string) { +async function getPresentationPage(id: string) { const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox', '--disable-setuid-sandbox'] }); const page = await browser.newPage(); - await page.setViewport({ width: 1440, height: 900, deviceScaleFactor: 1 }); - await page.goto(`http://localhost/pdf-maker?id=${id}`, { + await page.setViewport({ width: 1640, height: 720, deviceScaleFactor: 1 }); + await page.goto(`http://localhost/presentation?id=${id}`, { waitUntil: "networkidle0", timeout: 60000, }); return page; +} + +async function getSlidesWrapper(id: string): Promise> { + const page = await getPresentationPage(id); + const slides_wrapper = await page.$("#presentation-slides-wrapper"); + if (!slides_wrapper) { + throw new ApiError("Presentation slides not found"); + } + return slides_wrapper; +} + +async function getSlides(id: string) { + const slides_wrapper = await getSlidesWrapper(id); + const slides = await slides_wrapper.$$(":scope > div > div"); + return slides; +} + +async function getElementAttributes(element: ElementHandle): Promise { + const attributes = await element.evaluate((el) => { + // Helper function to convert color to hex + function colorToHex(color: string): string | undefined { + if (!color || color === 'transparent' || color === 'rgba(0, 0, 0, 0)') { + return undefined; + } + + // Create a temporary canvas to convert colors to hex + const canvas = document.createElement('canvas'); + const ctx = canvas.getContext('2d'); + if (!ctx) return color; + + ctx.fillStyle = color; + return ctx.fillStyle; + } + + const computedStyles = window.getComputedStyle(el); + + // Parse position and dimensions + const rect = el.getBoundingClientRect(); + const position = { + left: rect.left, + top: rect.top, + width: rect.width, + height: rect.height, + }; + + // Parse background + const backgroundColor = colorToHex(computedStyles.backgroundColor); + const backgroundOpacity = parseFloat(computedStyles.opacity); + const background = { + color: backgroundColor, + opacity: isNaN(backgroundOpacity) ? undefined : backgroundOpacity, + }; + + // Parse border + const borderColor = colorToHex(computedStyles.borderColor); + const borderWidth = parseFloat(computedStyles.borderWidth); + const border = borderWidth === 0 ? undefined : { + color: borderColor, + width: isNaN(borderWidth) ? undefined : borderWidth, + }; + + // Parse shadow (box-shadow) + const boxShadow = computedStyles.boxShadow; + let shadow = { + offset: undefined as [number, number] | undefined, + color: undefined as string | undefined, + opacity: undefined as number | undefined, + }; + + if (boxShadow && boxShadow !== 'none') { + const shadowParts = boxShadow.split(' '); + if (shadowParts.length >= 4) { + const offsetX = parseFloat(shadowParts[0]); + const offsetY = parseFloat(shadowParts[1]); + shadow = { + offset: (!isNaN(offsetX) && !isNaN(offsetY)) ? [offsetX, offsetY] as [number, number] : undefined, + color: colorToHex(shadowParts[3]), + opacity: 1, + }; + } + } + + // Parse font + const fontSize = parseFloat(computedStyles.fontSize); + const fontWeight = parseInt(computedStyles.fontWeight); + const fontColor = colorToHex(computedStyles.color); + const font = { + size: isNaN(fontSize) ? undefined : fontSize, + weight: isNaN(fontWeight) ? undefined : fontWeight, + color: fontColor, + }; + + // Parse margin + const marginTop = parseFloat(computedStyles.marginTop); + const marginBottom = parseFloat(computedStyles.marginBottom); + const marginLeft = parseFloat(computedStyles.marginLeft); + const marginRight = parseFloat(computedStyles.marginRight); + const margin = { + top: isNaN(marginTop) ? undefined : marginTop, + bottom: isNaN(marginBottom) ? undefined : marginBottom, + left: isNaN(marginLeft) ? undefined : marginLeft, + right: isNaN(marginRight) ? undefined : marginRight, + }; + + // Parse padding + const paddingTop = parseFloat(computedStyles.paddingTop); + const paddingBottom = parseFloat(computedStyles.paddingBottom); + const paddingLeft = parseFloat(computedStyles.paddingLeft); + const paddingRight = parseFloat(computedStyles.paddingRight); + const padding = { + top: isNaN(paddingTop) ? undefined : paddingTop, + bottom: isNaN(paddingBottom) ? undefined : paddingBottom, + left: isNaN(paddingLeft) ? undefined : paddingLeft, + right: isNaN(paddingRight) ? undefined : paddingRight, + }; + + return { + tagName: el.tagName.toLowerCase(), + id: el.id || undefined, + className: el.className || undefined, + innerText: el.textContent || undefined, + background, + border, + shadow, + font, + position, + margin, + padding, + }; + }); + return attributes; +} + +async function getAllChildElementsAttributes(element: ElementHandle): Promise { + // Get the root element's bounding rect for relative positioning + const rootRect = await element.evaluate((el) => el.getBoundingClientRect()); + + // Get all child elements as ElementHandles + const childElementHandles = await element.$$(':scope *'); + + // Get attributes for each child element using getElementAttributes + const attributesPromises = childElementHandles.map(async (childElementHandle) => { + const attributes = await getElementAttributes(childElementHandle); + + // Convert positions to relative positions + if (attributes.position && attributes.position.left !== undefined && attributes.position.top !== undefined) { + attributes.position = { + left: attributes.position.left - rootRect.left, + top: attributes.position.top - rootRect.top, + width: attributes.position.width, + height: attributes.position.height, + }; + } + + return attributes; + }); + + return Promise.all(attributesPromises); } \ No newline at end of file diff --git a/servers/nextjs/models/errors.ts b/servers/nextjs/models/errors.ts new file mode 100644 index 00000000..720890dc --- /dev/null +++ b/servers/nextjs/models/errors.ts @@ -0,0 +1,7 @@ +export class ApiError { + detail: string; + + constructor(detail: string) { + this.detail = detail; + } +} \ No newline at end of file diff --git a/servers/nextjs/types/element_attibutes.ts b/servers/nextjs/types/element_attibutes.ts new file mode 100644 index 00000000..5913e730 --- /dev/null +++ b/servers/nextjs/types/element_attibutes.ts @@ -0,0 +1,42 @@ +export interface ElementAttributes { + tagName: string; + id?: string; + className?: string; + innerText?: string; + background?: { + color?: string; + opacity?: number; + }; + border?: { + color?: string; + width?: number; + }; + shadow?: { + offset?: [number, number]; + color?: string; + opacity?: number; + }, + font?: { + size?: number; + weight?: number; + color?: string; + }; + position?: { + left?: number; + top?: number; + width?: number; + height?: number; + }; + margin?: { + top?: number; + bottom?: number; + left?: number; + right?: number; + }; + padding?: { + top?: number; + bottom?: number; + left?: number; + right?: number; + }; +} \ No newline at end of file diff --git a/servers/nextjs/types/pptx_models.ts b/servers/nextjs/types/pptx_models.ts new file mode 100644 index 00000000..83bd9ee3 --- /dev/null +++ b/servers/nextjs/types/pptx_models.ts @@ -0,0 +1,150 @@ +export enum PptxBoxShapeEnum { + RECTANGLE = "rectangle", + CIRCLE = "circle" +} + +export enum PptxObjectFitEnum { + CONTAIN = "contain", + COVER = "cover", + FILL = "fill" +} + +export interface PptxSpacingModel { + top?: number; + bottom?: number; + left?: number; + right?: number; +} + +export interface PptxPositionModel { + left?: number; + top?: number; + width?: number; + height?: number; +} + +export interface PptxFontModel { + name?: string; + size?: number; + bold?: boolean; + italic?: boolean; + color?: string; +} + +export interface PptxFillModel { + color: string; +} + +export interface PptxStrokeModel { + color: string; + thickness: number; +} + +export interface PptxShadowModel { + radius: number; + offset?: number; + color?: string; + opacity?: number; + angle?: number; +} + +export interface PptxTextRunModel { + text: string; + font?: PptxFontModel; +} + +export interface PptxParagraphModel { + spacing?: PptxSpacingModel; + alignment?: any; + font?: PptxFontModel; + text?: string; + text_runs?: PptxTextRunModel[]; +} + +export interface PptxObjectFitModel { + fit?: PptxObjectFitEnum; + focus?: [number | null, number | null]; +} + +export interface PptxPictureModel { + is_network: boolean; + path: string; +} + +export interface PptxShapeModel { +} + +export interface PptxTextBoxModel extends PptxShapeModel { + margin?: PptxSpacingModel; + fill?: PptxFillModel; + position: PptxPositionModel; + text_wrap?: boolean; + paragraphs: PptxParagraphModel[]; +} + +export interface PptxAutoShapeBoxModel extends PptxShapeModel { + type?: any; + margin?: PptxSpacingModel; + fill?: PptxFillModel; + stroke?: PptxStrokeModel; + shadow?: PptxShadowModel; + position: PptxPositionModel; + text_wrap?: boolean; + border_radius?: number; + paragraphs?: PptxParagraphModel[]; +} + +export interface PptxPictureBoxModel extends PptxShapeModel { + position: PptxPositionModel; + margin?: PptxSpacingModel; + clip?: boolean; + overlay?: string; + border_radius?: number[]; + shape?: PptxBoxShapeEnum; + object_fit?: PptxObjectFitModel; + picture: PptxPictureModel; +} + +export interface PptxConnectorModel extends PptxShapeModel { + type?: any; + position: PptxPositionModel; + thickness?: number; + color?: string; +} + +export interface PptxSlideModel { + shapes: (PptxTextBoxModel | PptxAutoShapeBoxModel | PptxConnectorModel | PptxPictureBoxModel)[]; +} + +export interface PptxPresentationModel { + background_color: string; + shapes?: PptxShapeModel[]; + slides: PptxSlideModel[]; +} + +export const createPptxSpacingAll = (num: number): PptxSpacingModel => ({ + top: num, + left: num, + bottom: num, + right: num +}); + +export const createPptxPositionForTextbox = (left: number, top: number, width: number): PptxPositionModel => ({ + left, + top, + width, + height: 100 +}); + +export const positionToPtList = (position: PptxPositionModel): number[] => { + return [position.left || 0, position.top || 0, position.width || 0, position.height || 0]; +}; + +export const positionToPtXyxy = (position: PptxPositionModel): number[] => { + const left = position.left || 0; + const top = position.top || 0; + const width = position.width || 0; + const height = position.height || 0; + + return [left, top, left + width, top + height]; +}; diff --git a/servers/nextjs/utils/error_helpers.ts b/servers/nextjs/utils/error_helpers.ts new file mode 100644 index 00000000..98e837a2 --- /dev/null +++ b/servers/nextjs/utils/error_helpers.ts @@ -0,0 +1,12 @@ +import { ApiError } from "@/models/errors"; + +export function wrap_errors(func: any) { + try { + return func(); + } catch (error: any) { + if (error instanceof ApiError) { + throw error; + } + throw new ApiError(`Internal server error: ${error.message}`); + } +} \ No newline at end of file