Merge pull request #96 from presenton/feat/presentation_export

feat(fastapi): adds slide element attributes to pptx_model and improves element attributes scraping
This commit is contained in:
Saurav Niraula 2025-07-19 12:45:23 +05:45 committed by GitHub
commit 73782f3594
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 503 additions and 85 deletions

View file

@ -144,6 +144,7 @@ class PptxConnectorModel(PptxShapeModel):
class PptxSlideModel(BaseModel):
background: Optional[PptxFillModel] = None
shapes: List[
PptxTextBoxModel
| PptxAutoShapeBoxModel

View file

@ -108,6 +108,9 @@ class PptxPresentationCreator:
def add_and_populate_slide(self, slide_model: PptxSlideModel):
slide = self._ppt.slides.add_slide(self._ppt.slide_layouts[BLANK_SLIDE_LAYOUT])
if slide_model.background:
self.apply_fill_to_shape(slide.background, slide_model.background)
for shape_model in slide_model.shapes:
model_type = type(shape_model)

View file

@ -11,29 +11,7 @@ from utils.llm_provider import (
is_google_selected,
)
# system_prompt = """
# Create a presentation based on the provided prompt, number of slides, output language, and additional informational details.
# Format the output in the specified JSON schema with structured markdown content.
# # Steps
# 1. Identify key points from the provided prompt, including the topic, number of slides, output language, and additional content directions.
# 2. Create a concise and descriptive title reflecting the main topic, adhering to the specified language.
# 3. Generate a clear title for each slide.
# 4. Develop comprehensive content using markdown structure:
# * Use bullet points (- or *) for lists.
# * Use **bold** for emphasis, *italic* for secondary emphasis, and `code` for technical terms.
# 5. Provide important points from prompt as notes.
# # Notes
# - Content must be generated for every slide.
# - Images or Icons information provided in **Input** must be included in the **notes**.
# - Notes should clearly state whether they apply to a specific slide or to the whole presentation.
# - Slide **body** should not contain slide **title**.
# - Slide **title** should not contain "Slide 1", "Slide 2", etc.
# - Slide **title** should not be in markdown format.
# - There must be exact **Number of Slides** as specified.
# """
system_prompt = """
You are an expert presentation creator. Generate structured presentations based on user requirements and format them according to the specified JSON schema with markdown content.
@ -183,13 +161,7 @@ async def generate_ppt_outline(
async with client.beta.chat.completions.stream(
model=model,
messages=get_prompt_template(prompt, n_slides, language, content),
response_format={
"type": "json_schema",
"json_schema": {
"name": "PresentationOutline",
"schema": response_model.model_json_schema(),
},
},
response_format=response_model,
) as stream:
async for event in stream:
if isinstance(event, ContentDeltaEvent):

View file

@ -1,7 +1,9 @@
import { ApiError } from "@/models/errors";
import { NextRequest, NextResponse } from "next/server";
import puppeteer, { ElementHandle } from "puppeteer";
import { ElementAttributes } from "@/types/element_attibutes";
import { ElementAttributes, SlideAttributesResult } from "@/types/element_attibutes";
import { convertElementAttributesToPptxSlides } from "@/utils/pptx_models_utils";
import { PptxPresentationModel } from "@/types/pptx_models";
export async function GET(request: NextRequest) {
@ -9,14 +11,12 @@ export async function GET(request: NextRequest) {
try {
const id = await getPresentationId(request);
const slides = await getSlides(id);
const slide = slides[0];
const attributes = await getAllChildElementsAttributes(slide);
console.log(attributes);
// Temporary
return NextResponse.json({
attributes: attributes,
});
const slides_attributes = await getSlidesAttributes(slides);
const slides_pptx_models = convertElementAttributesToPptxSlides(slides_attributes.elements, slides_attributes.backgroundColors);
const presentation_pptx_model: PptxPresentationModel = {
slides: slides_pptx_models,
};
return NextResponse.json(presentation_pptx_model);
} catch (error: any) {
console.error(error);
if (error instanceof ApiError) {
@ -34,6 +34,38 @@ async function getPresentationId(request: NextRequest) {
return id;
}
/**
 * Scrapes all slides concurrently and splits the per-slide results into two
 * arrays that share the same slide index: the element lists and the
 * background colors.
 */
async function getSlidesAttributes(slides: ElementHandle<Element>[]) {
  const perSlide = await Promise.all(
    slides.map((slideHandle) => getAllChildElementsAttributes(slideHandle)),
  );

  return {
    elements: perSlide.map(({ elements }) => elements),
    backgroundColors: perSlide.map(({ backgroundColor }) => backgroundColor),
  };
}
/**
 * Resolves the slides wrapper for the presentation and returns handles for
 * the elements matched by `:scope > div > div` beneath it.
 */
async function getSlides(id: string) {
  const wrapper = await getSlidesWrapper(id);
  return wrapper.$$(":scope > div > div");
}
/**
 * Opens the presentation page and returns the `#presentation-slides-wrapper`
 * element.
 *
 * @throws ApiError when the wrapper element is not found on the page.
 */
async function getSlidesWrapper(id: string): Promise<ElementHandle<Element>> {
  const presentationPage = await getPresentationPage(id);
  const wrapper = await presentationPage.$("#presentation-slides-wrapper");
  if (wrapper) {
    return wrapper;
  }
  throw new ApiError("Presentation slides not found");
}
async function getPresentationPage(id: string) {
const browser = await puppeteer.launch({
headless: true,
@ -48,20 +80,111 @@ async function getPresentationPage(id: string) {
return page;
}
async function getSlidesWrapper(id: string): Promise<ElementHandle<Element>> {
const page = await getPresentationPage(id);
const slides_wrapper = await page.$("#presentation-slides-wrapper");
if (!slides_wrapper) {
throw new ApiError("Presentation slides not found");
/**
 * Collects the attributes of every descendant of a slide root element and
 * derives the slide's background color.
 *
 * Steps, in order:
 *  1. Measure the root element's bounding rect (non-finite values become 0).
 *  2. For every descendant (`:scope *`), read its attributes, count its
 *     ancestors, and rebase `left`/`top` onto the root rect.
 *  3. Take the background color from the first descendant that exactly covers
 *     the root rect and has a background color.
 *  4. Drop descendants that cover the root rect or that have no background,
 *     border, shadow or non-blank text.
 *  5. Order the remainder by z-index (descending), then by depth (descending).
 *
 * @param element Handle of the slide root element.
 * @returns The ordered element attributes plus the detected background color
 *          (undefined when no full-size descendant has one).
 */
async function getAllChildElementsAttributes(element: ElementHandle<Element>): Promise<SlideAttributesResult> {
// Get the root element's bounding rect for relative positioning.
// A plain object is returned from the page context; non-finite numbers are replaced by 0.
const rootRect = await element.evaluate((el) => {
const rect = el.getBoundingClientRect();
return {
left: isFinite(rect.left) ? rect.left : 0,
top: isFinite(rect.top) ? rect.top : 0,
width: isFinite(rect.width) ? rect.width : 0,
height: isFinite(rect.height) ? rect.height : 0,
};
});
// Get all descendant elements (not only direct children) as ElementHandles
const childElementHandles = await element.$$(':scope *');
// Get attributes and depth for each descendant; the promises are awaited together below
const attributesPromises = childElementHandles.map(async (childElementHandle) => {
const attributes = await getElementAttributes(childElementHandle);
// Calculate the depth of the element: the number of parentElement hops until there is no parent
const depth = await childElementHandle.evaluate((el) => {
let depth = 0;
let current = el;
while (current.parentElement) {
depth++;
current = current.parentElement;
}
return depth;
});
// Convert viewport positions to positions relative to the root element's top-left corner
if (attributes.position && attributes.position.left !== undefined && attributes.position.top !== undefined) {
attributes.position = {
left: attributes.position.left - rootRect.left,
top: attributes.position.top - rootRect.top,
width: attributes.position.width,
height: attributes.position.height,
};
}
return { attributes, depth };
});
const allResults = await Promise.all(attributesPromises);
// Extract background color from elements whose position is the same as root element.
// The comparison is exact (===) on the rebased left/top and on width/height.
let backgroundColor: string | undefined;
const elementsWithRootPosition = allResults.filter(({ attributes }) => {
return attributes.position &&
attributes.position.left === 0 &&
attributes.position.top === 0 &&
attributes.position.width === rootRect.width &&
attributes.position.height === rootRect.height;
});
// Get the background color from the first element with root position that has a background
for (const { attributes } of elementsWithRootPosition) {
if (attributes.background && attributes.background.color) {
backgroundColor = attributes.background.color;
break;
}
}
// NOTE(review): `slides_wrapper` is not declared anywhere in this function, and an
// unconditional return here would skip the filtering and sorting below. This line
// looks like a leftover from the removed getSlidesWrapper implementation — confirm
// and remove.
return slides_wrapper;
// Filter out elements with no meaningful styling and elements with same position as root
const filteredResults = allResults.filter(({ attributes }) => {
// Check if element has any meaningful styling or content
const hasBackground = attributes.background && attributes.background.color;
const hasBorder = attributes.border && attributes.border.color;
const hasShadow = attributes.shadow && attributes.shadow.color;
const hasText = attributes.innerText && attributes.innerText.trim().length > 0;
// Check if element position is the same as root (exclude these elements)
const isRootPosition = attributes.position &&
attributes.position.left === 0 &&
attributes.position.top === 0 &&
attributes.position.width === rootRect.width &&
attributes.position.height === rootRect.height;
// Return true if element has at least one of these properties AND is not at root position
return (hasBackground || hasBorder || hasShadow || hasText) && !isRootPosition;
});
// Sort elements by z-index first, then by depth when the z-indexes are equal.
// Array.prototype.sort reorders filteredResults in place; a missing z-index counts as 0.
const sortedElements = filteredResults
.sort((a, b) => {
const zIndexA = a.attributes.zIndex || 0;
const zIndexB = b.attributes.zIndex || 0;
// If both elements have the same z-index (including 0), sort by depth
if (zIndexA === zIndexB) {
return b.depth - a.depth; // Higher depth first (children before parents)
}
// Otherwise sort by z-index in descending order (higher z-index first)
return zIndexB - zIndexA;
})
.map(({ attributes }) => attributes); // Extract just the attributes
return {
elements: sortedElements,
backgroundColor
};
}
async function getSlides(id: string) {
const slides_wrapper = await getSlidesWrapper(id);
const slides = await slides_wrapper.$$(":scope > div > div");
return slides;
}
async function getElementAttributes(element: ElementHandle<Element>): Promise<ElementAttributes> {
const attributes = await element.evaluate((el) => {
@ -80,15 +203,28 @@ async function getElementAttributes(element: ElementHandle<Element>): Promise<El
return ctx.fillStyle;
}
// Helper function to check if element has only text nodes as direct children
function hasOnlyTextNodes(el: Element): boolean {
const children = el.childNodes;
for (let i = 0; i < children.length; i++) {
const child = children[i];
// If any child is an element node (not a text node), return false
if (child.nodeType === Node.ELEMENT_NODE) {
return false;
}
}
return true;
}
const computedStyles = window.getComputedStyle(el);
// Parse position and dimensions
const rect = el.getBoundingClientRect();
const position = {
left: rect.left,
top: rect.top,
width: rect.width,
height: rect.height,
left: isFinite(rect.left) ? rect.left : 0,
top: isFinite(rect.top) ? rect.top : 0,
width: isFinite(rect.width) ? rect.width : 0,
height: isFinite(rect.height) ? rect.height : 0,
};
// Parse background
@ -113,6 +249,8 @@ async function getElementAttributes(element: ElementHandle<Element>): Promise<El
offset: undefined as [number, number] | undefined,
color: undefined as string | undefined,
opacity: undefined as number | undefined,
radius: undefined as number | undefined,
angle: undefined as number | undefined,
};
if (boxShadow && boxShadow !== 'none') {
@ -120,10 +258,13 @@ async function getElementAttributes(element: ElementHandle<Element>): Promise<El
if (shadowParts.length >= 4) {
const offsetX = parseFloat(shadowParts[0]);
const offsetY = parseFloat(shadowParts[1]);
const blurRadius = parseFloat(shadowParts[2]);
shadow = {
offset: (!isNaN(offsetX) && !isNaN(offsetY)) ? [offsetX, offsetY] as [number, number] : undefined,
color: colorToHex(shadowParts[3]),
opacity: 1,
radius: !isNaN(blurRadius) ? blurRadius : undefined,
angle: !isNaN(offsetX) && !isNaN(offsetY) ? Math.atan2(offsetY, offsetX) * (180 / Math.PI) : undefined,
};
}
}
@ -132,10 +273,22 @@ async function getElementAttributes(element: ElementHandle<Element>): Promise<El
const fontSize = parseFloat(computedStyles.fontSize);
const fontWeight = parseInt(computedStyles.fontWeight);
const fontColor = colorToHex(computedStyles.color);
const fontFamily = computedStyles.fontFamily;
const fontStyle = computedStyles.fontStyle;
// Extract only the first font from font-family (e.g., "Hack, sans-serif" -> "Hack")
let fontName = undefined;
if (fontFamily !== 'initial') {
const firstFont = fontFamily.split(',')[0].trim().replace(/['"]/g, '');
fontName = firstFont;
}
const font = {
name: fontName,
size: isNaN(fontSize) ? undefined : fontSize,
weight: isNaN(fontWeight) ? undefined : fontWeight,
color: fontColor,
italic: fontStyle === 'italic',
};
// Parse margin
@ -143,30 +296,73 @@ async function getElementAttributes(element: ElementHandle<Element>): Promise<El
const marginBottom = parseFloat(computedStyles.marginBottom);
const marginLeft = parseFloat(computedStyles.marginLeft);
const marginRight = parseFloat(computedStyles.marginRight);
const margin = {
const marginObj = {
top: isNaN(marginTop) ? undefined : marginTop,
bottom: isNaN(marginBottom) ? undefined : marginBottom,
left: isNaN(marginLeft) ? undefined : marginLeft,
right: isNaN(marginRight) ? undefined : marginRight,
};
// Set margin as undefined if all fields are 0
const margin = (marginObj.top === 0 && marginObj.bottom === 0 && marginObj.left === 0 && marginObj.right === 0)
? undefined
: marginObj;
// Parse padding
const paddingTop = parseFloat(computedStyles.paddingTop);
const paddingBottom = parseFloat(computedStyles.paddingBottom);
const paddingLeft = parseFloat(computedStyles.paddingLeft);
const paddingRight = parseFloat(computedStyles.paddingRight);
const padding = {
const paddingObj = {
top: isNaN(paddingTop) ? undefined : paddingTop,
bottom: isNaN(paddingBottom) ? undefined : paddingBottom,
left: isNaN(paddingLeft) ? undefined : paddingLeft,
right: isNaN(paddingRight) ? undefined : paddingRight,
};
// Set padding as undefined if all fields are 0
const padding = (paddingObj.top === 0 && paddingObj.bottom === 0 && paddingObj.left === 0 && paddingObj.right === 0)
? undefined
: paddingObj;
// Only include innerText if the element has only text nodes as direct children
const innerText = hasOnlyTextNodes(el) ? (el.textContent || undefined) : undefined;
// Parse z-index
const zIndex = parseInt(computedStyles.zIndex);
const zIndexValue = isNaN(zIndex) ? 0 : zIndex;
// Parse additional attributes
const textAlign = computedStyles.textAlign as 'left' | 'center' | 'right' | 'justify';
const borderRadius = computedStyles.borderRadius;
const objectFit = computedStyles.objectFit as 'contain' | 'cover' | 'fill' | undefined;
const imageSrc = (el as HTMLImageElement).src;
// Parse border radius
let borderRadiusValue: number | number[] | undefined;
if (borderRadius && borderRadius !== '0px') {
const radiusParts = borderRadius.split(' ').map(part => parseFloat(part));
if (radiusParts.length === 1) {
borderRadiusValue = radiusParts[0];
} else if (radiusParts.length === 4) {
borderRadiusValue = radiusParts;
}
}
// Determine shape for images
let shape: 'rectangle' | 'circle' | undefined;
if (el.tagName.toLowerCase() === 'img') {
shape = borderRadiusValue === 50 ? 'circle' : 'rectangle';
}
// Check for text wrap
const textWrap = computedStyles.whiteSpace !== 'nowrap';
return {
tagName: el.tagName.toLowerCase(),
id: el.id || undefined,
className: el.className || undefined,
innerText: el.textContent || undefined,
innerText,
background,
border,
shadow,
@ -174,34 +370,17 @@ async function getElementAttributes(element: ElementHandle<Element>): Promise<El
position,
margin,
padding,
zIndex: zIndexValue,
textAlign: textAlign !== 'left' ? textAlign : undefined,
borderRadius: borderRadiusValue,
imageSrc: imageSrc || undefined,
objectFit,
clip: false, // Default value
overlay: undefined,
shape,
connectorType: undefined,
textWrap,
};
});
return attributes;
}
async function getAllChildElementsAttributes(element: ElementHandle<Element>): Promise<ElementAttributes[]> {
// Get the root element's bounding rect for relative positioning
const rootRect = await element.evaluate((el) => el.getBoundingClientRect());
// Get all child elements as ElementHandles
const childElementHandles = await element.$$(':scope *');
// Get attributes for each child element using getElementAttributes
const attributesPromises = childElementHandles.map(async (childElementHandle) => {
const attributes = await getElementAttributes(childElementHandle);
// Convert positions to relative positions
if (attributes.position && attributes.position.left !== undefined && attributes.position.top !== undefined) {
attributes.position = {
left: attributes.position.left - rootRect.left,
top: attributes.position.top - rootRect.top,
width: attributes.position.width,
height: attributes.position.height,
};
}
return attributes;
});
return Promise.all(attributesPromises);
}

View file

@ -15,11 +15,15 @@ export interface ElementAttributes {
offset?: [number, number];
color?: string;
opacity?: number;
radius?: number;
angle?: number;
},
font?: {
name?: string;
size?: number;
weight?: number;
color?: string;
italic?: boolean;
};
position?: {
left?: number;
@ -39,4 +43,19 @@ export interface ElementAttributes {
left?: number;
right?: number;
};
zIndex?: number;
textAlign?: 'left' | 'center' | 'right' | 'justify';
borderRadius?: number | number[];
imageSrc?: string;
objectFit?: 'contain' | 'cover' | 'fill';
clip?: boolean;
overlay?: string;
shape?: 'rectangle' | 'circle';
connectorType?: string;
textWrap?: boolean;
}
/**
 * Attributes gathered for a single slide: the attributes of its elements and,
 * when one was detected, the slide's background color.
 */
export interface SlideAttributesResult {
  /** Background color of the slide, if any. */
  backgroundColor?: string;
  /** Attributes of the elements found on the slide. */
  elements: ElementAttributes[];
}

View file

@ -112,12 +112,13 @@ export interface PptxConnectorModel extends PptxShapeModel {
color?: string;
}
/** A single slide: an optional background fill plus the shapes it contains. */
export interface PptxSlideModel {
  /** Fill applied to the slide background, when present. */
  background?: PptxFillModel;
  /** Shapes on the slide, in array order. */
  shapes: Array<
    | PptxTextBoxModel
    | PptxAutoShapeBoxModel
    | PptxConnectorModel
    | PptxPictureBoxModel
  >;
}
export interface PptxPresentationModel {
background_color: string;
shapes?: PptxShapeModel[];
slides: PptxSlideModel[];
}
@ -145,6 +146,6 @@ export const positionToPtXyxy = (position: PptxPositionModel): number[] => {
const top = position.top || 0;
const width = position.width || 0;
const height = position.height || 0;
return [left, top, left + width, top + height];
};

View file

@ -0,0 +1,243 @@
import { ElementAttributes } from "@/types/element_attibutes";
import {
PptxSlideModel,
PptxTextBoxModel,
PptxAutoShapeBoxModel,
PptxPictureBoxModel,
PptxConnectorModel,
PptxPositionModel,
PptxSpacingModel,
PptxFillModel,
PptxStrokeModel,
PptxShadowModel,
PptxFontModel,
PptxParagraphModel,
PptxPictureModel,
PptxObjectFitModel,
PptxBoxShapeEnum,
PptxObjectFitEnum
} from "@/types/pptx_models";
/**
 * Builds one PptxSlideModel per entry of `slidesAttributes`.
 *
 * @param slidesAttributes One array of element attributes per slide.
 * @param backgroundColors Optional per-slide background colors, matched to
 *        slides by index; falsy entries leave the slide without a background.
 * @returns The slide models, in the same order as the input.
 */
export function convertElementAttributesToPptxSlides(
  slidesAttributes: ElementAttributes[][],
  backgroundColors?: (string | undefined)[]
): PptxSlideModel[] {
  return slidesAttributes.map((slideElements, slideIndex) => {
    // Elements that do not convert to a shape (null) contribute nothing.
    const shapes = slideElements.flatMap((element) => {
      const shape = convertElementToPptxShape(element);
      return shape ? [shape] : [];
    });

    const slide: PptxSlideModel = { shapes };

    const color = backgroundColors?.[slideIndex];
    if (color) {
      slide.background = { color };
    }

    return slide;
  });
}
/**
 * Picks the PPTX shape model that fits an element and converts it.
 *
 * Checks run in this order, first match wins:
 *  1. no position          -> null (element is skipped)
 *  2. `img` tag or class name containing "image"               -> picture box
 *  3. non-blank inner text                                     -> text box
 *  4. `hr` tag or class name containing "connector" or "line"  -> connector
 *  5. anything else                                            -> auto shape box
 */
function convertElementToPptxShape(
  element: ElementAttributes
): PptxTextBoxModel | PptxAutoShapeBoxModel | PptxConnectorModel | PptxPictureBoxModel | null {
  if (!element.position) {
    return null;
  }

  const { tagName, className, innerText } = element;
  // Substring test on the raw class string; only evaluated when actually needed.
  const classContains = (fragment: string) => className?.includes(fragment);

  const isImage = tagName === 'img' || classContains('image');
  if (isImage) {
    return convertToPictureBox(element);
  }

  const hasVisibleText = innerText && innerText.trim().length > 0;
  if (hasVisibleText) {
    return convertToTextBox(element);
  }

  const isConnector = tagName === 'hr' || classContains('connector') || classContains('line');
  return isConnector ? convertToConnector(element) : convertToAutoShapeBox(element);
}
/**
 * Converts an element to a PptxTextBoxModel with a single paragraph.
 *
 * - `margin` and `fill` are undefined when the element has no margin /
 *   background color.
 * - `bold` is derived from the font weight (>= 600) and is undefined when the
 *   weight is missing or 0.
 * - `text_wrap` defaults to true when the element does not specify it.
 */
function convertToTextBox(element: ElementAttributes): PptxTextBoxModel {
  const { position: box, margin: spacing, font: typeface } = element;

  const position: PptxPositionModel = {
    left: box?.left,
    top: box?.top,
    width: box?.width,
    height: box?.height
  };

  let margin: PptxSpacingModel | undefined;
  if (spacing) {
    margin = {
      top: spacing.top,
      bottom: spacing.bottom,
      left: spacing.left,
      right: spacing.right
    };
  }

  let fill: PptxFillModel | undefined;
  if (element.background?.color) {
    fill = { color: element.background.color };
  }

  let font: PptxFontModel | undefined;
  if (typeface) {
    font = {
      name: typeface.name,
      size: typeface.size,
      bold: typeface.weight ? typeface.weight >= 600 : undefined,
      italic: typeface.italic,
      color: typeface.color
    };
  }

  const onlyParagraph: PptxParagraphModel = {
    spacing: undefined,
    alignment: element.textAlign,
    font,
    text: element.innerText
  };

  return {
    margin,
    fill,
    position,
    text_wrap: element.textWrap ?? true,
    paragraphs: [onlyParagraph]
  };
}
/**
 * Converts an element to a PptxAutoShapeBoxModel.
 *
 * - `fill`, `stroke` and `shadow` are only emitted when the matching color is
 *   present on the element.
 * - Stroke thickness falls back to 1 when the border width is missing or 0.
 * - Shadow radius falls back to 4; the shadow offset is the length of the
 *   (x, y) offset vector.
 * - When `borderRadius` is an array only its first entry is used; a missing
 *   radius becomes 0.
 * - A paragraph is emitted only for non-blank text. Whitespace-only
 *   `innerText` is treated as "no text", matching the trimmed check that
 *   convertElementToPptxShape uses to detect text elements, so purely
 *   decorative boxes do not end up with an empty paragraph.
 */
function convertToAutoShapeBox(element: ElementAttributes): PptxAutoShapeBoxModel {
  const position: PptxPositionModel = {
    left: element.position?.left,
    top: element.position?.top,
    width: element.position?.width,
    height: element.position?.height
  };

  const margin: PptxSpacingModel | undefined = element.margin ? {
    top: element.margin.top,
    bottom: element.margin.bottom,
    left: element.margin.left,
    right: element.margin.right
  } : undefined;

  const fill: PptxFillModel | undefined = element.background?.color ? {
    color: element.background.color
  } : undefined;

  const stroke: PptxStrokeModel | undefined = element.border?.color ? {
    color: element.border.color,
    thickness: element.border.width || 1
  } : undefined;

  const shadow: PptxShadowModel | undefined = element.shadow?.color ? {
    radius: element.shadow.radius ?? 4,
    // Collapse the (x, y) offset into a single distance.
    offset: element.shadow.offset
      ? Math.sqrt(element.shadow.offset[0] ** 2 + element.shadow.offset[1] ** 2)
      : undefined,
    color: element.shadow.color,
    opacity: element.shadow.opacity,
    angle: element.shadow.angle
  } : undefined;

  // Blank (empty or whitespace-only) text produces no paragraph at all.
  const hasVisibleText = !!element.innerText && element.innerText.trim().length > 0;
  const paragraphs: PptxParagraphModel[] | undefined = hasVisibleText ? [{
    spacing: undefined,
    alignment: element.textAlign,
    font: element.font ? {
      name: element.font.name,
      size: element.font.size,
      bold: element.font.weight ? element.font.weight >= 600 : undefined,
      italic: element.font.italic,
      color: element.font.color
    } : undefined,
    text: element.innerText
  }] : undefined;

  return {
    margin,
    fill,
    stroke,
    shadow,
    position,
    text_wrap: element.textWrap ?? true,
    border_radius: element.borderRadius
      ? (Array.isArray(element.borderRadius) ? element.borderRadius[0] : element.borderRadius)
      : 0,
    paragraphs
  };
}
/**
 * Converts an element to a PptxPictureBoxModel.
 *
 * - `object_fit.fit` falls back to CONTAIN and `shape` to RECTANGLE when the
 *   element does not specify them.
 * - The picture counts as a network resource when its source starts with
 *   "http"; a missing source becomes an empty path.
 * - A single numeric border radius is wrapped in a one-element array; a
 *   missing (or zero) radius yields undefined.
 */
function convertToPictureBox(element: ElementAttributes): PptxPictureBoxModel {
  const { position: box, margin: spacing, borderRadius, imageSrc } = element;

  const position: PptxPositionModel = {
    left: box?.left,
    top: box?.top,
    width: box?.width,
    height: box?.height
  };

  let margin: PptxSpacingModel | undefined;
  if (spacing) {
    margin = {
      top: spacing.top,
      bottom: spacing.bottom,
      left: spacing.left,
      right: spacing.right
    };
  }

  const objectFit: PptxObjectFitModel = {
    fit: (element.objectFit || PptxObjectFitEnum.CONTAIN) as PptxObjectFitEnum
  };

  // Image location as read from the element.
  const picture: PptxPictureModel = {
    is_network: imageSrc?.startsWith('http') ?? false,
    path: imageSrc || ''
  };

  let cornerRadii: number[] | undefined;
  if (borderRadius) {
    cornerRadii = Array.isArray(borderRadius) ? borderRadius : [borderRadius];
  }

  return {
    position,
    margin,
    clip: element.clip ?? false,
    overlay: element.overlay,
    border_radius: cornerRadii,
    shape: (element.shape || PptxBoxShapeEnum.RECTANGLE) as PptxBoxShapeEnum,
    object_fit: objectFit,
    picture
  };
}
/**
 * Converts an element to a PptxConnectorModel.
 *
 * Thickness comes from the border width (1 when missing or 0). The color is
 * the border color, else the background color, else black.
 */
function convertToConnector(element: ElementAttributes): PptxConnectorModel {
  const { position: box, border, background, connectorType } = element;

  const lineThickness = border?.width || 1;
  const lineColor = border?.color || background?.color || '#000000';

  return {
    type: connectorType,
    position: {
      left: box?.left,
      top: box?.top,
      width: box?.width,
      height: box?.height
    },
    thickness: lineThickness,
    color: lineColor
  };
}