import json
import xml.etree.ElementTree as ET
from typing import Dict, List, Optional

# Namespace prefixes used in the ElementTree path expressions below.
NS = {
    "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
    "p": "http://schemas.openxmlformats.org/presentationml/2006/main",
    "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
}

# 1 EMU (English Metric Unit) = 1/360000 of a centimeter = 1/914400 of an inch
# 1 pixel = 9525 EMU (assuming 96 DPI: 914400 / 96 = 9525)
EMU_TO_PX = 9525


def _px(emu_str: Optional[str]) -> int:
    """Convert an EMU coordinate string to whole pixels.

    Args:
        emu_str: Attribute value holding an integer EMU count, or None/""
            when the attribute is absent.

    Returns:
        The value divided by ``EMU_TO_PX`` and rounded with ``round()``;
        0 when the input is missing or is not a valid integer.
    """
    if not emu_str:
        return 0
    try:
        return round(int(emu_str) / EMU_TO_PX)
    except (TypeError, ValueError):
        # Best-effort: a malformed coordinate degrades to 0 rather than
        # aborting extraction of the whole slide.
        return 0


def extract_geometry_from_oxml(xml_content: str) -> List[Dict]:
    """Extract bounding boxes for shapes, pictures and text from slide XML.

    Only direct ``p:sp`` and ``p:pic`` children of the first ``p:spTree``
    are inspected, and elements without an explicit ``a:xfrm`` (offset and
    extent) are skipped. All shapes are emitted first, then all pictures.

    Args:
        xml_content: OXML markup of a slide (or any part with a ``p:spTree``).

    Returns:
        A list of dicts with ``x``, ``y``, ``width``, ``height`` (pixels) and
        ``type`` (``"shape"``, ``"text_box"`` or ``"picture"``). ``name`` is
        added when the element has a ``p:cNvPr`` node; text boxes also carry
        ``text`` and pictures ``description``. An empty list is returned when
        the markup cannot be parsed (the error is printed) or has no shape
        tree.
    """
    elements: List[Dict] = []

    # Only parsing can realistically fail; keep the guarded region small so
    # genuine programming errors further down are not silently swallowed.
    try:
        root = ET.fromstring(xml_content)
    except (ET.ParseError, TypeError, ValueError) as e:
        print(f"Error extracting geometry: {e}")
        return elements

    # Look at both slide and slideLayout wrappers
    sp_tree = root.find(".//p:spTree", NS)
    if sp_tree is None:
        return elements

    # Process standard shapes (mostly text boxes)
    for sp in sp_tree.findall("p:sp", NS):
        element_data = _extract_xfrm(sp)
        if element_data is None:
            continue
        element_data["type"] = "shape"

        # The name attribute lives on p:cNvPr (the common non-visual
        # drawing properties), not on the sibling p:cNvSpPr.
        c_nv_pr = sp.find("p:nvSpPr/p:cNvPr", NS)
        if c_nv_pr is not None:
            element_data["name"] = c_nv_pr.get("name", "")

        # Check for text
        tx_body = sp.find("p:txBody", NS)
        if tx_body is not None:
            text_parts = [t.text for t in tx_body.findall(".//a:t", NS) if t.text]
            if text_parts:
                element_data["type"] = "text_box"
                element_data["text"] = " ".join(text_parts).strip()

        elements.append(element_data)

    # Process pictures
    for pic in sp_tree.findall("p:pic", NS):
        element_data = _extract_xfrm(pic)
        if element_data is None:
            continue
        element_data["type"] = "picture"

        # As with shapes, name and descr are attributes of p:cNvPr rather
        # than of p:cNvPicPr.
        c_nv_pr = pic.find("p:nvPicPr/p:cNvPr", NS)
        if c_nv_pr is not None:
            element_data["name"] = c_nv_pr.get("name", "")
            element_data["description"] = c_nv_pr.get("descr", "")

        elements.append(element_data)

    return elements


def _extract_xfrm(node) -> Optional[Dict]:
    """Read the position and size of a shape-like element.

    Args:
        node: An element expected to contain ``p:spPr/a:xfrm`` with ``a:off``
            and ``a:ext`` children.

    Returns:
        A dict with ``x``, ``y``, ``width`` and ``height`` in pixels, or None
        when any of ``p:spPr``, ``a:xfrm``, ``a:off`` or ``a:ext`` is missing.
    """
    sp_pr = node.find("p:spPr", NS)
    if sp_pr is None:
        return None

    xfrm = sp_pr.find("a:xfrm", NS)
    if xfrm is None:
        return None

    off = xfrm.find("a:off", NS)
    ext = xfrm.find("a:ext", NS)
    if off is None or ext is None:
        return None

    return {
        "x": _px(off.get("x")),
        "y": _px(off.get("y")),
        "width": _px(ext.get("cx")),
        "height": _px(ext.get("cy")),
    }


def format_geometry_for_llm(elements: List[Dict]) -> str:
    """Format the geometry list as a JSON string for the LLM prompt.

    Args:
        elements: Element dicts as produced by ``extract_geometry_from_oxml``.

    Returns:
        ``"[]"`` for an empty list; otherwise a 2-space-indented JSON array
        ordered top-to-bottom, then left-to-right. Elements lacking ``x`` or
        ``y`` sort as if the missing coordinate were 0.
    """
    if not elements:
        return "[]"

    # Sort primarily by pure Y (top to bottom), then X (left to right)
    sorted_elements = sorted(
        elements, key=lambda e: (e.get("y", 0), e.get("x", 0))
    )
    return json.dumps(sorted_elements, indent=2)