ppt-tool/backend/services/native_chart_service.py
Vadym Samoilenko a2bd4cfefa Phase 3: Content Pipeline — file parsing, content intelligence, slide mapping, native charts
- Step 10: Extended file upload for Excel/CSV/images/URLs (openpyxl, trafilatura)
- Step 11: Content intelligence service with rule-based + LLM classification
- Step 12: Slide mapping engine mapping content blocks to master deck layouts
- Step 13: Chart data extractor, native PPTX chart service (bar/line/pie/gantt/waterfall), ChartDataEditor skeleton

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 15:54:04 +00:00

327 lines
12 KiB
Python

"""Native Chart Service: render ChartData as native python-pptx charts on slides."""
from typing import List, Optional

from pptx.chart.data import CategoryChartData, XyChartData
from pptx.dml.color import RGBColor
from pptx.enum.chart import XL_CHART_TYPE, XL_LABEL_POSITION, XL_LEGEND_POSITION
from pptx.enum.shapes import MSO_SHAPE
from pptx.oxml.xmlchemy import OxmlElement
from pptx.slide import Slide
from pptx.util import Emu, Pt

from services.chart_data_extractor import ChartData

# Lookup from the chart_type strings used by ChartData to the python-pptx
# chart type enum member that renders them as a native chart.
_CHART_TYPE_MAP = {
    "bar": XL_CHART_TYPE.BAR_CLUSTERED,
    "column": XL_CHART_TYPE.COLUMN_CLUSTERED,
    "line": XL_CHART_TYPE.LINE_MARKERS,
    "pie": XL_CHART_TYPE.PIE,
    "doughnut": XL_CHART_TYPE.DOUGHNUT,
    "area": XL_CHART_TYPE.AREA,
    "scatter": XL_CHART_TYPE.XY_SCATTER,
}

# Fallback series palette (hex RGB, no leading "#") used when the caller
# supplies no brand colors.
_DEFAULT_COLORS = [
    "4472C4",
    "ED7D31",
    "A5A5A5",
    "FFC000",
    "5B9BD5",
    "70AD47",
    "264478",
    "9B57A0",
    "636363",
    "EB6E1F",
]
class NativeChartService:
    """Draw ``ChartData`` objects onto python-pptx slides.

    Standard chart types are rendered as native PowerPoint charts through
    ``slide.shapes.add_chart``; "gantt" and "waterfall" are drawn from
    rectangles and text boxes instead.
    """

    # Chart types that draw one slice per category of a single series, so
    # colors must be applied per point rather than per series.
    _PIE_TYPES = ("pie", "doughnut")

    def add_chart(
        self,
        slide: Slide,
        chart_data: ChartData,
        left: int,
        top: int,
        width: int,
        height: int,
        brand_colors: Optional[List[str]] = None,
        font_name: Optional[str] = None,
    ) -> None:
        """Add a chart to a slide.

        For standard chart types (bar, column, line, pie, doughnut, area,
        scatter) this uses python-pptx's ``add_chart`` API. "gantt" and
        "waterfall" fall back to shape-based rendering. Any other chart_type
        string is rendered as a clustered column chart.

        Args:
            slide: The pptx Slide object.
            chart_data: ChartData with chart_type, title, categories, series.
            left, top, width, height: Position/size in points (raw numbers,
                wrapped in ``Pt`` here).
            brand_colors: Hex color strings without "#" (e.g. ["4472C4"]).
                Falls back to the module default palette when empty or None.
            font_name: Font family name for labels; fonts are left at the
                theme default when omitted.
        """
        chart_type = chart_data.chart_type
        if chart_type == "gantt":
            self._add_gantt_chart(
                slide, chart_data, left, top, width, height, brand_colors, font_name
            )
            return
        if chart_type == "waterfall":
            self._add_waterfall_chart(
                slide, chart_data, left, top, width, height, brand_colors, font_name
            )
            return

        colors = brand_colors or _DEFAULT_COLORS
        xl_chart_type = _CHART_TYPE_MAP.get(chart_type, XL_CHART_TYPE.COLUMN_CLUSTERED)

        # XY charts are written from x/y points; python-pptx cannot build
        # them from category chart data.
        if xl_chart_type == XL_CHART_TYPE.XY_SCATTER:
            pptx_data = self._build_xy_data(chart_data)
        else:
            pptx_data = CategoryChartData()
            pptx_data.categories = chart_data.categories
            for series in chart_data.series:
                pptx_data.add_series(series.name, series.values)

        chart_frame = slide.shapes.add_chart(
            xl_chart_type,
            Pt(left), Pt(top), Pt(width), Pt(height),
            pptx_data,
        )
        chart = chart_frame.chart

        # A legend only helps when there is more than one series.
        chart.has_legend = len(chart_data.series) > 1
        if chart.has_legend:
            legend = chart.legend
            legend.position = XL_LEGEND_POSITION.BOTTOM
            legend.include_in_layout = False
            if font_name:
                legend.font.name = font_name
            legend.font.size = Pt(9)

        chart.has_title = True
        title_frame = chart.chart_title.text_frame
        title_frame.text = chart_data.title
        title_font = title_frame.paragraphs[0].font
        if font_name:
            title_font.name = font_name
        title_font.size = Pt(12)
        title_font.bold = True

        self._apply_series_colors(chart, colors, chart_type)

        if chart_type in self._PIE_TYPES:
            self._add_pie_labels(chart, font_name)
        else:
            self._style_axes(chart, font_name)

    @staticmethod
    def _build_xy_data(chart_data: ChartData) -> XyChartData:
        """Convert category-style ChartData into XyChartData for scatter charts.

        Categories are used as x values when every one of them converts to
        float; otherwise the 1-based category position is used. Points whose
        y value is None are skipped.
        """
        categories = list(chart_data.categories)
        try:
            x_values = [float(category) for category in categories]
        except (TypeError, ValueError):
            x_values = [float(position) for position in range(1, len(categories) + 1)]

        xy_data = XyChartData()
        for series in chart_data.series:
            xy_series = xy_data.add_series(series.name)
            for x_value, y_value in zip(x_values, series.values):
                if y_value is not None:
                    xy_series.add_data_point(x_value, y_value)
        return xy_data

    def _apply_series_colors(
        self,
        chart,
        colors: List[str],
        chart_type: Optional[str] = None,
    ) -> None:
        """Apply the palette to the first plot of the chart.

        Args:
            chart: python-pptx chart object.
            colors: Hex color strings, cycled when there are more items
                than colors. Nothing is changed when the list is empty.
            chart_type: Our chart_type string. "pie"/"doughnut" color each
                point, "scatter" colors the markers, anything else (or None)
                colors each series as a whole.
        """
        if not colors:
            return
        plot = chart.plots[0]

        if chart_type in self._PIE_TYPES:
            # A series-level fill would paint every slice the same color.
            for series in plot.series:
                points = series.points
                for idx in range(len(points)):
                    fill = points[idx].format.fill
                    fill.solid()
                    fill.fore_color.rgb = RGBColor.from_string(colors[idx % len(colors)])
            return

        for idx, series in enumerate(plot.series):
            rgb = RGBColor.from_string(colors[idx % len(colors)])
            if chart_type == "scatter":
                # Color the markers only; setting a series line color would
                # make the connecting line between points visible.
                marker_format = series.marker.format
                marker_format.fill.solid()
                marker_format.fill.fore_color.rgb = rgb
                marker_format.line.color.rgb = rgb
                continue
            series.format.fill.solid()
            series.format.fill.fore_color.rgb = rgb
            # Series exposing `smooth` are drawn as lines, so color the line too.
            if hasattr(series, "smooth"):
                series.format.line.color.rgb = rgb

    def _style_axes(self, chart, font_name: Optional[str]) -> None:
        """Turn off minor gridlines and set tick-label fonts on both axes.

        An axis the chart does not have is skipped; the other axis is still
        styled.
        """
        for axis_attr in ("category_axis", "value_axis"):
            try:
                axis = getattr(chart, axis_attr)
            except ValueError:
                # python-pptx raises ValueError when the chart has no such axis.
                continue
            axis.has_minor_gridlines = False
            if font_name:
                axis.tick_labels.font.name = font_name
            axis.tick_labels.font.size = Pt(8)

    def _add_pie_labels(self, chart, font_name: Optional[str]) -> None:
        """Show percentage-only data labels on the first plot of the chart."""
        plot = chart.plots[0]
        plot.has_data_labels = True
        labels = plot.data_labels
        labels.show_percentage = True
        labels.show_category_name = False
        labels.show_value = False
        labels.number_format = '0%'
        if font_name:
            labels.font.name = font_name
        labels.font.size = Pt(9)

    # --- Shape-based charts (Gantt, Waterfall) ---

    def _add_title_box(
        self,
        slide: Slide,
        text: str,
        left: int,
        top: int,
        width: int,
        font_name: Optional[str],
    ) -> None:
        """Add a bold 12pt title text box, 24pt tall, at the top of a chart area."""
        title_box = slide.shapes.add_textbox(Pt(left), Pt(top), Pt(width), Pt(24))
        frame = title_box.text_frame
        frame.text = text
        font = frame.paragraphs[0].font
        if font_name:
            font.name = font_name
        font.size = Pt(12)
        font.bold = True

    def _add_gantt_chart(
        self,
        slide: Slide,
        chart_data: ChartData,
        left: int, top: int, width: int, height: int,
        brand_colors: Optional[List[str]] = None,
        font_name: Optional[str] = None,
    ) -> None:
        """Render a Gantt chart using rounded rectangles.

        Expects categories = task names, series[0] = start values and
        series[1] = duration values, all numeric (e.g. week numbers or day
        offsets). Only tasks that have both a start and a duration are
        drawn; nothing is drawn when fewer than two series are given or no
        such task exists.
        """
        colors = brand_colors or _DEFAULT_COLORS
        if len(chart_data.series) < 2:
            return

        # zip() stops at the shortest input, so every row is complete.
        rows = list(zip(
            chart_data.categories,
            chart_data.series[0].values,
            chart_data.series[1].values,
        ))
        if not rows:
            return

        min_start = min(start for _, start, _ in rows)
        max_end = max(start + duration for _, start, duration in rows)
        range_span = (max_end - min_start) or 1  # avoid dividing by zero

        label_width = 115
        row_gap = 4
        chart_left = left + 120  # leave room for the task labels
        chart_width = width - 130
        bars_height = height - 40  # leave room for the title
        bar_h = max(bars_height // len(rows) - row_gap, 10)

        self._add_title_box(slide, chart_data.title, left, top, width, font_name)

        for idx, (task, start, duration) in enumerate(rows):
            y = top + 30 + idx * (bar_h + row_gap)

            label = slide.shapes.add_textbox(Pt(left), Pt(y), Pt(label_width), Pt(bar_h))
            label.text_frame.word_wrap = True
            label.text_frame.text = task
            label_font = label.text_frame.paragraphs[0].font
            if font_name:
                label_font.name = font_name
            label_font.size = Pt(8)

            # Scale start/duration into the horizontal space right of the labels.
            bar_x = chart_left + int((start - min_start) / range_span * chart_width)
            bar_w = max(int(duration / range_span * chart_width), 6)
            bar = slide.shapes.add_shape(
                MSO_SHAPE.ROUNDED_RECTANGLE,
                Pt(bar_x), Pt(y), Pt(bar_w), Pt(bar_h),
            )
            bar.fill.solid()
            bar.fill.fore_color.rgb = RGBColor.from_string(colors[idx % len(colors)])
            bar.line.fill.background()  # no border

    def _add_waterfall_chart(
        self,
        slide: Slide,
        chart_data: ChartData,
        left: int, top: int, width: int, height: int,
        brand_colors: Optional[List[str]] = None,
        font_name: Optional[str] = None,
    ) -> None:
        """Render a waterfall chart using rectangles.

        series[0].values are incremental changes (positive or negative).
        The last bar is drawn as the total: it spans from zero to the sum
        of all values, including the last one.
        NOTE(review): if callers pass a precomputed total as the last value
        it will be counted twice -- confirm against the data extractor.
        """
        colors = brand_colors or _DEFAULT_COLORS
        if not chart_data.series:
            return
        increments = chart_data.series[0].values
        categories = chart_data.categories
        bar_count = len(increments)
        if bar_count == 0:
            return

        # bases[i] is the running total before increment i is applied.
        bases = []
        running = 0
        for increment in increments:
            bases.append(running)
            running += increment
        levels = bases + [running]
        max_val = max(max(levels), max(abs(v) for v in increments), 1)
        min_val = min(min(levels), 0)
        val_range = (max_val - min_val) or 1  # avoid dividing by zero

        bar_gap = 8
        chart_area_top = top + 30
        chart_area_height = height - 60
        chart_area_left = left + 10
        chart_area_width = width - 20
        bar_width = max(chart_area_width // bar_count - bar_gap, 12)

        self._add_title_box(slide, chart_data.title, left, top, width, font_name)

        positive_color = colors[0] if colors else "4472C4"
        negative_color = colors[1] if len(colors) > 1 else "ED7D31"
        total_color = colors[2] if len(colors) > 2 else "A5A5A5"

        last_index = bar_count - 1
        for idx, (increment, base) in enumerate(zip(increments, bases)):
            x = chart_area_left + idx * (bar_width + bar_gap)

            if idx == last_index:
                # Total bar spans zero..total; a negative total hangs below zero.
                total = base + increment
                bar_bottom = min(total, 0)
                bar_size = abs(total)
                color = total_color
            elif increment >= 0:
                bar_bottom = base
                bar_size = increment
                color = positive_color
            else:
                bar_bottom = base + increment
                bar_size = abs(increment)
                color = negative_color

            # Slide y grows downward, so measure each bar from max_val.
            bar_top_y = chart_area_top + int(
                (max_val - bar_bottom - bar_size) / val_range * chart_area_height
            )
            bar_h = max(int(bar_size / val_range * chart_area_height), 4)
            bar = slide.shapes.add_shape(
                MSO_SHAPE.RECTANGLE,
                Pt(x), Pt(bar_top_y), Pt(bar_width), Pt(bar_h),
            )
            bar.fill.solid()
            bar.fill.fore_color.rgb = RGBColor.from_string(color)
            bar.line.fill.background()  # no border

            # Category label below the plot area.
            lbl = slide.shapes.add_textbox(
                Pt(x - 4), Pt(chart_area_top + chart_area_height + 2),
                Pt(bar_width + bar_gap), Pt(20),
            )
            lbl.text_frame.word_wrap = True
            lbl.text_frame.text = categories[idx] if idx < len(categories) else ""
            lbl_font = lbl.text_frame.paragraphs[0].font
            if font_name:
                lbl_font.name = font_name
            lbl_font.size = Pt(7)