pimco-charts/app/data/transformer.py
DJP a3a38e85d2 Initial commit: PIMCO chart generator with iterative refinement
AI-powered tool that generates publication-quality SVG charts matching
PIMCO's InDesign style. Upload Excel/CSV data, write a plain-English
brief, then iterate with natural language edits until the chart is
exactly right.

- Claude Opus 4.6 interprets briefs into structured ChartSpec JSON
- Deterministic SVG renderer via drawsvg (no visual hallucinations)
- Roboto/Roboto Condensed fonts base64-embedded in SVG
- FastAPI + HTMX web frontend with live preview
- Conversational refinement: "make lines thicker", "change title", etc.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 16:29:47 -05:00

64 lines
1.8 KiB
Python

"""Data transformation: date parsing, column selection, resampling."""
from __future__ import annotations
import pandas as pd
def prepare_dataframe(df: pd.DataFrame, date_column: str | None = None) -> pd.DataFrame:
"""Prepare a DataFrame for charting: parse dates, ensure numeric columns.
Args:
df: Raw DataFrame
date_column: Optional name of the date column. Auto-detected if None.
Returns:
Cleaned DataFrame with parsed dates and numeric columns.
"""
df = df.copy()
# Auto-detect date column
if date_column is None:
date_column = _detect_date_column(df)
if date_column and date_column in df.columns:
df[date_column] = pd.to_datetime(df[date_column], errors="coerce")
# Drop rows where date is NaT
df = df.dropna(subset=[date_column])
# Sort by date
df = df.sort_values(date_column).reset_index(drop=True)
# Convert numeric-looking columns
for col in df.columns:
if col == date_column:
continue
if df[col].dtype == object:
try:
df[col] = pd.to_numeric(df[col], errors="coerce")
except Exception:
pass
return df
def _detect_date_column(df: pd.DataFrame) -> str | None:
"""Auto-detect the date column in a DataFrame."""
# Check by name
for col in df.columns:
if str(col).lower() in ("date", "dates", "time", "timestamp", "period", "month"):
return col
# Check by dtype
for col in df.columns:
if df[col].dtype == "datetime64[ns]":
return col
# Try parsing first column
first_col = df.columns[0]
try:
parsed = pd.to_datetime(df[first_col].head(10), errors="coerce")
if parsed.notna().sum() >= 5:
return first_col
except Exception:
pass
return None