AI-powered tool that generates publication-quality SVG charts matching PIMCO's InDesign style. Upload Excel/CSV data, write a plain-English brief, then iterate with natural language edits until the chart is exactly right. - Claude Opus 4.6 interprets briefs into structured ChartSpec JSON - Deterministic SVG renderer via drawsvg (no visual hallucinations) - Roboto/Roboto Condensed fonts base64-embedded in SVG - FastAPI + HTMX web frontend with live preview - Conversational refinement: "make lines thicker", "change title", etc. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
64 lines
1.8 KiB
Python
64 lines
1.8 KiB
Python
"""Data transformation: date parsing, column selection, resampling."""
|
|
|
|
from __future__ import annotations
|
|
import pandas as pd
|
|
|
|
|
|
def prepare_dataframe(df: pd.DataFrame, date_column: str | None = None) -> pd.DataFrame:
    """Prepare a DataFrame for charting: parse dates, coerce numeric columns.

    The input frame is never modified; all work happens on a copy.

    Args:
        df: Raw DataFrame.
        date_column: Optional label of the date column. Auto-detected with
            ``_detect_date_column`` if None. A label that is not present in
            ``df`` is ignored, in which case no date handling takes place.

    Returns:
        A cleaned copy of ``df`` in which:

        * the date column (if any) has been parsed with ``pd.to_datetime``,
          rows whose date could not be parsed have been dropped, and the
          remaining rows are sorted by date with a fresh ``0..n-1`` index;
        * every other object-dtype column holding at least one
          numeric-looking value has been converted with ``pd.to_numeric``
          (cells that do not parse become NaN). Object columns in which
          nothing parses as a number, such as category labels, are kept
          unchanged instead of being blanked out.
    """
    df = df.copy()

    # Auto-detect date column
    if date_column is None:
        date_column = _detect_date_column(df)

    # Compare against None instead of relying on truthiness: valid column
    # labels such as 0 (header-less data) or "" are falsy.
    if date_column is not None and date_column in df.columns:
        df[date_column] = pd.to_datetime(df[date_column], errors="coerce")
        # Drop rows where date is NaT
        df = df.dropna(subset=[date_column])
        # Sort by date
        df = df.sort_values(date_column).reset_index(drop=True)

    # Convert numeric-looking columns
    for col in df.columns:
        if col == date_column:
            continue
        if df[col].dtype != object:
            continue
        try:
            converted = pd.to_numeric(df[col], errors="coerce")
        except (TypeError, ValueError):
            # Cells that to_numeric cannot handle at all: keep the column.
            continue
        # Only adopt the conversion when it preserves information. A column
        # with real values of which none parse is text, not numbers; a column
        # with no values at all loses nothing by becoming numeric NaN.
        if converted.notna().any() or not df[col].notna().any():
            df[col] = converted

    return df
|
|
|
|
|
|
def _detect_date_column(df: pd.DataFrame) -> str | None:
|
|
"""Auto-detect the date column in a DataFrame."""
|
|
# Check by name
|
|
for col in df.columns:
|
|
if str(col).lower() in ("date", "dates", "time", "timestamp", "period", "month"):
|
|
return col
|
|
|
|
# Check by dtype
|
|
for col in df.columns:
|
|
if df[col].dtype == "datetime64[ns]":
|
|
return col
|
|
|
|
# Try parsing first column
|
|
first_col = df.columns[0]
|
|
try:
|
|
parsed = pd.to_datetime(df[first_col].head(10), errors="coerce")
|
|
if parsed.notna().sum() >= 5:
|
|
return first_col
|
|
except Exception:
|
|
pass
|
|
|
|
return None
|