pimco-charts/app/data/transformer.py
Vadym Samoilenko db853cea9e Fix font exports, chart overflow, legend wrapping, and data interpretation
- Add RobotoCondensed-Bold.ttf and RobotoCondensed-Light.ttf; embed all three
  weights (300/normal/bold) in SVG @font-face for correct browser preview
- Dockerfile: install fontconfig and register fonts as system fonts so
  CairoSVG/Pango resolves Roboto Condensed in PNG/PDF exports (not a fallback)
- Export PNG/PDF at dpi=150 for higher quality output
- Add SVG <clipPath> per panel so data series cannot overflow axis boundaries
- Replace np.arange tick generation with safe while-loop (no float artifacts)
- Legend: multi-row wrapping when items exceed chart width; improved per-char
  text width estimation; legend_row_count() helper for dynamic layout
- Dynamic pad_top in compute_layout() based on legend rows + subtitle presence,
  preventing title/subtitle/legend overlap regardless of series count
- Y-axis: skip overlapping labels (gridlines always drawn); adaptive tick
  precision so -0.0042% shows correctly instead of -0.00%
- transformer.py: strip % signs from string-formatted percentages before
  pd.to_numeric; handle pandas 2.x StringDtype; expand date column detection
  to cover Year, Quarter, report_date, substring matches (start_datetime etc.)
- Sync _find_date_column() in engine.py with transformer.py detection logic
- prompts.py: enforce Roboto Condensed for all text; add axis range, label
  length, and decimal-percentage guidance rules

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-14 17:17:25 +01:00

93 lines
3.1 KiB
Python

"""Data transformation: date parsing, column selection, resampling."""
from __future__ import annotations
import pandas as pd
def prepare_dataframe(df: pd.DataFrame, date_column: str | None = None) -> pd.DataFrame:
    """Prepare a DataFrame for charting: parse dates, ensure numeric columns.

    The input frame is copied first, so the caller's object is never modified.

    Args:
        df: Raw DataFrame.
        date_column: Optional name of the date column. Auto-detected if None.

    Returns:
        Cleaned DataFrame. When a usable date column exists it is parsed to
        datetimes, rows whose date could not be parsed are dropped, and the
        frame is sorted by date with a fresh 0..n-1 index. Every other
        string/object column is converted with ``pd.to_numeric``; percentage
        strings such as ``"-0.42%"`` keep their face value (``-0.42``).
    """
    df = df.copy()

    # Auto-detect date column
    if date_column is None:
        date_column = _detect_date_column(df)

    if date_column and date_column in df.columns:
        raw_dates = df[date_column]
        try:
            parsed_dates = _parse_date_series(raw_dates)
        except Exception:
            # Best effort: an unparseable column is simply not treated as dates.
            parsed_dates = None

        nothing_parsed = parsed_dates is None or (
            raw_dates.notna().any() and not parsed_dates.notna().any()
        )
        if nothing_parsed:
            # The column held values but none of them is a date (e.g. a loose
            # name match, or a wrong explicit name). Dropping NaT rows here
            # would delete the whole dataset, so fall back to "no date column"
            # and leave the original values in place.
            date_column = None
        else:
            df[date_column] = parsed_dates
            # Drop rows where date is NaT, then sort by date
            df = df.dropna(subset=[date_column])
            df = df.sort_values(date_column).reset_index(drop=True)

    # Convert numeric-looking columns
    for col in df.columns:
        if col == date_column:
            continue
        column = df[col]
        # Handle object and string dtypes (pandas 2.x may use StringDtype)
        if not (pd.api.types.is_string_dtype(column) or column.dtype == object):
            continue
        try:
            # Build the converted column fully before assigning, so a failure
            # cannot leave the frame holding half-processed strings.
            converted = pd.to_numeric(_strip_percent_signs(column), errors="coerce")
        except Exception:
            # Best effort: a column that cannot be converted stays untouched.
            continue
        # NOTE(review): errors="coerce" turns every non-numeric value into NaN,
        # so a purely textual column ends up all-NaN — confirm callers expect it.
        df[col] = converted

    return df


# Calendar years that fit inside pandas' nanosecond-resolution timestamp range.
_MIN_YEAR = 1678
_MAX_YEAR = 2261


def _parse_date_series(values: pd.Series) -> pd.Series:
    """Parse *values* to datetimes, reading whole-number years as calendar years.

    ``pd.to_datetime`` interprets plain numbers as nanoseconds since the epoch,
    which would map the year 2020 to a 1970 timestamp. A numeric column whose
    non-null values are all whole numbers within the supported year range is
    therefore parsed with ``format="%Y"``; everything else uses the default
    parser with ``errors="coerce"``.
    """
    is_number = pd.api.types.is_numeric_dtype(values) and not pd.api.types.is_bool_dtype(values)
    if is_number:
        present = values.dropna()
        if not present.empty:
            year_like = (present % 1 == 0) & present.between(_MIN_YEAR, _MAX_YEAR)
            if bool(year_like.all()):
                as_text = values.map(lambda v: None if pd.isna(v) else str(int(v)))
                return pd.to_datetime(as_text, format="%Y", errors="coerce")
    return pd.to_datetime(values, errors="coerce")


def _strip_percent_signs(column: pd.Series) -> pd.Series:
    """Return *column* with trailing ``%`` and thousands commas removed.

    The column is only rewritten when more than half of its first 20 non-null
    values look like percentage strings (e.g. ``"-0.42%"``); otherwise it is
    returned unchanged. ``pd.to_numeric`` would coerce such strings to NaN.
    """
    sample = column.dropna().head(20).astype(str)
    pct_count = sample.str.match(r'^\s*-?\d[\d,.]*\s*%\s*$').sum()
    if pct_count <= len(sample) * 0.5:
        return column
    # Strip the % sign only; the value stays as-is (e.g. -0.42, not -0.0042)
    return (
        column.astype(str)
        .str.replace(r'%\s*$', '', regex=True)
        .str.replace(',', '', regex=False)
    )
# Lower-cased column names that are always treated as the date column.
_DATE_EXACT_NAMES = {
    "date", "dates", "time", "timestamp", "period", "month", "quarter",
    "year", "as of", "as_of", "report_date", "reporting_date",
    "fiscal_year", "observation_date", "obs_date",
}


def _detect_date_column(df: pd.DataFrame) -> str | None:
    """Auto-detect the date column in a DataFrame.

    Checks are tried in order and the first hit wins:

    1. A column whose lower-cased name is in ``_DATE_EXACT_NAMES``.
    2. A column whose lower-cased name contains "date" or "time". This is a
       plain substring test, so names such as "update" also match.
    3. A column that already has a datetime dtype (any resolution, with or
       without a timezone).
    4. The first column, if it is non-numeric and at least 5 of its first 10
       values parse as dates.

    Returns:
        The matching column label, or None when nothing qualifies (including
        a frame with no columns).
    """
    columns = list(df.columns)
    if not columns:
        # df.columns[0] below would raise IndexError on a column-less frame.
        return None

    lowered = [(col, str(col).lower()) for col in columns]

    # Exact name match
    for col, name in lowered:
        if name in _DATE_EXACT_NAMES:
            return col

    # Substring match: any column whose name contains "date" or "time"
    for col, name in lowered:
        if "date" in name or "time" in name:
            return col

    # Check by dtype; comparing against "datetime64[ns]" alone would miss
    # tz-aware columns and the non-nanosecond resolutions of pandas 2.x.
    for col in columns:
        if pd.api.types.is_datetime64_any_dtype(df[col]):
            return col

    # Try parsing first column
    first_col = columns[0]
    try:
        head = df[first_col].head(10)
        # Numbers always "parse" (as nanoseconds since the epoch), so a numeric
        # id or value column must not be mistaken for dates.
        if pd.api.types.is_numeric_dtype(head):
            return None
        parsed = pd.to_datetime(head, errors="coerce")
        if parsed.notna().sum() >= 5:
            return first_col
    except Exception:
        # Best effort: a column that cannot be inspected is not a date column.
        pass
    return None