diff --git a/backend/app/services/zoho_parse.py b/backend/app/services/zoho_parse.py index f4a05da..973960c 100644 --- a/backend/app/services/zoho_parse.py +++ b/backend/app/services/zoho_parse.py @@ -1,9 +1,11 @@ """Zoho timelog parser. Decisions: -- Header matching is case-insensitive and trim-stripped. Unknown headers - are surfaced in `unrecognised_columns` so the operator notices when - Zoho silently renames a column. +- Header matching is case-insensitive and trim-stripped. Real Zoho exports + have ~120 columns; we only consume ~20. Reporting every unused column + floods the UI with noise. Instead, `unrecognised_columns` lists only + REQUIRED canonical fields we could not locate (e.g. date / hours / + employee) — that's the case that actually signals a Zoho rename. - Billable detection: we keep TWO canonical fields. `billable` accepts literal "Billable" / "Is Billable" columns (boolean-ish). `billingType` accepts a "Billing Type" column whose values look like @@ -306,34 +308,34 @@ _DEFAULT_ROW: dict[str, Any] = { } +REQUIRED_CANONICALS = ("date", "submitter", "hoursLogged") + + def _build_rows( raw_rows: Iterable[list[Any]], headers: list[Any], ) -> tuple[list[dict[str, Any]], list[str]]: - # Map column index → canonical key. Track unknown ones. + # Map column index → canonical key. # FIRST occurrence of a header wins — the real Zoho CSV repeats # "Project Number" later in the row, and only the first column has # reliable per-time-entry data. + # Columns we don't map are silently ignored — Zoho exports carry ~100 + # extra fields we don't need; flooding the UI with them is unhelpful. canonical_by_idx: dict[int, str] = {} canonical_seen: set[str] = set() - unrecognised: list[str] = [] - unrecognised_seen: set[str] = set() for idx, raw in enumerate(headers): if raw is None or str(raw).strip() == "": continue canon = _canonicalise_header(raw) - if canon: - if canon in canonical_seen: - continue + if canon and canon not in canonical_seen: canonical_by_idx[idx] = canon canonical_seen.add(canon) - else: - name = str(raw).strip() - if name and name not in unrecognised_seen: - unrecognised.append(name) - unrecognised_seen.add(name) present_canonicals = set(canonical_seen) + # Only surface a column as "unrecognised" when it's REQUIRED and missing — + # this is the actual signal that Zoho renamed something on us. Reported + # with the canonical field name so the user knows what to look for. + unrecognised = [c for c in REQUIRED_CANONICALS if c not in present_canonicals] out: list[dict[str, Any]] = [] for raw_row in raw_rows: diff --git a/backend/tests/test_zoho_parse.py b/backend/tests/test_zoho_parse.py index f848bc7..5712e00 100644 --- a/backend/tests/test_zoho_parse.py +++ b/backend/tests/test_zoho_parse.py @@ -47,18 +47,33 @@ def test_aliased_headers(): assert out["rows"][0]["date"] == date(2026, 5, 4) -def test_unrecognised_header_surfaced(): +def test_unknown_extra_columns_are_silently_ignored(): + # Real Zoho exports carry ~100 columns we don't need. The parser + # should not flood the UI with "unrecognised" warnings for them. csv = ( - "Date,Resource,Total Hours,Wibble Factor\n" - "2026-05-04,Bhakti,7,5\n" + "Date,Resource,Total Hours,Wibble Factor,Some Other Field\n" + "2026-05-04,Bhakti,7,5,foo\n" ).encode("utf-8") out = parse("u.csv", csv) - assert "Wibble Factor" in out["unrecognised_columns"] + assert out["unrecognised_columns"] == [] # Known columns still parse. assert out["rows"][0]["employee"] == "Bhakti" assert out["rows"][0]["hours"] == 7.0 +def test_missing_required_column_is_surfaced(): + # Only fire the "unrecognised columns" banner when a REQUIRED canonical + # field can't be found — that's the actual "Zoho renamed something" signal. + csv = ( + "Resource,Total Hours\n" # no Date column + "Bhakti,7\n" + ).encode("utf-8") + out = parse("u.csv", csv) + assert "date" in out["unrecognised_columns"] + assert "submitter" not in out["unrecognised_columns"] + assert "hoursLogged" not in out["unrecognised_columns"] + + def test_xlsx_path(): wb = Workbook() ws = wb.active diff --git a/frontend/src/components/UploadButton.tsx b/frontend/src/components/UploadButton.tsx index dbf382d..79ec905 100644 --- a/frontend/src/components/UploadButton.tsx +++ b/frontend/src/components/UploadButton.tsx @@ -83,7 +83,11 @@ export default function UploadButton({ {unrecognised.length > 0 && (
- Unrecognised columns: {unrecognised.join(', ')} + Couldn't find expected column{unrecognised.length > 1 ? 's' : ''}:{' '} + {unrecognised.join(', ')} + + — Zoho may have renamed a header. Charts will be incomplete. +
)}