fix: stop flooding Upload banner with every Zoho column we don't use

Real Zoho time-log exports carry ~120 columns; we only consume ~20. The parser was reporting every unused header (Project Billing Client, Task Stage, Project Owner Email, … ~90 of them) under "Unrecognised columns", which surfaced a multi-line warning banner on every upload even though nothing was wrong.

New semantics — `unrecognised_columns` now lists only REQUIRED canonical fields we COULDN'T locate (date / submitter / hoursLogged). Empty list on every clean export. Surfaces the actual signal: "Zoho renamed something you depend on" — buried before, prominent now.

- zoho_parse.py: extras silently ignored; only missing requireds reported.
- UploadButton banner copy: "Couldn't find expected columns: …" with a hint that charts will be incomplete.
- Tests updated: extras don't trigger, missing requireds do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 21:52:04 -04:00 · 2026-05-17 21:52:04 -04:00 · 6e7338de99
commit 6e7338de99
parent 5efb5897db
3 changed files with 40 additions and 19 deletions
--- a/backend/app/services/zoho_parse.py
+++ b/backend/app/services/zoho_parse.py
@ -1,9 +1,11 @@
 """Zoho timelog parser.

 Decisions:
- Header matching is case-insensitive and trim-stripped. Unknown headers
-  are surfaced in `unrecognised_columns` so the operator notices when
-  Zoho silently renames a column.
+- Header matching is case-insensitive and trim-stripped. Real Zoho exports
+  have ~120 columns; we only consume ~20. Reporting every unused column
+  floods the UI with noise. Instead, `unrecognised_columns` lists only
+  REQUIRED canonical fields we could not locate (date / submitter /
+  hoursLogged) — that's the case that actually signals a Zoho rename.
 - Billable detection: we keep TWO canonical fields. `billable` accepts
  literal "Billable" / "Is Billable" columns (boolean-ish). `billingType`
  accepts a "Billing Type" column whose values look like
@ -306,34 +308,34 @@ _DEFAULT_ROW: dict[str, Any] = {
 }


+REQUIRED_CANONICALS = ("date", "submitter", "hoursLogged")
+
+
 def _build_rows(
    raw_rows: Iterable[list[Any]],
    headers: list[Any],
 ) -> tuple[list[dict[str, Any]], list[str]]:
-    # Map column index → canonical key. Track unknown ones.
+    # Map column index → canonical key.
    # FIRST occurrence of a header wins — the real Zoho CSV repeats
    # "Project Number" later in the row, and only the first column has
    # reliable per-time-entry data.
+    # Columns we don't map are silently ignored — Zoho exports carry ~100
+    # extra fields we don't need; flooding the UI with them is unhelpful.
    canonical_by_idx: dict[int, str] = {}
    canonical_seen: set[str] = set()
-    unrecognised: list[str] = []
-    unrecognised_seen: set[str] = set()
    for idx, raw in enumerate(headers):
        if raw is None or str(raw).strip() == "":
            continue
        canon = _canonicalise_header(raw)
-        if canon:
-            if canon in canonical_seen:
-                continue
+        if canon and canon not in canonical_seen:
            canonical_by_idx[idx] = canon
            canonical_seen.add(canon)
-        else:
-            name = str(raw).strip()
-            if name and name not in unrecognised_seen:
-                unrecognised.append(name)
-                unrecognised_seen.add(name)

    present_canonicals = set(canonical_seen)
+    # Only surface a column as "unrecognised" when it's REQUIRED and missing —
+    # this is the actual signal that Zoho renamed something on us. Reported
+    # with the canonical field name so the user knows what to look for.
+    unrecognised = [c for c in REQUIRED_CANONICALS if c not in present_canonicals]

    out: list[dict[str, Any]] = []
    for raw_row in raw_rows:
--- a/backend/tests/test_zoho_parse.py
+++ b/backend/tests/test_zoho_parse.py
@ -47,18 +47,33 @@ def test_aliased_headers():
    assert out["rows"][0]["date"] == date(2026, 5, 4)


-def test_unrecognised_header_surfaced():
+def test_unknown_extra_columns_are_silently_ignored():
+    # Real Zoho exports carry ~100 columns we don't need. The parser
+    # should not flood the UI with "unrecognised" warnings for them.
    csv = (
-        "Date,Resource,Total Hours,Wibble Factor\n"
-        "2026-05-04,Bhakti,7,5\n"
+        "Date,Resource,Total Hours,Wibble Factor,Some Other Field\n"
+        "2026-05-04,Bhakti,7,5,foo\n"
    ).encode("utf-8")
    out = parse("u.csv", csv)
-    assert "Wibble Factor" in out["unrecognised_columns"]
+    assert out["unrecognised_columns"] == []
    # Known columns still parse.
    assert out["rows"][0]["employee"] == "Bhakti"
    assert out["rows"][0]["hours"] == 7.0


+def test_missing_required_column_is_surfaced():
+    # Only populate `unrecognised_columns` (which drives the upload warning
+    # banner) when a REQUIRED canonical field can't be found — that's the
+    # actual "Zoho renamed something" signal.
+    csv = (
+        "Resource,Total Hours\n"  # no Date column
+        "Bhakti,7\n"
+    ).encode("utf-8")
+    out = parse("u.csv", csv)
+    assert "date" in out["unrecognised_columns"]
+    assert "submitter" not in out["unrecognised_columns"]
+    assert "hoursLogged" not in out["unrecognised_columns"]
+
+
 def test_xlsx_path():
    wb = Workbook()
    ws = wb.active
--- a/frontend/src/components/UploadButton.tsx
+++ b/frontend/src/components/UploadButton.tsx
@ -83,7 +83,11 @@ export default function UploadButton({

      {unrecognised.length > 0 && (
        <div className="rounded-md border border-yellow-300 bg-yellow-50 p-2 text-xs text-yellow-900">
-          <strong>Unrecognised columns:</strong> {unrecognised.join(', ')}
+          <strong>Couldn&apos;t find expected column{unrecognised.length > 1 ? 's' : ''}:</strong>{' '}
+          {unrecognised.join(', ')}
+          <span className="ml-1 text-yellow-700">
+            — Zoho may have renamed a header. Charts will be incomplete.
+          </span>
        </div>
      )}
    </div>