fix: stop flooding Upload banner with every Zoho column we don't use
Real Zoho time-log exports carry ~120 columns; we only consume ~20. The parser was reporting every unused header (Project Billing Client, Task Stage, Project Owner Email, … ~90 of them) under "Unrecognised columns", which surfaced a multi-line warning banner on every upload even though nothing was wrong.

New semantics — `unrecognised_columns` now lists only REQUIRED canonical fields we COULDN'T locate (date / submitter / hoursLogged). Empty list on every clean export. Surfaces the actual signal: "Zoho renamed something you depend on" — buried before, prominent now.

- zoho_parse.py: extras silently ignored; only missing requireds reported.
- UploadButton banner copy: "Couldn't find expected columns: …" with a hint that charts will be incomplete.
- Tests updated: extras don't trigger, missing requireds do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5efb5897db
commit
6e7338de99
3 changed files with 40 additions and 19 deletions
|
|
@ -1,9 +1,11 @@
|
|||
"""Zoho timelog parser.
|
||||
|
||||
Decisions:
|
||||
- Header matching is case-insensitive and trim-stripped. Unknown headers
|
||||
are surfaced in `unrecognised_columns` so the operator notices when
|
||||
Zoho silently renames a column.
|
||||
- Header matching is case-insensitive and trim-stripped. Real Zoho exports
|
||||
have ~120 columns; we only consume ~20. Reporting every unused column
|
||||
floods the UI with noise. Instead, `unrecognised_columns` lists only
|
||||
REQUIRED canonical fields we could not locate (date / submitter /
|
||||
hoursLogged) — that's the case that actually signals a Zoho rename.
|
||||
- Billable detection: we keep TWO canonical fields. `billable` accepts
|
||||
literal "Billable" / "Is Billable" columns (boolean-ish). `billingType`
|
||||
accepts a "Billing Type" column whose values look like
|
||||
|
|
@ -306,34 +308,34 @@ _DEFAULT_ROW: dict[str, Any] = {
|
|||
}
|
||||
|
||||
|
||||
REQUIRED_CANONICALS = ("date", "submitter", "hoursLogged")
|
||||
|
||||
|
||||
def _build_rows(
|
||||
raw_rows: Iterable[list[Any]],
|
||||
headers: list[Any],
|
||||
) -> tuple[list[dict[str, Any]], list[str]]:
|
||||
# Map column index → canonical key. Track unknown ones.
|
||||
# Map column index → canonical key.
|
||||
# FIRST occurrence of a header wins — the real Zoho CSV repeats
|
||||
# "Project Number" later in the row, and only the first column has
|
||||
# reliable per-time-entry data.
|
||||
# Columns we don't map are silently ignored — Zoho exports carry ~100
|
||||
# extra fields we don't need; flooding the UI with them is unhelpful.
|
||||
canonical_by_idx: dict[int, str] = {}
|
||||
canonical_seen: set[str] = set()
|
||||
unrecognised: list[str] = []
|
||||
unrecognised_seen: set[str] = set()
|
||||
for idx, raw in enumerate(headers):
|
||||
if raw is None or str(raw).strip() == "":
|
||||
continue
|
||||
canon = _canonicalise_header(raw)
|
||||
if canon:
|
||||
if canon in canonical_seen:
|
||||
continue
|
||||
if canon and canon not in canonical_seen:
|
||||
canonical_by_idx[idx] = canon
|
||||
canonical_seen.add(canon)
|
||||
else:
|
||||
name = str(raw).strip()
|
||||
if name and name not in unrecognised_seen:
|
||||
unrecognised.append(name)
|
||||
unrecognised_seen.add(name)
|
||||
|
||||
present_canonicals = set(canonical_seen)
|
||||
# Only surface a column as "unrecognised" when it's REQUIRED and missing —
|
||||
# this is the actual signal that Zoho renamed something on us. Reported
|
||||
# with the canonical field name so the user knows what to look for.
|
||||
unrecognised = [c for c in REQUIRED_CANONICALS if c not in present_canonicals]
|
||||
|
||||
out: list[dict[str, Any]] = []
|
||||
for raw_row in raw_rows:
|
||||
|
|
|
|||
|
|
@ -47,18 +47,33 @@ def test_aliased_headers():
|
|||
assert out["rows"][0]["date"] == date(2026, 5, 4)
|
||||
|
||||
|
||||
def test_unrecognised_header_surfaced():
|
||||
def test_unknown_extra_columns_are_silently_ignored():
|
||||
# Real Zoho exports carry ~100 columns we don't need. The parser
|
||||
# should not flood the UI with "unrecognised" warnings for them.
|
||||
csv = (
|
||||
"Date,Resource,Total Hours,Wibble Factor\n"
|
||||
"2026-05-04,Bhakti,7,5\n"
|
||||
"Date,Resource,Total Hours,Wibble Factor,Some Other Field\n"
|
||||
"2026-05-04,Bhakti,7,5,foo\n"
|
||||
).encode("utf-8")
|
||||
out = parse("u.csv", csv)
|
||||
assert "Wibble Factor" in out["unrecognised_columns"]
|
||||
assert out["unrecognised_columns"] == []
|
||||
# Known columns still parse.
|
||||
assert out["rows"][0]["employee"] == "Bhakti"
|
||||
assert out["rows"][0]["hours"] == 7.0
|
||||
|
||||
|
||||
def test_missing_required_column_is_surfaced():
|
||||
# Only fire the "unrecognised columns" banner when a REQUIRED canonical
|
||||
# field can't be found — that's the actual "Zoho renamed something" signal.
|
||||
csv = (
|
||||
"Resource,Total Hours\n" # no Date column
|
||||
"Bhakti,7\n"
|
||||
).encode("utf-8")
|
||||
out = parse("u.csv", csv)
|
||||
assert "date" in out["unrecognised_columns"]
|
||||
assert "submitter" not in out["unrecognised_columns"]
|
||||
assert "hoursLogged" not in out["unrecognised_columns"]
|
||||
|
||||
|
||||
def test_xlsx_path():
|
||||
wb = Workbook()
|
||||
ws = wb.active
|
||||
|
|
|
|||
|
|
@ -83,7 +83,11 @@ export default function UploadButton({
|
|||
|
||||
{unrecognised.length > 0 && (
|
||||
<div className="rounded-md border border-yellow-300 bg-yellow-50 p-2 text-xs text-yellow-900">
|
||||
<strong>Unrecognised columns:</strong> {unrecognised.join(', ')}
|
||||
<strong>Couldn't find expected column{unrecognised.length > 1 ? 's' : ''}:</strong>{' '}
|
||||
{unrecognised.join(', ')}
|
||||
<span className="ml-1 text-yellow-700">
|
||||
— Zoho may have renamed a header. Charts will be incomplete.
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue