loreal-utilisation-dept/backend/tests/test_zoho_parse.py
DJP 6e7338de99 fix: stop flooding Upload banner with every Zoho column we don't use
Real Zoho time-log exports carry ~120 columns; we only consume ~20. The
parser was reporting every unused header (Project Billing Client, Task
Stage, Project Owner Email, … ~90 of them) under "Unrecognised columns",
which surfaced a multi-line warning banner on every upload even though
nothing was wrong.

New semantics — `unrecognised_columns` now lists only REQUIRED canonical
fields we COULDN'T locate (date / submitter / hoursLogged). Empty list
on every clean export. Surfaces the actual signal: "Zoho renamed
something you depend on" — buried before, prominent now.

- zoho_parse.py: extras silently ignored; only missing requireds reported.
- UploadButton banner copy: "Couldn't find expected columns: …" with a
  hint that charts will be incomplete.
- Tests updated: extras don't trigger, missing requireds do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 21:52:04 -04:00

139 lines
4.4 KiB
Python

"""Tests for the Zoho timelog parser."""
from __future__ import annotations
import io
from datetime import date
from pathlib import Path
import pytest
from openpyxl import Workbook
from app.services.zoho_parse import parse
# Sample Zoho CSV export, resolved relative to this test module so the
# tests do not depend on the current working directory.
FIXTURE_CSV = Path(__file__).parent.joinpath("fixtures", "sample_zoho.csv")
def test_canonical_csv_headers():
    """Parse the sample fixture and check the hash, warnings and row fields."""
    parsed = parse("sample_zoho.csv", FIXTURE_CSV.read_bytes())
    records = parsed["rows"]

    assert parsed["content_hash"].startswith("sha256:")
    assert parsed["unrecognised_columns"] == []
    assert len(records) == 4

    first = records[0]
    assert first["date"] == date(2026, 5, 4)
    assert first["employee"] == "Bhakti Doshi"
    assert first["project"] == "Acme Spring Launch"
    assert first["hours"] == 7.0
    assert first["billable"] is True

    # Idle Time → not billable
    assert records[2]["billable"] is False
    # Fee Related → billable
    assert records[3]["billable"] is True
def test_aliased_headers():
    """Alternative header spellings still populate the expected row fields."""
    upload = (
        b"Resource,Project,Total Hours,Log Date,Is Billable\n"
        b"Bhakti Doshi,Acme,7.5,2026-05-04,true\n"
    )
    parsed = parse("aliased.csv", upload)

    assert parsed["unrecognised_columns"] == []
    # Each lookup goes through parsed["rows"][0] again, as the original did.
    assert parsed["rows"][0]["employee"] == "Bhakti Doshi"
    assert parsed["rows"][0]["hours"] == 7.5
    assert parsed["rows"][0]["billable"] is True
    assert parsed["rows"][0]["date"] == date(2026, 5, 4)
def test_unknown_extra_columns_are_silently_ignored():
    """Columns the parser does not consume must not produce warnings."""
    # Real Zoho exports carry ~100 columns we don't need. The parser
    # should not flood the UI with "unrecognised" warnings for them.
    upload = (
        b"Date,Resource,Total Hours,Wibble Factor,Some Other Field\n"
        b"2026-05-04,Bhakti,7,5,foo\n"
    )
    parsed = parse("u.csv", upload)

    assert parsed["unrecognised_columns"] == []

    # Known columns still parse.
    assert parsed["rows"][0]["employee"] == "Bhakti"
    assert parsed["rows"][0]["hours"] == 7.0
def test_missing_required_column_is_surfaced():
    """A required column that is absent is reported; present ones are not."""
    # Only fire the "unrecognised columns" banner when a REQUIRED canonical
    # field can't be found — that's the actual "Zoho renamed something" signal.
    upload = (
        b"Resource,Total Hours\n"  # no Date column
        b"Bhakti,7\n"
    )
    flagged = parse("u.csv", upload)["unrecognised_columns"]

    assert "date" in flagged
    assert "submitter" not in flagged
    assert "hoursLogged" not in flagged
def test_xlsx_path():
    """An .xlsx workbook built in memory parses into the expected row."""
    workbook = Workbook()
    sheet = workbook.active
    header = ["Date", "Resource Name", "Project Title", "Task", "Hours", "Billable"]
    record = ["2026-05-04", "Bhakti Doshi", "Acme", "Design", 7.5, "Yes"]
    sheet.append(header)
    sheet.append(record)

    # Serialise the workbook to bytes without touching the filesystem.
    stream = io.BytesIO()
    workbook.save(stream)
    parsed = parse("up.xlsx", stream.getvalue())

    assert parsed["rows"][0]["employee"] == "Bhakti Doshi"
    assert parsed["rows"][0]["hours"] == 7.5
    assert parsed["rows"][0]["date"] == date(2026, 5, 4)
    assert parsed["rows"][0]["billable"] is True
def test_empty_rows_skipped():
    """Blank lines and all-empty rows do not show up in the parsed rows."""
    upload = (
        b"Date,Resource,Hours\n"
        b"\n"
        b"2026-05-04,Bhakti,7\n"
        b",,\n"
    )
    parsed_rows = parse("blank.csv", upload)["rows"]
    assert len(parsed_rows) == 1
def test_hh_mm_hours_parsed():
    """An ``H:MM`` hours value is converted to fractional hours."""
    upload = (
        b"Date,Resource,Hours\n"
        b"2026-05-04,Bhakti,7:30\n"
    )
    only_row = parse("hhmm.csv", upload)["rows"][0]
    assert only_row["hours"] == pytest.approx(7.5)
def test_content_hash_stable():
    """Parsing the same fixture bytes twice yields the same content hash."""
    # Two independent reads and parses, in sequence, as in the original test.
    hashes = [
        parse("a.csv", FIXTURE_CSV.read_bytes())["content_hash"]
        for _ in range(2)
    ]
    assert hashes[0] == hashes[1]
def test_billing_type_header_populates_billingType_and_billable():
    """A 'Billing Type' column fills both `billingType` and `billable`.

    Each row is expected to carry the lowercased billing type, with
    `billable` derived from it.
    """
    upload = (
        b"Date,Resource,Total Hours,Billing Type\n"
        b"2026-05-04,Bhakti,7,Client Related\n"
        b"2026-05-05,Bhakti,8,Leave Hours\n"
        b"2026-05-06,Bhakti,4,Idle Time\n"
        b"2026-05-07,Bhakti,6,Fee Related\n"
    )
    parsed = parse("bt.csv", upload)
    assert parsed["unrecognised_columns"] == []

    records = parsed["rows"]
    assert len(records) == 4

    # (expected billingType, expected billable) per input row, in file order.
    expectations = [
        ("client related", True),
        ("leave hours", False),
        ("idle time", False),
        ("fee related", True),
    ]
    for record, (billing_type, is_billable) in zip(records, expectations):
        assert record["billingType"] == billing_type
        assert record["billable"] is is_billable