Real Zoho time-log exports carry ~120 columns; we only consume ~20. The parser was reporting every unused header (Project Billing Client, Task Stage, Project Owner Email, … ~90 of them) under "Unrecognised columns", which surfaced a multi-line warning banner on every upload even though nothing was wrong.

New semantics: `unrecognised_columns` now lists only the REQUIRED canonical fields we COULDN'T locate (date / submitter / hoursLogged). It is an empty list on every clean export. This surfaces the actual signal, "Zoho renamed something you depend on", which was buried before and is prominent now.

- zoho_parse.py: extras silently ignored; only missing required fields reported.
- UploadButton banner copy: "Couldn't find expected columns: …" with a hint that charts will be incomplete.
- Tests updated: extras don't trigger the banner, missing required fields do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
139 lines
4.4 KiB
Python
139 lines
4.4 KiB
Python
"""Tests for the Zoho timelog parser."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import io
|
|
from datetime import date
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from openpyxl import Workbook
|
|
|
|
from app.services.zoho_parse import parse
|
|
|
|
|
|
# Path to the sample Zoho CSV export, kept in the `fixtures` directory that
# sits next to this test module.
FIXTURE_CSV = Path(__file__).parent.joinpath("fixtures", "sample_zoho.csv")
|
|
|
|
|
|
def test_canonical_csv_headers():
    """Parse the sample fixture and check the hash prefix, row count and fields."""
    result = parse("sample_zoho.csv", FIXTURE_CSV.read_bytes())
    parsed = result["rows"]

    assert result["content_hash"].startswith("sha256:")
    assert result["unrecognised_columns"] == []
    assert len(parsed) == 4

    first = parsed[0]
    expected_first = {
        "date": date(2026, 5, 4),
        "employee": "Bhakti Doshi",
        "project": "Acme Spring Launch",
        "hours": 7.0,
    }
    for field, value in expected_first.items():
        assert first[field] == value
    assert first["billable"] is True

    # Idle Time → not billable
    assert parsed[2]["billable"] is False
    # Fee Related → billable
    assert parsed[3]["billable"] is True
|
|
|
|
|
|
def test_aliased_headers():
    """Alternative header spellings populate the usual row fields.

    The upload uses Resource / Total Hours / Log Date / Is Billable and must
    report no unrecognised columns.
    """
    header = "Resource,Project,Total Hours,Log Date,Is Billable\n"
    record = "Bhakti Doshi,Acme,7.5,2026-05-04,true\n"

    result = parse("aliased.csv", (header + record).encode("utf-8"))

    assert result["unrecognised_columns"] == []
    row = result["rows"][0]
    assert row["employee"] == "Bhakti Doshi"
    assert row["hours"] == 7.5
    assert row["billable"] is True
    assert row["date"] == date(2026, 5, 4)
|
|
|
|
|
|
def test_unknown_extra_columns_are_silently_ignored():
    """Headers the parser does not consume must not be reported as unrecognised."""
    # Real Zoho exports carry ~100 columns we don't need. The parser
    # should not flood the UI with "unrecognised" warnings for them.
    header = "Date,Resource,Total Hours,Wibble Factor,Some Other Field\n"
    record = "2026-05-04,Bhakti,7,5,foo\n"

    result = parse("u.csv", (header + record).encode("utf-8"))

    assert result["unrecognised_columns"] == []
    # Known columns still parse.
    row = result["rows"][0]
    assert row["employee"] == "Bhakti"
    assert row["hours"] == 7.0
|
|
|
|
|
|
def test_missing_required_column_is_surfaced():
    """A missing Date header is reported as `date`; the fields present are not."""
    # Only fire the "unrecognised columns" banner when a REQUIRED canonical
    # field can't be found — that's the actual "Zoho renamed something" signal.
    header = "Resource,Total Hours\n"  # no Date column
    record = "Bhakti,7\n"

    result = parse("u.csv", (header + record).encode("utf-8"))

    missing = result["unrecognised_columns"]
    assert "date" in missing
    assert "submitter" not in missing
    assert "hoursLogged" not in missing
|
|
|
|
|
|
def test_xlsx_path():
    """Parse a workbook built in memory and check the first row's fields."""
    workbook = Workbook()
    sheet = workbook.active
    sheet_rows = [
        ["Date", "Resource Name", "Project Title", "Task", "Hours", "Billable"],
        ["2026-05-04", "Bhakti Doshi", "Acme", "Design", 7.5, "Yes"],
    ]
    for cells in sheet_rows:
        sheet.append(cells)

    # Serialise to bytes without touching disk; getvalue() returns the whole
    # buffer regardless of the current stream position.
    stream = io.BytesIO()
    workbook.save(stream)

    result = parse("up.xlsx", stream.getvalue())

    row = result["rows"][0]
    assert row["employee"] == "Bhakti Doshi"
    assert row["hours"] == 7.5
    assert row["date"] == date(2026, 5, 4)
    assert row["billable"] is True
|
|
|
|
|
|
def test_empty_rows_skipped():
    """A blank line and an all-empty record (`,,`) produce no rows."""
    lines = [
        "Date,Resource,Hours\n",
        "\n",
        "2026-05-04,Bhakti,7\n",
        ",,\n",
    ]

    result = parse("blank.csv", "".join(lines).encode("utf-8"))

    # Only the single populated record should survive.
    assert len(result["rows"]) == 1
|
|
|
|
|
|
def test_hh_mm_hours_parsed():
    """An hours value written as `7:30` is read as 7.5 hours."""
    header = "Date,Resource,Hours\n"
    record = "2026-05-04,Bhakti,7:30\n"

    result = parse("hhmm.csv", (header + record).encode("utf-8"))

    first = result["rows"][0]
    assert first["hours"] == pytest.approx(7.5)
|
|
|
|
|
|
def test_content_hash_stable():
    """Parsing the same fixture bytes twice yields the same `content_hash`."""
    hashes = [
        parse("a.csv", FIXTURE_CSV.read_bytes())["content_hash"]
        for _ in range(2)
    ]
    assert hashes[0] == hashes[1]
|
|
|
|
|
|
def test_billing_type_header_populates_billingType_and_billable():
    """A 'Billing Type' column fills both `billingType` and `billable`.

    Each row should carry the lowercased billing type, and `billable` should
    be derived from it when the upload has no explicit billable column.
    """
    lines = [
        "Date,Resource,Total Hours,Billing Type\n",
        "2026-05-04,Bhakti,7,Client Related\n",
        "2026-05-05,Bhakti,8,Leave Hours\n",
        "2026-05-06,Bhakti,4,Idle Time\n",
        "2026-05-07,Bhakti,6,Fee Related\n",
    ]

    result = parse("bt.csv", "".join(lines).encode("utf-8"))

    assert result["unrecognised_columns"] == []
    parsed = result["rows"]
    assert len(parsed) == 4

    # (expected billingType, expected billable) for each record, in file order.
    expected = [
        ("client related", True),
        ("leave hours", False),
        ("idle time", False),
        ("fee related", True),
    ]
    for row, (billing_type, billable) in zip(parsed, expected):
        assert row["billingType"] == billing_type
        assert row["billable"] is billable
|