"""Tests for the Zoho timelog parser.""" from __future__ import annotations import io from datetime import date from pathlib import Path import pytest from openpyxl import Workbook from app.services.zoho_parse import parse FIXTURE_CSV = Path(__file__).parent / "fixtures" / "sample_zoho.csv" def test_canonical_csv_headers(): content = FIXTURE_CSV.read_bytes() out = parse("sample_zoho.csv", content) rows = out["rows"] assert out["content_hash"].startswith("sha256:") assert out["unrecognised_columns"] == [] assert len(rows) == 4 r0 = rows[0] assert r0["date"] == date(2026, 5, 4) assert r0["employee"] == "Bhakti Doshi" assert r0["project"] == "Acme Spring Launch" assert r0["hours"] == 7.0 assert r0["billable"] is True # Idle Time → not billable assert rows[2]["billable"] is False # Fee Related → billable assert rows[3]["billable"] is True def test_aliased_headers(): csv = ( "Resource,Project,Total Hours,Log Date,Is Billable\n" "Bhakti Doshi,Acme,7.5,2026-05-04,true\n" ).encode("utf-8") out = parse("aliased.csv", csv) assert out["unrecognised_columns"] == [] assert out["rows"][0]["employee"] == "Bhakti Doshi" assert out["rows"][0]["hours"] == 7.5 assert out["rows"][0]["billable"] is True assert out["rows"][0]["date"] == date(2026, 5, 4) def test_unknown_extra_columns_are_silently_ignored(): # Real Zoho exports carry ~100 columns we don't need. The parser # should not flood the UI with "unrecognised" warnings for them. csv = ( "Date,Resource,Total Hours,Wibble Factor,Some Other Field\n" "2026-05-04,Bhakti,7,5,foo\n" ).encode("utf-8") out = parse("u.csv", csv) assert out["unrecognised_columns"] == [] # Known columns still parse. assert out["rows"][0]["employee"] == "Bhakti" assert out["rows"][0]["hours"] == 7.0 def test_missing_required_column_is_surfaced(): # Only fire the "unrecognised columns" banner when a REQUIRED canonical # field can't be found — that's the actual "Zoho renamed something" signal. csv = ( "Resource,Total Hours\n" # no Date column "Bhakti,7\n" ).encode("utf-8") out = parse("u.csv", csv) assert "date" in out["unrecognised_columns"] assert "submitter" not in out["unrecognised_columns"] assert "hoursLogged" not in out["unrecognised_columns"] def test_xlsx_path(): wb = Workbook() ws = wb.active ws.append(["Date", "Resource Name", "Project Title", "Task", "Hours", "Billable"]) ws.append(["2026-05-04", "Bhakti Doshi", "Acme", "Design", 7.5, "Yes"]) buf = io.BytesIO() wb.save(buf) buf.seek(0) out = parse("up.xlsx", buf.read()) assert out["rows"][0]["employee"] == "Bhakti Doshi" assert out["rows"][0]["hours"] == 7.5 assert out["rows"][0]["date"] == date(2026, 5, 4) assert out["rows"][0]["billable"] is True def test_empty_rows_skipped(): csv = ( "Date,Resource,Hours\n" "\n" "2026-05-04,Bhakti,7\n" ",,\n" ).encode("utf-8") out = parse("blank.csv", csv) assert len(out["rows"]) == 1 def test_hh_mm_hours_parsed(): csv = ( "Date,Resource,Hours\n" "2026-05-04,Bhakti,7:30\n" ).encode("utf-8") out = parse("hhmm.csv", csv) assert out["rows"][0]["hours"] == pytest.approx(7.5) def test_content_hash_stable(): out1 = parse("a.csv", FIXTURE_CSV.read_bytes()) out2 = parse("a.csv", FIXTURE_CSV.read_bytes()) assert out1["content_hash"] == out2["content_hash"] def test_billing_type_header_populates_billingType_and_billable(): """When the upload uses a 'Billing Type' header, each row gains `billingType` (lowercased) and `billable` is cross-filled from it.""" csv = ( "Date,Resource,Total Hours,Billing Type\n" "2026-05-04,Bhakti,7,Client Related\n" "2026-05-05,Bhakti,8,Leave Hours\n" "2026-05-06,Bhakti,4,Idle Time\n" "2026-05-07,Bhakti,6,Fee Related\n" ).encode("utf-8") out = parse("bt.csv", csv) assert out["unrecognised_columns"] == [] rows = out["rows"] assert len(rows) == 4 assert rows[0]["billingType"] == "client related" assert rows[0]["billable"] is True assert rows[1]["billingType"] == "leave hours" assert rows[1]["billable"] is False assert rows[2]["billingType"] == "idle time" assert rows[2]["billable"] is False assert rows[3]["billingType"] == "fee related" assert rows[3]["billable"] is True