# loreal-utilisation-dept/backend/tests/test_zoho_parse.py

"""Tests for the Zoho timelog parser."""

from __future__ import annotations

import io
from datetime import date
from pathlib import Path

import pytest
from openpyxl import Workbook

from app.services.zoho_parse import parse


# Sample Zoho CSV export stored in the "fixtures" directory next to this module.
FIXTURE_CSV = Path(__file__).parent.joinpath("fixtures", "sample_zoho.csv")


def test_canonical_csv_headers():
    """Parse the sample fixture and check the hash prefix, row count and fields."""
    result = parse("sample_zoho.csv", FIXTURE_CSV.read_bytes())
    parsed_rows = result["rows"]

    assert result["content_hash"].startswith("sha256:")
    assert result["unrecognised_columns"] == []
    assert len(parsed_rows) == 4

    first = parsed_rows[0]
    expected_first = {
        "date": date(2026, 5, 4),
        "employee": "Bhakti Doshi",
        "project": "Acme Spring Launch",
        "hours": 7.0,
    }
    for field, value in expected_first.items():
        assert first[field] == value
    assert first["billable"] is True

    # The third fixture row is "Idle Time", which must not count as billable.
    idle_row = parsed_rows[2]
    assert idle_row["billable"] is False

    # The fourth fixture row is "Fee Related", which does count as billable.
    fee_row = parsed_rows[3]
    assert fee_row["billable"] is True


def test_aliased_headers():
    """Alternative header names map onto the same row keys as the canonical ones."""
    header = "Resource,Project,Total Hours,Log Date,Is Billable\n"
    record = "Bhakti Doshi,Acme,7.5,2026-05-04,true\n"

    result = parse("aliased.csv", (header + record).encode("utf-8"))

    assert result["unrecognised_columns"] == []
    row = result["rows"][0]
    assert row["employee"] == "Bhakti Doshi"
    assert row["hours"] == 7.5
    assert row["billable"] is True
    assert row["date"] == date(2026, 5, 4)


def test_unknown_extra_columns_are_silently_ignored():
    """Columns the parser has no use for must not be reported as unrecognised."""
    # Genuine Zoho exports include roughly a hundred columns that are not
    # needed here; warning about each of them would flood the UI.
    header = "Date,Resource,Total Hours,Wibble Factor,Some Other Field\n"
    record = "2026-05-04,Bhakti,7,5,foo\n"

    result = parse("u.csv", (header + record).encode("utf-8"))

    assert result["unrecognised_columns"] == []

    # The columns the parser does know about are still extracted.
    row = result["rows"][0]
    assert row["employee"] == "Bhakti"
    assert row["hours"] == 7.0


def test_missing_required_column_is_surfaced():
    """A missing required field is listed in ``unrecognised_columns``."""
    # The "unrecognised columns" banner should only appear when a REQUIRED
    # canonical field cannot be located -- that is the real signal that Zoho
    # renamed something.
    header = "Resource,Total Hours\n"  # deliberately lacks a Date column
    record = "Bhakti,7\n"

    result = parse("u.csv", (header + record).encode("utf-8"))
    flagged = result["unrecognised_columns"]

    assert "date" in flagged
    for present_field in ("submitter", "hoursLogged"):
        assert present_field not in flagged


def test_xlsx_path():
    """An in-memory .xlsx workbook is parsed into the same row keys as a CSV."""
    workbook = Workbook()
    sheet = workbook.active
    header = ["Date", "Resource Name", "Project Title", "Task", "Hours", "Billable"]
    record = ["2026-05-04", "Bhakti Doshi", "Acme", "Design", 7.5, "Yes"]
    for line in (header, record):
        sheet.append(line)

    # Serialise the workbook to bytes without touching the filesystem.
    stream = io.BytesIO()
    workbook.save(stream)

    result = parse("up.xlsx", stream.getvalue())

    row = result["rows"][0]
    assert row["employee"] == "Bhakti Doshi"
    assert row["hours"] == 7.5
    assert row["date"] == date(2026, 5, 4)
    assert row["billable"] is True


def test_empty_rows_skipped():
    """A blank line and an all-empty record produce no output rows."""
    lines = [
        "Date,Resource,Hours\n",
        "\n",  # completely blank line
        "2026-05-04,Bhakti,7\n",
        ",,\n",  # delimiters only, every cell empty
    ]

    result = parse("blank.csv", "".join(lines).encode("utf-8"))

    assert len(result["rows"]) == 1


def test_hh_mm_hours_parsed():
    """An ``H:MM`` hours value such as ``7:30`` is read as decimal hours (7.5)."""
    header = "Date,Resource,Hours\n"
    record = "2026-05-04,Bhakti,7:30\n"

    result = parse("hhmm.csv", (header + record).encode("utf-8"))

    parsed_hours = result["rows"][0]["hours"]
    assert parsed_hours == pytest.approx(7.5)


def test_content_hash_stable():
    """Parsing the same fixture bytes twice gives the same ``content_hash``."""
    first_hash, second_hash = (
        parse("a.csv", FIXTURE_CSV.read_bytes())["content_hash"] for _ in range(2)
    )
    assert first_hash == second_hash


def test_billing_type_header_populates_billingType_and_billable():
    """A 'Billing Type' column yields a lowercased `billingType` on every row,
    and `billable` is derived from that value."""
    lines = [
        "Date,Resource,Total Hours,Billing Type\n",
        "2026-05-04,Bhakti,7,Client Related\n",
        "2026-05-05,Bhakti,8,Leave Hours\n",
        "2026-05-06,Bhakti,4,Idle Time\n",
        "2026-05-07,Bhakti,6,Fee Related\n",
    ]
    # Expected (billingType, billable) per data row, in file order.
    expected = [
        ("client related", True),
        ("leave hours", False),
        ("idle time", False),
        ("fee related", True),
    ]

    result = parse("bt.csv", "".join(lines).encode("utf-8"))

    assert result["unrecognised_columns"] == []
    parsed_rows = result["rows"]
    assert len(parsed_rows) == 4
    for row, (billing_type, billable) in zip(parsed_rows, expected):
        assert row["billingType"] == billing_type
        assert row["billable"] is billable