Backend (33/33 tests, +5 new): - Split Zoho parser's canonical "billable" into "billable" (bool column) and "billingType" (string column with values like "Client Related" / "Leave Hours" / "Idle Time"). Each parsed row now carries both, and billable is cross-filled from billingType when only the latter is present. - Merge service computes leaveHours separately from non_billable_h: any row with billingType "leave hours"/"leave" lands in the leave bucket and is no longer double-counted as non-billable. - UtilisationSummaryRow gains leaveHours: float; TimelogRow gains billingType: str | None. - /api/airtable/bookings accepts ?department=&name= (comma-separated multi-value), folded into the filterByFormula alongside the date overlap. Apostrophes in names are escaped. Cache key now includes the filter values so different selections don't collide. - /api/airtable/meta computes departments + employmentTypes from a live fetch_resources call (sorted distinct), falls back to the hardcoded lists on any exception. billingTypes/bookingStatuses stay static. - Logout cookie now mirrors the login cookie's HttpOnly / Secure / SameSite / Path attributes with max_age=0 and empty value, for consistency. Frontend (typecheck/lint/build clean): - types.ts: UtilisationSummaryRow.leaveHours: number. - BillabilityBreakdown uses r.leaveHours directly; idle becomes max(0, available - billable - nonBillable - leave). Capped to top 20 employees by (available + billable) with "Other (N)" rollup; Legend replaced with compact inline swatches. - BookingVsActual and FTEvsFreelancer: same top-20 + Other treatment to prevent the ProjectLoad-style x-axis explosion at scale. - Defensive sweep on WeeklyUtilisation, MonthlyUtilisation, BookingVsActual, FTEvsFreelancer: null-coerce sort keys, Number()-guard arithmetic, skip rows with no usable period/employee. - getBookings signature gains department + name; Resourcing passes them through. 
Client-side visibleBookings filter retained as belt-and-braces since linked-lookup filterByFormula on Airtable can be flaky. - Tutorial steps.ts restructured to cover the new chart and CSV export tags; existing TutorialOverlay defensive selector check preserved. - ErrorBoundary: removed dead eslint-disable directive flagged by --report-unused-disable-directives. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
237 lines
7.8 KiB
Python
237 lines
7.8 KiB
Python
"""Zoho timelog parser.

Decisions:
- Header matching is case-insensitive and whitespace-trimmed. Unknown headers
  are surfaced in `unrecognised_columns` so the operator notices when
  Zoho silently renames a column.
- Billable detection: we keep TWO canonical fields. `billable` accepts
  literal "Billable" / "Is Billable" columns (boolean-ish). `billingType`
  accepts a "Billing Type" column whose values look like
  "Client Related" / "Fee Related" / "Idle Time" / "Leave Hours".
  When only one of the two is present we cross-fill the other: a
  billingType of client/fee implies billable=True; leave implies False.
- Date parsing tries ISO first, then dateutil for the messy formats Zoho
  occasionally emits ("01/05/2026", "1-May-2026", etc.).
- For .xlsx we use openpyxl read-only mode — keeps memory low on big files.
"""
|
|
|
|
from __future__ import annotations

import csv
import hashlib
import io
import logging
import math
from datetime import date, datetime
from typing import Any, Iterable

from dateutil import parser as dateparser
from openpyxl import load_workbook
|
|
|
|
|
|
logger = logging.getLogger(__name__)


# Canonical field name -> header spellings accepted for it. Incoming headers
# are compared against these after being stripped and lower-cased.
HEADER_ALIASES: dict[str, set[str]] = {
    "date": {
        "date",
        "log date",
        "time log start",
        "start date",
    },
    "employee": {
        "resource name",
        "resource",
        "employee",
        "user",
        "name",
    },
    "project": {
        "project title",
        "project name",
        "project",
    },
    "task": {
        "task description",
        "task",
        "description",
    },
    "hours": {
        "hours logged",
        "total hours",
        "hours",
        "time logged",
        "actual logged",
    },
    "billable": {
        "billable",
        "is billable",
    },
    "billingType": {
        "billing type",
    },
}

# Lower-cased cell values treated as "yes" in a literal Billable column.
BILLABLE_TRUE_VALUES = {"true", "yes", "1", "billable"}

# Lower-cased Billing Type values that imply billable=True.
BILLING_TYPE_BILLABLE = {"client related", "fee related"}

# Lower-cased Billing Type values that imply billable=False (leave-coded).
BILLING_TYPE_LEAVE = {"leave hours", "leave"}
|
|
|
|
|
|
def _canonicalise_header(raw: str) -> str | None:
|
|
if raw is None:
|
|
return None
|
|
key = str(raw).strip().lower()
|
|
if not key:
|
|
return None
|
|
for canonical, aliases in HEADER_ALIASES.items():
|
|
if key in aliases:
|
|
return canonical
|
|
return None
|
|
|
|
|
|
def _parse_date(v: Any) -> date | None:
|
|
if v is None or v == "":
|
|
return None
|
|
if isinstance(v, date) and not isinstance(v, datetime):
|
|
return v
|
|
if isinstance(v, datetime):
|
|
return v.date()
|
|
try:
|
|
# ISO short-circuit
|
|
return date.fromisoformat(str(v)[:10])
|
|
except ValueError:
|
|
pass
|
|
try:
|
|
# dayfirst=True because Zoho regional defaults are commonly DD/MM.
|
|
return dateparser.parse(str(v), dayfirst=True).date()
|
|
except (ValueError, TypeError, OverflowError):
|
|
return None
|
|
|
|
|
|
def _parse_hours(v: Any) -> float:
|
|
if v is None or v == "":
|
|
return 0.0
|
|
if isinstance(v, (int, float)):
|
|
return float(v)
|
|
s = str(v).strip()
|
|
# Zoho sometimes outputs "7:30" (HH:MM). Convert.
|
|
if ":" in s and all(p.isdigit() for p in s.split(":") if p):
|
|
parts = s.split(":")
|
|
try:
|
|
h = int(parts[0])
|
|
m = int(parts[1]) if len(parts) > 1 else 0
|
|
return h + m / 60.0
|
|
except ValueError:
|
|
pass
|
|
try:
|
|
return float(s.replace(",", ""))
|
|
except ValueError:
|
|
return 0.0
|
|
|
|
|
|
def _parse_billable(v: Any) -> bool:
|
|
"""Parse a literal Billable / Is Billable column value."""
|
|
if v is None:
|
|
return False
|
|
if isinstance(v, bool):
|
|
return v
|
|
if isinstance(v, (int, float)):
|
|
return bool(v)
|
|
s = str(v).strip().lower()
|
|
if not s:
|
|
return False
|
|
return s in BILLABLE_TRUE_VALUES
|
|
|
|
|
|
def _parse_billing_type(v: Any) -> str | None:
|
|
"""Parse a Billing Type column value to a lowercase canonical string."""
|
|
if v is None:
|
|
return None
|
|
s = str(v).strip().lower()
|
|
return s or None
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Public API
|
|
# ----------------------------------------------------------------------
|
|
|
|
def parse(filename: str, content: bytes) -> dict[str, Any]:
    """Parse an uploaded timelog file.

    The format is chosen from the filename extension (``.xlsx``/``.xlsm`` ->
    workbook, ``.csv``/``.txt`` -> CSV). For any other or missing extension
    the content itself is sniffed.

    Args:
        filename: Original upload name; may be empty or ``None``.
        content: Raw file bytes.

    Returns:
        A dict with keys ``rows`` (parsed row dicts), ``unrecognised_columns``
        (header labels that matched no alias) and ``content_hash``
        (``"sha256:<hex digest>"`` of ``content``).
    """
    fn = (filename or "").lower()
    if fn.endswith((".xlsx", ".xlsm")):
        rows, unknown = _parse_xlsx(content)
    elif fn.endswith((".csv", ".txt")):
        rows, unknown = _parse_csv(content)
    elif content[:4] == b"PK\x03\x04":
        # Unknown extension but ZIP local-file-header signature: xlsx files
        # are ZIP archives. Sniffing by "try CSV first" does not work because
        # the CSV path decodes permissively and would usually turn workbook
        # bytes into garbage rows instead of raising.
        rows, unknown = _parse_xlsx(content)
    else:
        # Best effort for everything else: CSV, falling back to xlsx only if
        # the CSV reader itself rejects the data.
        try:
            rows, unknown = _parse_csv(content)
        except csv.Error:
            rows, unknown = _parse_xlsx(content)

    digest = hashlib.sha256(content).hexdigest()
    return {
        "rows": rows,
        "unrecognised_columns": unknown,
        "content_hash": f"sha256:{digest}",
    }
|
|
|
|
|
|
def _build_rows(raw_rows: Iterable[list[Any]], headers: list[Any]) -> tuple[list[dict[str, Any]], list[str]]:
    """Turn raw header + data cells into canonical row dicts.

    Args:
        raw_rows: Data rows (header row excluded), each a list of cell values.
            Consumed exactly once, so a generator is fine.
        headers: Header cells, positionally aligned with each data row.

    Returns:
        ``(rows, unrecognised)``. Each row dict has the keys ``date``,
        ``employee``, ``project``, ``task``, ``hours``, ``billable`` and
        ``billingType``. ``unrecognised`` lists the stripped labels of
        non-blank headers that matched no alias.
    """
    # Map column index -> canonical key. Track unknown ones.
    canonical_by_idx: dict[int, str] = {}
    unrecognised: list[str] = []
    for idx, raw in enumerate(headers):
        label = "" if raw is None else str(raw).strip()
        if not label:
            continue
        canon = _canonicalise_header(raw)
        if canon:
            canonical_by_idx[idx] = canon
        else:
            unrecognised.append(label)

    out: list[dict[str, Any]] = []
    for raw_row in raw_rows:
        # Skip rows that are missing or entirely empty.
        if not raw_row or all(c in (None, "") for c in raw_row):
            continue

        row: dict[str, Any] = {
            "date": None,
            "employee": None,
            "project": None,
            "task": None,
            "hours": 0.0,
            "billable": False,
            "billingType": None,
        }
        # True once this row carried a non-blank value in a Billable column.
        billable_supplied = False

        for idx, canon in canonical_by_idx.items():
            if idx >= len(raw_row):
                # Short row: the cell for this column is simply absent.
                continue
            v = raw_row[idx]
            if canon == "date":
                row["date"] = _parse_date(v)
            elif canon == "hours":
                row["hours"] = _parse_hours(v)
            elif canon == "billable":
                row["billable"] = _parse_billable(v)
                if v is not None and str(v).strip() != "":
                    billable_supplied = True
            elif canon == "billingType":
                row["billingType"] = _parse_billing_type(v)
            else:
                row[canon] = (str(v).strip() if v is not None else None) or None

        # Cross-fill: derive billable from billingType only when the row has
        # no explicit Billable value, so a user-supplied value is never
        # clobbered. When only billable is present, billingType stays None.
        bt = row["billingType"]
        if bt is not None and not billable_supplied:
            if bt in BILLING_TYPE_BILLABLE:
                row["billable"] = True
            elif bt in BILLING_TYPE_LEAVE:
                row["billable"] = False

        out.append(row)

    return out, unrecognised
|
|
|
|
|
|
def _parse_csv(content: bytes) -> tuple[list[dict[str, Any]], list[str]]:
|
|
# Decode permissively; Zoho exports are usually utf-8 or utf-8-sig.
|
|
text = content.decode("utf-8-sig", errors="replace")
|
|
reader = csv.reader(io.StringIO(text))
|
|
rows = list(reader)
|
|
if not rows:
|
|
return [], []
|
|
headers = rows[0]
|
|
data = rows[1:]
|
|
return _build_rows(data, headers)
|
|
|
|
|
|
def _parse_xlsx(content: bytes) -> tuple[list[dict[str, Any]], list[str]]:
    """Parse workbook bytes (active sheet only) into canonical rows.

    The first row of the active sheet is treated as the header row. Returns
    ``([], [])`` when there is no active sheet or the sheet has no rows;
    otherwise returns what ``_build_rows`` produces for the remaining rows.
    """
    wb = load_workbook(io.BytesIO(content), read_only=True, data_only=True)
    try:
        ws = wb.active
        if ws is None:
            return [], []
        rows_iter = ws.iter_rows(values_only=True)
        first = next(rows_iter, None)
        if first is None:
            return [], []
        # _build_rows drains the generator before returning, so it is safe to
        # close the workbook in the finally block below.
        return _build_rows((list(r) for r in rows_iter), list(first))
    finally:
        # Read-only workbooks load lazily and must be closed explicitly.
        wb.close()
|