gmal-scope-builder/backend/app/services/export_excel.py

"""Export ratecard data to Excel."""

import io
import logging
from collections import defaultdict

from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload

from app.models.gmal import GmalAsset, Role
from app.models.project import Project, ClientAsset, Match, RatecardLine
from app.services.team_shape import calculate_team_shape

# Module-level logger. NOTE(review): nothing in the visible part of this file
# logs through it yet.
logger = logging.getLogger(__name__)

# --- Shared cell styles ------------------------------------------------------
# Header row: white bold text on a dark-blue (1F4E79) solid fill.
HEADER_FILL = PatternFill(start_color="1F4E79", end_color="1F4E79", fill_type="solid")
HEADER_FONT = Font(color="FFFFFF", bold=True, size=11)
# Light-blue (D6E4F0) band used for the discipline grouping rows.
DISCIPLINE_FILL = PatternFill(start_color="D6E4F0", end_color="D6E4F0", fill_type="solid")
# Thin border on all four sides of a cell.
# NOTE(review): not applied anywhere in the visible part of this file.
THIN_BORDER = Border(
    left=Side(style="thin"),
    right=Side(style="thin"),
    top=Side(style="thin"),
    bottom=Side(style="thin"),
)


async def export_ratecard_excel(db: AsyncSession, project: Project, efficiency_levels: list[int] | None = None) -> bytes:
    """Render the project's ratecard as an Excel workbook and return its bytes.

    Sheets produced, in order: "Ratecard Summary", "Asset Detail",
    "Assumptions & Rates", "Team Shape", then one
    "Team - <level>% AI Efficiency" sheet per entry in ``efficiency_levels``
    (e.g. ``[10, 25, 50]``).

    When the project has no ratecard lines the workbook contains a single
    "Ratecard" sheet with a placeholder message.
    """
    wb = Workbook()

    # Ratecard lines drive everything else; bail out early when there are none.
    ratecard_rows = (
        await db.execute(
            select(RatecardLine).where(RatecardLine.project_id == project.id)
        )
    ).scalars().all()

    if not ratecard_rows:
        placeholder = wb.active
        placeholder.title = "Ratecard"
        placeholder["A1"] = "No ratecard data available"
        return _workbook_to_bytes(wb)

    # Distinct foreign keys referenced by the ratecard lines.
    role_ids = list({row.role_id for row in ratecard_rows})
    asset_ids = list({row.client_asset_id for row in ratecard_rows})
    gmal_ids = list({row.gmal_asset_id for row in ratecard_rows})

    # Resolve each set of keys into an id -> entity lookup.
    role_rows = await db.execute(select(Role).where(Role.id.in_(role_ids)))
    roles = {role.id: role for role in role_rows.scalars().all()}

    asset_rows = await db.execute(select(ClientAsset).where(ClientAsset.id.in_(asset_ids)))
    client_assets = {asset.id: asset for asset in asset_rows.scalars().all()}

    gmal_rows = await db.execute(select(GmalAsset).where(GmalAsset.id.in_(gmal_ids)))
    gmals = {gmal.id: gmal for gmal in gmal_rows.scalars().all()}

    # Selected matches carry the per-asset caveat text.
    match_rows = await db.execute(
        select(Match).where(
            Match.client_asset_id.in_(asset_ids),
            Match.is_selected == True,
        )
    )

    # Caveat text per client asset: match-specific caveat first, then the
    # GMAL's standard caveats, separated by a blank line.
    caveat_by_asset = {}
    for match in match_rows.scalars().all():
        pieces = [match.caveat_text] if match.caveat_text else []
        matched_gmal = gmals.get(match.gmal_asset_id)
        if matched_gmal and matched_gmal.caveats:
            pieces.append(f"GMAL Standard Caveats: {matched_gmal.caveats}")
        caveat_by_asset[match.client_asset_id] = "\n\n".join(pieces)

    # Sheet 1: roles x assets matrix (reuses the workbook's default sheet).
    summary_ws = wb.active
    summary_ws.title = "Ratecard Summary"
    _build_ratecard_sheet(summary_ws, ratecard_rows, roles, client_assets, gmals, caveat_by_asset)

    # Sheet 2: per-asset match detail.
    detail_ws = wb.create_sheet("Asset Detail")
    await _build_asset_detail_sheet(detail_ws, db, project, client_assets, gmals)

    # Sheet 3: editable assumptions and role rates.
    rates_ws = wb.create_sheet("Assumptions & Rates")
    _build_assumptions_sheet(rates_ws, roles, ratecard_rows)

    # Sheet 4: baseline team shape (no efficiency applied).
    base_team_ws = wb.create_sheet("Team Shape")
    await _build_team_shape_sheet(base_team_ws, db, project, efficiency_pct=0)

    # One extra team-shape sheet per requested efficiency level.
    for level in efficiency_levels or []:
        level_ws = wb.create_sheet(f"Team - {level}% AI Efficiency")
        await _build_team_shape_sheet(level_ws, db, project, efficiency_pct=level)

    return _workbook_to_bytes(wb)


def _build_ratecard_sheet(ws, lines, roles, client_assets, gmals, caveats: dict | None = None):
    """Build the main ratecard matrix: rows=roles, cols=client assets.

    Sheet layout:
      * Row 1  - headers: "Discipline", "Role", one column per client asset
        (raw name, volume and, when resolvable, the GMAL id in brackets),
        then "Total Hours".
      * Row 2  - "Assumptions / Caveats" text per asset column.
      * Row 3+ - a shaded row per discipline followed by its roles. Roles whose
        hours sum to zero across all assets are left out.
      * Final row (after one blank row) - "TOTAL" with a SUM formula per column.

    Args:
        ws: Worksheet to fill.
        lines: Ratecard lines; each supplies role_id, client_asset_id,
            gmal_asset_id, total_hours and manual_override.
        roles: Mapping of role id -> role (discipline, sort_order, role_title).
        client_assets: Mapping of client asset id -> asset (raw_name, volume).
        gmals: Mapping of GMAL asset id -> GMAL asset (gmal_id).
        caveats: Optional mapping of client asset id -> caveat text for row 2.
    """
    if caveats is None:
        caveats = {}

    asset_ids_ordered = sorted(client_assets)
    # `or ""` keeps the sort from raising TypeError if some roles have no
    # discipline while others do; ordering is unchanged otherwise.
    role_ids_ordered = sorted(
        roles,
        key=lambda rid: (roles[rid].discipline or "", roles[rid].sort_order or 0),
    )

    # Single pass over the lines builds both lookups:
    #   hours_map           {(role_id, client_asset_id): effective hours}
    #   first_gmal_by_asset {client_asset_id: gmal_asset_id of its first line}
    hours_map = {}
    first_gmal_by_asset = {}
    for line in lines:
        # A manual override, when present, replaces the calculated total.
        effective_hours = line.manual_override if line.manual_override is not None else line.total_hours
        hours_map[(line.role_id, line.client_asset_id)] = float(effective_hours or 0)
        first_gmal_by_asset.setdefault(line.client_asset_id, line.gmal_asset_id)

    def style_header(cell):
        cell.font = HEADER_FONT
        cell.fill = HEADER_FILL
        return cell

    # Column layout: 1=Discipline, 2=Role, 3..N=assets, N+1=Total Hours.
    total_col = len(asset_ids_ordered) + 3

    # Header row
    style_header(ws.cell(row=1, column=1, value="Discipline"))
    style_header(ws.cell(row=1, column=2, value="Role"))
    for col_idx, asset_id in enumerate(asset_ids_ordered, 3):
        ca = client_assets[asset_id]
        header = f"{ca.raw_name}\n(Vol: {ca.volume})"
        if asset_id in first_gmal_by_asset:
            g = gmals.get(first_gmal_by_asset[asset_id])
            if g and g.gmal_id:
                header += f"\n[{g.gmal_id}]"
        cell = style_header(ws.cell(row=1, column=col_idx, value=header))
        cell.alignment = Alignment(wrap_text=True, horizontal="center")
    style_header(ws.cell(row=1, column=total_col, value="Total Hours"))

    # Caveats row (row 2)
    caveat_font = Font(italic=True, size=9, color="555555")
    caveat_fill = PatternFill(start_color="FFFBF0", end_color="FFFBF0", fill_type="solid")
    ws.cell(row=2, column=1, value="").fill = caveat_fill
    caveat_label = ws.cell(row=2, column=2, value="Assumptions / Caveats")
    caveat_label.font = Font(italic=True, bold=True, size=9, color="92400E")
    caveat_label.fill = caveat_fill
    for col_idx, asset_id in enumerate(asset_ids_ordered, 3):
        cell = ws.cell(row=2, column=col_idx, value=caveats.get(asset_id, ""))
        cell.font = caveat_font
        cell.fill = caveat_fill
        cell.alignment = Alignment(wrap_text=True, vertical="top")
    ws.cell(row=2, column=total_col).fill = caveat_fill
    ws.row_dimensions[2].height = 60

    # Data rows
    data_start_row = 3  # first row after headers + caveats
    # The per-row SUM always spans the same asset columns, so resolve the
    # column letters once instead of once per role.
    first_col = get_column_letter(3)
    last_col = get_column_letter(total_col - 1)

    current_discipline = None
    row_idx = data_start_row
    for role_id in role_ids_ordered:
        role = roles[role_id]

        # Skip roles that contribute no hours to any asset.
        role_total = sum(hours_map.get((role_id, aid), 0) for aid in asset_ids_ordered)
        if role_total == 0:
            continue

        # Start of a new discipline group: emit a shaded banner row.
        if role.discipline != current_discipline:
            current_discipline = role.discipline
            ws.cell(row=row_idx, column=1, value=current_discipline).font = Font(bold=True)
            for c in range(1, total_col + 1):
                ws.cell(row=row_idx, column=c).fill = DISCIPLINE_FILL
            row_idx += 1

        ws.cell(row=row_idx, column=1, value=role.discipline)
        ws.cell(row=row_idx, column=2, value=role.role_title)

        # Only positive hours are written; other cells stay empty.
        for col_idx, asset_id in enumerate(asset_ids_ordered, 3):
            hours = hours_map.get((role_id, asset_id), 0)
            if hours > 0:
                ws.cell(row=row_idx, column=col_idx, value=round(hours, 2))

        # Total Hours = SUM formula across the asset columns of this row.
        total_cell = ws.cell(row=row_idx, column=total_col)
        total_cell.value = f"=SUM({first_col}{row_idx}:{last_col}{row_idx})"
        total_cell.font = Font(bold=True)
        total_cell.number_format = '#,##0.00'
        row_idx += 1

    # Grand total row (one blank spacer row above it) with SUM formulas down
    # each asset column and the Total Hours column.
    row_idx += 1
    ws.cell(row=row_idx, column=1, value="TOTAL").font = Font(bold=True, size=12)
    for col_idx in range(3, total_col + 1):
        col_letter = get_column_letter(col_idx)
        cell = ws.cell(row=row_idx, column=col_idx)
        cell.value = f"=SUM({col_letter}{data_start_row}:{col_letter}{row_idx - 1})"
        cell.font = Font(bold=True, size=12 if col_idx == total_col else 11)
        cell.number_format = '#,##0.00'

    # Column widths
    ws.column_dimensions["A"].width = 25
    ws.column_dimensions["B"].width = 35
    for col_idx in range(3, total_col + 1):
        ws.column_dimensions[get_column_letter(col_idx)].width = 18


async def _build_asset_detail_sheet(ws, db, project, client_assets, gmals):
    """Build the asset detail sheet: one row per client asset with its selected match.

    Columns: client asset name, volume, matched GMAL id, GMAL unique name,
    match confidence, confidence score, match caveats, GMAL standard caveats.
    Assets without a selected match show "No match" in the GMAL id column.

    Args:
        ws: Worksheet to fill.
        db: Async session used to load the selected matches.
        project: Not used by this function; kept so the call signature matches
            the other sheet builders.
        client_assets: Mapping of client asset id -> asset (raw_name, volume).
        gmals: Mapping of GMAL asset id -> GMAL asset (gmal_id, unique_name, caveats).
    """
    headers = ["Client Asset", "Volume", "Matched GMAL", "GMAL Name", "Confidence", "Score", "Match Caveats", "GMAL Standard Caveats"]
    for col_idx, header in enumerate(headers, 1):
        cell = ws.cell(row=1, column=col_idx, value=header)
        cell.font = HEADER_FONT
        cell.fill = HEADER_FILL

    # Load the selected match for each asset (Match comes from the
    # module-level import; no function-local import is needed).
    matches_result = await db.execute(
        select(Match).where(
            Match.client_asset_id.in_(list(client_assets)),
            Match.is_selected == True,  # noqa: E712 - SQL expression, not a Python bool test
        )
    )
    # If an asset has several selected matches, the last one returned wins.
    match_by_asset = {m.client_asset_id: m for m in matches_result.scalars().all()}

    # Caveat columns hold long text: wrap it and pin it to the top of the cell.
    wrap_top = Alignment(wrap_text=True, vertical="top")

    for row_idx, asset_id in enumerate(sorted(client_assets), 2):
        ca = client_assets[asset_id]
        match = match_by_asset.get(asset_id)

        ws.cell(row=row_idx, column=1, value=ca.raw_name)
        ws.cell(row=row_idx, column=2, value=ca.volume)

        if not match:
            ws.cell(row=row_idx, column=3, value="No match")
            continue

        # The matched GMAL may be missing from the lookup; fall back to blanks.
        gmal = gmals.get(match.gmal_asset_id)
        ws.cell(row=row_idx, column=3, value=gmal.gmal_id if gmal else "")
        ws.cell(row=row_idx, column=4, value=gmal.unique_name if gmal else "")
        ws.cell(row=row_idx, column=5, value=match.confidence.value)
        ws.cell(row=row_idx, column=6, value=float(match.confidence_score) if match.confidence_score else 0)
        ws.cell(row=row_idx, column=7, value=match.caveat_text or "").alignment = wrap_top
        gmal_caveats = (gmal.caveats or "") if gmal else ""
        ws.cell(row=row_idx, column=8, value=gmal_caveats).alignment = wrap_top

    # Column widths, in header order.
    widths = [30, 10, 15, 40, 12, 10, 60, 60]
    for i, w in enumerate(widths, 1):
        ws.column_dimensions[get_column_letter(i)].width = w


# Styles for the "Assumptions & Rates" sheet.
# Purple (4A148C) fill for the role-rates table header row.
ASSUMPTIONS_FILL = PatternFill(start_color="4A148C", end_color="4A148C", fill_type="solid")
# Pale-yellow (FFF9C4) fill marking the cells the user is expected to edit.
INPUT_FILL = PatternFill(start_color="FFF9C4", end_color="FFF9C4", fill_type="solid")


def _build_assumptions_sheet(ws, roles, lines):
    """Build the editable Assumptions & Rates sheet.

    Writes a block of global assumptions (rows 5-7) and a role-rates table
    (header on row 11, data from row 12) listing every role that appears in
    ``lines``. Editable cells are filled yellow; day rate and salary start at 0.
    Nothing in this module links formulas to these cells yet - the footer note
    on the sheet says they are intended for future formula-linked exports.

    Args:
        ws: Worksheet to fill.
        roles: Mapping of role id -> role (id, discipline, sort_order, role_title).
        lines: Ratecard lines; only their ``role_id`` is read.
    """
    note_font = Font(italic=True, color="666666")

    ws.cell(row=1, column=1, value="Assumptions & Rates").font = Font(bold=True, size=14)
    ws.cell(row=2, column=1, value="Edit the yellow cells to adjust the financial model").font = note_font

    # Global assumptions: (label, default value, number format).
    # Percentages are stored as whole numbers (15 means 15%), so an Excel "%"
    # number format - which displays the value multiplied by 100 - must not be
    # used here. The format is stated per row rather than guessed from the
    # Python type of the default.
    ws.cell(row=4, column=1, value="GLOBAL ASSUMPTIONS").font = Font(bold=True, size=12)
    assumptions = [
        ("Hours per FTE per year", 1800, '#,##0'),
        ("Margin %", 15, '#,##0'),
        ("Overhead %", 10, '#,##0'),
    ]
    for row, (label, value, number_format) in enumerate(assumptions, 5):
        ws.cell(row=row, column=1, value=label).font = Font(bold=True)
        cell = ws.cell(row=row, column=2, value=value)
        cell.fill = INPUT_FILL
        cell.number_format = number_format

    # Role rates table
    ws.cell(row=10, column=1, value="ROLE RATES").font = Font(bold=True, size=12)
    headers = ["Discipline", "Role", "Day Rate (£)", "Annual Salary (£)"]
    for col_idx, header in enumerate(headers, 1):
        cell = ws.cell(row=11, column=col_idx, value=header)
        cell.font = HEADER_FONT
        cell.fill = ASSUMPTIONS_FILL

    # Only roles that actually appear in the ratecard, ordered by discipline
    # and then sort order (`or ""` / `or 0` keep unset values sortable).
    role_ids_used = {line.role_id for line in lines}
    role_list = sorted(
        (r for r in roles.values() if r.id in role_ids_used),
        key=lambda r: (r.discipline or "", r.sort_order or 0),
    )

    row_idx = 12
    for role in role_list:
        ws.cell(row=row_idx, column=1, value=role.discipline)
        ws.cell(row=row_idx, column=2, value=role.role_title)
        # Day rate (column 3) and annual salary (column 4): editable, default 0.
        for col in (3, 4):
            money_cell = ws.cell(row=row_idx, column=col, value=0)
            money_cell.fill = INPUT_FILL
            money_cell.number_format = '£#,##0'
        row_idx += 1

    # Footer notes, one blank row below the table.
    ws.cell(row=row_idx + 1, column=1, value="Fill in day rates and annual salaries above.").font = note_font
    ws.cell(row=row_idx + 2, column=1, value="These values will be used in future formula-linked exports.").font = note_font

    ws.column_dimensions["A"].width = 30
    ws.column_dimensions["B"].width = 40
    ws.column_dimensions["C"].width = 18
    ws.column_dimensions["D"].width = 20


# Styles for the "Team Shape" sheets.
# Green (2E7D32) fill for the table header row.
TEAM_HEADER_FILL = PatternFill(start_color="2E7D32", end_color="2E7D32", fill_type="solid")
# Pale-orange (FFF3E0) fill applied to rows for programme roles.
PROGRAMME_FILL = PatternFill(start_color="FFF3E0", end_color="FFF3E0", fill_type="solid")
# Bold dark-green font used to highlight FTE values of 1 or more.
FTE_FONT = Font(bold=True, size=11, color="1B5E20")


def _headcount_for_fte(fte):
    """Convert an FTE figure into a headcount.

    0 (or less) -> 0; above 0 but below 0.5 -> 0.5; 0.5 and above -> rounded
    up to the next whole number.
    """
    import math

    if fte >= 0.5:
        return math.ceil(fte)
    return 0.5 if fte > 0 else 0


async def _build_team_shape_sheet(ws, db, project, efficiency_pct: float = 0):
    """Build a team shape sheet: hours, FTE and headcount per role.

    The per-role figures come from ``calculate_team_shape``; this function only
    lays them out. With ``efficiency_pct == 0`` the table shows
    Total Hours / FTE / Headcount. With ``efficiency_pct > 0`` it shows original
    and adjusted hours/FTE, the hours saved, and a headcount based on the
    adjusted FTE. A summary block (delivery / programme / total) follows the
    table.

    Args:
        ws: Worksheet to fill.
        db: Async session passed through to ``calculate_team_shape``.
        project: Project whose name is used in the title and whose ratecard is
            summarised.
        efficiency_pct: AI/automation efficiency percentage; 0 means baseline.
    """
    team = await calculate_team_shape(db, project, efficiency_pct=efficiency_pct)

    if not team:
        ws["A1"] = "No ratecard data - build ratecard first"
        return

    has_efficiency = efficiency_pct > 0
    total_hours = sum(t["total_hours"] for t in team)
    total_fte = sum(t["fte"] for t in team)
    adjusted_hours = sum(t["adjusted_hours"] for t in team)
    adjusted_fte = sum(t["adjusted_fte"] for t in team)

    # Title (merged across the widest table layout, A..I)
    title = f"Team Shape - {project.name}"
    if has_efficiency:
        title += f" ({efficiency_pct}% AI/Automation Efficiency)"
    ws.merge_cells("A1:I1")
    ws.cell(row=1, column=1, value=title).font = Font(bold=True, size=14)

    subtitle = f"Based on {total_hours:,.0f} total hours / 1,800 hours per FTE = {total_fte:.2f} FTE"
    if has_efficiency:
        subtitle += f" → {adjusted_hours:,.0f} adjusted hours = {adjusted_fte:.2f} FTE ({efficiency_pct}% efficiency on delivery roles)"
    ws.cell(row=2, column=1, value=subtitle).font = Font(italic=True, color="666666")

    # Headers - "Headcount" is always the last column.
    if has_efficiency:
        headers = ["Discipline", "Role", "Type", "Original Hours", "Original FTE",
                   "Adjusted Hours", "Adjusted FTE", "Hours Saved", "Headcount"]
    else:
        headers = ["Discipline", "Role", "Type", "Total Hours", "FTE", "Headcount"]

    num_cols = len(headers)
    for col_idx, header in enumerate(headers, 1):
        cell = ws.cell(row=4, column=col_idx, value=header)
        cell.font = HEADER_FONT
        cell.fill = TEAM_HEADER_FILL

    # Data rows
    row_idx = 5
    current_discipline = None
    SAVED_FONT = Font(color="2E7D32", italic=True)

    for t in team:
        # Start of a new discipline group: emit a shaded banner row.
        if t["discipline"] != current_discipline:
            current_discipline = t["discipline"]
            ws.cell(row=row_idx, column=1, value=current_discipline).font = Font(bold=True)
            for c in range(1, num_cols + 1):
                ws.cell(row=row_idx, column=c).fill = DISCIPLINE_FILL
            row_idx += 1

        role_type = "Programme" if t["is_programme_role"] else "Delivery"
        ws.cell(row=row_idx, column=1, value=t["discipline"])
        ws.cell(row=row_idx, column=2, value=t["role_title"])
        ws.cell(row=row_idx, column=3, value=role_type)

        # Columns 4 and 5 hold the unadjusted hours / FTE in both layouts.
        ws.cell(row=row_idx, column=4, value=t["total_hours"]).number_format = '#,##0.00'
        fte_cell = ws.cell(row=row_idx, column=5, value=t["fte"])
        fte_cell.number_format = '0.00'

        if has_efficiency:
            ws.cell(row=row_idx, column=6, value=t["adjusted_hours"]).number_format = '#,##0.00'
            adj_cell = ws.cell(row=row_idx, column=7, value=t["adjusted_fte"])
            adj_cell.number_format = '0.00'
            saved = ws.cell(row=row_idx, column=8, value=t["hours_saved"])
            saved.number_format = '#,##0.00'
            if t["hours_saved"] > 0:
                saved.font = SAVED_FONT
            # Highlighting and headcount follow the adjusted FTE here.
            staffing_fte = t["adjusted_fte"]
            staffing_cell = adj_cell
        else:
            staffing_fte = t["fte"]
            staffing_cell = fte_cell

        if staffing_fte >= 1:
            staffing_cell.font = FTE_FONT
        headcount_cell = ws.cell(row=row_idx, column=num_cols, value=_headcount_for_fte(staffing_fte))
        headcount_cell.number_format = '0.0'

        if t["is_programme_role"]:
            for c in range(1, num_cols + 1):
                ws.cell(row=row_idx, column=c).fill = PROGRAMME_FILL

        row_idx += 1

    # Summary (one blank row below the table)
    row_idx += 1
    ws.cell(row=row_idx, column=1, value="SUMMARY").font = Font(bold=True, size=12)
    row_idx += 1

    delivery_hours = sum(t["total_hours"] for t in team if not t["is_programme_role"])
    delivery_fte = sum(t["fte"] for t in team if not t["is_programme_role"])
    prog_hours = sum(t["total_hours"] for t in team if t["is_programme_role"])
    prog_fte = sum(t["fte"] for t in team if t["is_programme_role"])

    if has_efficiency:
        adj_del_hours = sum(t["adjusted_hours"] for t in team if not t["is_programme_role"])
        adj_del_fte = sum(t["adjusted_fte"] for t in team if not t["is_programme_role"])

        for col_idx, label in enumerate(["Orig Hours", "Orig FTE", "Adj Hours", "Adj FTE", "Saved"], 3):
            ws.cell(row=row_idx, column=col_idx, value=label).font = Font(bold=True)
        row_idx += 1

        # The programme row repeats its original figures in the adjusted
        # columns, as its "(unchanged)" label states.
        summary_rows = [
            ("Delivery Roles", delivery_hours, delivery_fte, adj_del_hours, adj_del_fte),
            ("Programme Roles (unchanged)", prog_hours, prog_fte, prog_hours, prog_fte),
            ("TOTAL", total_hours, total_fte, adjusted_hours, adjusted_fte),
        ]
        for label, hrs, fte, adj_hrs, adj_fte in summary_rows:
            is_total = label == "TOTAL"
            ws.cell(row=row_idx, column=2, value=label).font = Font(bold=is_total)
            ws.cell(row=row_idx, column=3, value=round(hrs, 2)).number_format = '#,##0.00'
            ws.cell(row=row_idx, column=4, value=round(fte, 2)).number_format = '0.00'
            ws.cell(row=row_idx, column=5, value=round(adj_hrs, 2)).number_format = '#,##0.00'
            adj_cell = ws.cell(row=row_idx, column=6, value=round(adj_fte, 2))
            adj_cell.number_format = '0.00'
            if is_total:
                adj_cell.font = Font(bold=True, size=12)
            saved = ws.cell(row=row_idx, column=7, value=round(hrs - adj_hrs, 2))
            saved.number_format = '#,##0.00'
            if hrs - adj_hrs > 0:
                saved.font = SAVED_FONT
            row_idx += 1
    else:
        for col_idx, label in enumerate(["Hours", "FTE"], 3):
            ws.cell(row=row_idx, column=col_idx, value=label).font = Font(bold=True)
        row_idx += 1

        summary_data = [
            ("Delivery Roles", delivery_hours, delivery_fte),
            ("Programme Roles", prog_hours, prog_fte),
            ("TOTAL", total_hours, total_fte),
        ]
        for label, hours, fte in summary_data:
            is_total = label == "TOTAL"
            ws.cell(row=row_idx, column=2, value=label).font = Font(bold=is_total)
            ws.cell(row=row_idx, column=3, value=round(hours, 2)).number_format = '#,##0.00'
            fte_cell = ws.cell(row=row_idx, column=4, value=round(fte, 2))
            fte_cell.number_format = '0.00'
            if is_total:
                fte_cell.font = Font(bold=True, size=12)
            row_idx += 1

    # Column widths (A..I are always set, even in the 6-column layout)
    for letter, width in zip("ABCDEFGHI", (25, 40, 12, 15, 15, 15, 15, 15, 15)):
        ws.column_dimensions[letter].width = width


def _workbook_to_bytes(wb: Workbook) -> bytes:
    """Save *wb* into an in-memory buffer and return the buffer's full contents."""
    stream = io.BytesIO()
    wb.save(stream)
    # getvalue() returns everything written, regardless of the stream position.
    return stream.getvalue()