# hp-studios-ai-content-agent/backend/tests/test_renderer_schema_alignment.py

"""
Schema-renderer alignment tests.

For each deliverable type:
  1. Build a pydantic-VALID payload using model_validate (with all required fields
     satisfying schema constraints such as exact list lengths).
  2. Serialise to dict with model.model_dump().
  3. Pass the dict to render_to_bytes.
  4. Assert no exception is raised and valid .docx bytes are returned.

This is the key contract: a pydantic-valid payload MUST NOT raise in the renderer.
"""
from __future__ import annotations

import sys
import os

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import pytest

from app.hp_branding.render import render_to_bytes
from app.schemas import (
    LeadershipThemes,
    RegionalEnrichment,
    LinkedInPosts,
    WebinarSpec,
    InfographicSpecs,
    ABMEnablement,
)


# ---------------------------------------------------------------------------
# Helper: assert render succeeds and returns valid .docx bytes
# ---------------------------------------------------------------------------

def _assert_renders(deliverable_type: str, payload: dict) -> None:
    """Render ``payload`` and check that the output looks like a .docx file.

    Calls ``render_to_bytes(deliverable_type, payload)`` and asserts, in
    order, that the result is a ``bytes`` object, that it begins with the
    two bytes ``PK``, and that it is longer than 2048 bytes. Any exception
    raised by the renderer propagates and fails the calling test.
    """
    docx_bytes = render_to_bytes(deliverable_type, payload)

    is_bytes = isinstance(docx_bytes, bytes)
    assert is_bytes, f'{deliverable_type}: result is not bytes'

    signature = docx_bytes[:2]
    assert signature == b'PK', f'{deliverable_type}: not a valid docx (missing PK header)'

    # A tiny archive is treated as a sign that rendering produced no content.
    size = len(docx_bytes)
    assert size > 2048, f'{deliverable_type}: result suspiciously small ({size} bytes)'


# ---------------------------------------------------------------------------
# LeadershipThemes — requires exactly 4 tracks, each with exactly 4 pillars
#                    and at least 5 content_outputs; cross_track.tone_guidelines >= 4
# ---------------------------------------------------------------------------

def _make_messaging_pillars():
    return [
        {'name': f'Pillar {i}', 'message': f'Message for pillar {i}.'}
        for i in range(1, 5)
    ]


def _make_content_outputs():
    types = ['anchor_article', 'data_brief', 'linkedin', 'webinar', 'visual_concept']
    return [
        {'type': t, 'title': f'{t.title()} title', 'description': f'Description for {t}.'}
        for t in types
    ]


def _make_track(number: int):
    """Return a track payload dict for track ``number``.

    The dict combines the shared messaging pillars and content outputs with
    track-specific theme, narrative, audience hook, two data points, two
    "dos" and two "donts", all of which embed ``number`` in their text.
    """
    pillars = _make_messaging_pillars()
    outputs = _make_content_outputs()

    data_points = [f'Data point {letter} for track {number}' for letter in ('A', 'B')]
    dos = [f'Do {what} for track {number}' for what in ('this', 'that')]
    donts = [f"Don't do {what} for track {number}" for what in ('this', 'that')]

    return {
        'number': number,
        'theme': f'Track {number} Theme',
        'narrative': f'Strategic narrative for track {number}.',
        'messaging_pillars': pillars,
        'data_points': data_points,
        'audience_hook': f'Audience hook paragraph for track {number}.',
        'content_outputs': outputs,
        'dos': dos,
        'donts': donts,
    }


def _make_proof_point_matrix():
    return [
        {'claim': 'Claim 1', 'evidence': 'Evidence 1', 'source': 'Source Report 2024'},
        {'claim': 'Claim 2', 'evidence': 'Evidence 2', 'source': 'Source Report 2024'},
    ]


def _make_cadence():
    return [
        {'week': 1, 'focus': 'Track 1 launch', 'outputs': ['Anchor article', 'LinkedIn post #1']},
        {'week': 2, 'focus': 'Track 2 launch', 'outputs': ['Data brief', 'LinkedIn post #2']},
    ]


def _make_tone_guidelines():
    return [
        'Confident, not arrogant.',
        'Evidence-led.',
        'Local-first.',
        'Empathetic.',
    ]


def test_leadership_themes_schema_renders():
    """A validated LeadershipThemes payload must render to .docx bytes.

    Builds a payload with tracks 1-4 plus a cross-track section, validates it
    with ``LeadershipThemes.model_validate``, and passes the dumped dict to
    the renderer under the ``leadership_themes`` deliverable type.
    """
    tracks = list(map(_make_track, range(1, 5)))
    cross_track = {
        'unified_positioning': 'HP empowers IT leaders to drive business growth through AI.',
        'proof_point_matrix': _make_proof_point_matrix(),
        'cadence': _make_cadence(),
        'tone_guidelines': _make_tone_guidelines(),
    }
    payload = {
        'title': 'HP — Four Thought-Leadership Content Tracks',
        'subtitle': "Derived from IT's Moment to Lead",
        'tracks': tracks,
        'cross_track': cross_track,
    }

    validated = LeadershipThemes.model_validate(payload)
    _assert_renders('leadership_themes', validated.model_dump())


# ---------------------------------------------------------------------------
# RegionalEnrichment — same track structure as LeadershipThemes but with
#   local_data_points, scenarios (>= 3), and enriched cross_track.
# ---------------------------------------------------------------------------

def _make_enriched_track(number: int):
    """Return a regionally enriched track payload dict for track ``number``.

    On top of the shared messaging pillars and content outputs, the dict
    carries one global and one local data point, three scenarios (numbered
    1 to 3, each with its own city and industry), and two "dos" and "donts".
    """

    def scenario(j):
        # One scenario entry; ``j`` numbers the scenario, city and industry.
        return {
            'title': f'Scenario {j} for track {number}',
            'body': f'Body text for scenario {j} in track {number}, set in City {j}.',
            'city': f'City {j}',
            'industry': f'Industry {j}',
        }

    lead_ins = ('Localised', 'Another')
    pillars = _make_messaging_pillars()
    scenarios = [scenario(j) for j in (1, 2, 3)]
    outputs = _make_content_outputs()

    return {
        'number': number,
        'theme': f'Track {number} Theme',
        'narrative': f'Localised strategic narrative for track {number}.',
        'messaging_pillars': pillars,
        'data_points': [f'Global data point for track {number}'],
        'local_data_points': [f'Local data point for track {number} — Local Survey 2024'],
        'audience_hook': f'Localised audience hook for track {number}.',
        'scenarios': scenarios,
        'content_outputs': outputs,
        'dos': [f'{lead_in} do for track {number}' for lead_in in lead_ins],
        'donts': [f'{lead_in} dont for track {number}' for lead_in in lead_ins],
    }


def test_regional_enrichment_schema_renders():
    """A validated RegionalEnrichment payload must render to .docx bytes.

    Assembles a Poland-flavoured payload (market context with one data table,
    enriched tracks 1-4, a two-row sector matrix and an enriched cross-track
    section), validates it with ``RegionalEnrichment.model_validate``, and
    passes the dumped dict to the renderer as ``regional_enrichment``.
    """
    adoption_table = {
        'heading': 'AI Adoption Status',
        'headers': ['Indicator', 'Data'],
        'rows': [['Planning AI', '41%'], ['In production', '10%']],
    }
    market_context = {
        'narrative': 'The Polish AI market is growing steadily.',
        'data_tables': [adoption_table],
    }

    tracks = list(map(_make_enriched_track, range(1, 5)))

    # (sector, relevance, priority_tracks) triples, expanded into dicts below.
    sector_rows = (
        ('Banking', 'High NIS2 exposure and AI readiness.', [1, 4]),
        ('Energy', 'Critical infrastructure, large fleets.', [3, 4]),
    )
    sector_matrix = [
        {'sector': sector, 'relevance': relevance, 'priority_tracks': priority}
        for sector, relevance, priority in sector_rows
    ]

    survey_source = {
        'title': 'Polish AI Survey',
        'description': 'HP Inc Polska & Computerworld, N=101, 2024',
    }
    cross_track = {
        'unified_positioning': 'HP empowers Polish IT leaders.',
        'unifying_theme': 'From AI ambition to business results.',
        'proof_point_matrix': _make_proof_point_matrix(),
        'cadence': _make_cadence(),
        'tone_guidelines': _make_tone_guidelines(),
        'source_attribution': [survey_source],
    }

    payload = {
        'title': 'HP Poland — Enriched Leadership Themes',
        'subtitle': 'Local Market Edition',
        'region': 'Poland',
        'currency': 'PLN',
        'regulatory_frameworks': ['GDPR', 'NIS2 Directive', 'EU AI Act'],
        'market_context': market_context,
        'tracks': tracks,
        'sector_matrix': sector_matrix,
        'cross_track': cross_track,
    }

    validated = RegionalEnrichment.model_validate(payload)
    _assert_renders('regional_enrichment', validated.model_dump())


# ---------------------------------------------------------------------------
# LinkedInPosts — exactly 5 posts (min/max=5), 5-7 hashtags per post,
#                 visual_pairing_metadata list (no length constraint but
#                 should have one per post for completeness).
# ---------------------------------------------------------------------------

def _make_post(number: int, track):
    return {
        'number': number,
        'title': f'Post {number} Title',
        'track': track,
        'audience': 'CIOs and IT Directors',
        'post_text': (
            f'Hook line for post {number} — stat-led and compelling.\n'
            f'Second line builds on the hook with supporting detail.'
        ),
        'cta': f'Download the report for post {number} [link]',
        'hashtags': [f'#Hashtag{i}' for i in range(1, 6)],  # exactly 5 hashtags
        'visual_hint': f'Visual hint for post {number}',
    }


def _make_visual_pairing(post_number: int):
    return {
        'post_number': post_number,
        'concept': f'Visual concept for post {post_number}',
        'colour_cue': 'HP Blue (#0096D6)',
        'image_brief': f'Designer brief for post {post_number}: clean, data-forward composition.',
    }


def test_linkedin_posts_schema_renders():
    """A validated LinkedInPosts payload must render to .docx bytes.

    Builds five posts (numbered 1-5, each assigned a track from a fixed list
    that ends with ``'cross-track'``) and one visual pairing per post,
    validates the payload with ``LinkedInPosts.model_validate``, and passes
    the dumped dict to the renderer as ``linkedin_posts``.
    """
    track_assignments = [1, 4, 2, 3, 'cross-track']

    posts = [
        _make_post(post_no, track)
        for post_no, track in enumerate(track_assignments, start=1)
    ]
    pairings = list(map(_make_visual_pairing, range(1, 6)))

    payload = {
        'posts': posts,
        'visual_pairing_metadata': pairings,
    }
    validated = LinkedInPosts.model_validate(payload)
    _assert_renders('linkedin_posts', validated.model_dump())


# ---------------------------------------------------------------------------
# WebinarSpec — 3-4 speakers, >= 2 narrative_arc phases,
#               >= 4 post_webinar_activation steps, duration 20-120 min.
# ---------------------------------------------------------------------------

def _make_speaker(idx: int):
    return {
        'role': f'Speaker Role {idx}',
        'name_placeholder': f'[Speaker {idx} Name]',
        'bio': f'Speaker {idx} brings relevant expertise.',
        'responsibilities': [f'Responsibility {idx}A', f'Responsibility {idx}B'],
    }


def test_webinar_spec_schema_renders():
    """A validated WebinarSpec payload must render to .docx bytes.

    Assembles a 40-minute webinar payload with an overview, a three-segment
    agenda, three speakers, a three-act narrative arc and four post-webinar
    activation steps, validates it with ``WebinarSpec.model_validate``, and
    passes the dumped dict to the renderer as ``webinar_spec``.
    """
    overview = {
        'format': 'Virtual panel webinar',
        'target_audience': 'CIOs and IT Directors at Polish enterprises',
        'registration_goal': '150+ registrants',
        'promotion_channels': ['LinkedIn', 'Email', 'Partner networks'],
        'follow_up_assets': ['Recording', 'Executive summary PDF'],
        'rationale': 'Polish AI market shows a clear gap between ambition and production.',
    }

    # Tabular fixtures: each row is zipped with its field names into a dict.
    agenda_fields = ('time_range', 'segment', 'description', 'duration_minutes')
    agenda_rows = (
        ('0:00-0:02', 'Welcome', 'Host opens the session.', 2),
        ('0:02-0:30', 'Panel Discussion', 'Moderated discussion covering three acts.', 28),
        ('0:30-0:40', 'Q&A and Close', 'Audience questions and session wrap.', 10),
    )
    agenda = [dict(zip(agenda_fields, row)) for row in agenda_rows]

    speakers = list(map(_make_speaker, (1, 2, 3)))

    arc_rows = (
        ('Act 1: Reality Check', 'Acknowledge the AI adoption gap.'),
        ('Act 2: The Bridge', 'Share paths from pilot to production.'),
        ('Act 3: Enablement Layer', 'Connect strategy to infrastructure.'),
    )
    narrative_arc = [
        {'phase': phase, 'description': description}
        for phase, description in arc_rows
    ]

    activation_rows = (
        ('Within 24 hours', 'Send thank-you email with recording link.', 'Marketing Ops'),
        ('Within 48 hours', 'Sales team follows up with priority accounts.', 'Sales'),
        ('Week 2', 'Publish key takeaways as LinkedIn post.', 'Content team'),
        ('Week 3', 'Re-send to non-openers with new subject line.', 'Marketing Ops'),
    )
    activation = [
        {'when': when, 'action': action, 'owner': owner}
        for when, action, owner in activation_rows
    ]

    payload = {
        'topic': 'From Pilots to Productivity: A Polish Enterprise Webinar',
        'duration_minutes': 40,
        'overview': overview,
        'agenda': agenda,
        'speakers': speakers,
        'narrative_arc': narrative_arc,
        'post_webinar_activation': activation,
    }

    validated = WebinarSpec.model_validate(payload)
    _assert_renders('webinar_spec', validated.model_dump())


# ---------------------------------------------------------------------------
# InfographicSpecs — exactly 3 infographics (min/max=3).
# ---------------------------------------------------------------------------

def _make_infographic(idx: int):
    return {
        'title': f'Infographic {idx} Title',
        'subtitle': f'Infographic {idx} Subtitle',
        'format_options': ['Vertical scroll (digital)', 'A3 (print)', 'LinkedIn carousel'],
        'colors': ['#0096D6', '#005A8C', '#FFFFFF'],
        'typography_notes': 'Montserrat Bold for headlines, Montserrat Regular for body.',
        'copy': {
            'headline': f'Headline for infographic {idx}.',
            'subhead': f'Subhead for infographic {idx}.',
            'supporting_text': f'Supporting text for infographic {idx}.',
        },
        'data_points': [
            {'stat': f'{10 * idx}%', 'label': f'Label for stat {idx}', 'source': 'Research 2024'},
        ],
        'cta': f'CTA line for infographic {idx}.',
        'source_attribution': ['HP Research 2024'],
    }


def test_infographic_specs_schema_renders():
    """A validated InfographicSpecs payload must render to .docx bytes.

    Builds infographics 1-3, validates the payload with
    ``InfographicSpecs.model_validate``, and passes the dumped dict to the
    renderer as ``infographic_specs``.
    """
    infographics = list(map(_make_infographic, (1, 2, 3)))
    payload = {'infographics': infographics}

    validated = InfographicSpecs.model_validate(payload)
    _assert_renders('infographic_specs', validated.model_dump())


# ---------------------------------------------------------------------------
# ABMEnablement — GenericTemplate: exactly 3 subject_lines;
#                 AccountTailored: 5-6 rationale rows, exactly 6 talking points.
# ---------------------------------------------------------------------------

def _make_talking_point(idx: int):
    return {
        'topic': f'Topic {idx}',
        'angle': f'HP angle for topic {idx}.',
        'supporting_data': f'Key data point {idx} supporting this topic.',
        'hp_capability': f'HP Capability {idx}',
    }


def test_abm_enablement_schema_renders():
    """A validated ABMEnablement payload must render to .docx bytes.

    Builds a generic email template (three subject lines, body, placeholders
    and a four-step follow-up protocol) and an account-tailored section (five
    rationale rows, body and six talking points), validates the payload with
    ``ABMEnablement.model_validate``, and passes the dumped dict to the
    renderer as ``abm_enablement``.
    """
    generic_body = (
        'Dear [First Name],\n\n'
        'I am reaching out because [Company Name] is at the centre of the AI shift '
        'in [industry].\n\n'
        'Best regards,\n[Name]\n[Title], HP Inc Polska\n\n'
        'P.S. See attached report.'
    )
    generic_template = {
        'subject_lines': [
            'Subject line option 1 — insight-led',
            'Subject line option 2 — data-led',
            'Subject line option 3 — question-led',
        ],
        'email_body': generic_body,
        'placeholders': ['[Company Name]', '[First Name]', '[industry]'],
        'follow_up_protocol': [
            'Day 3: Follow up with local data point.',
            'Day 7: Connect on LinkedIn.',
            'Day 14: Share relevant content.',
            'Log all activity in CRM.',
        ],
    }

    # (dimension, detail) pairs, expanded into rationale rows below.
    rationale_rows = (
        ('Fleet size', '10,000+ endpoints across 5 countries.'),
        ('NIS2 classification', 'Essential entity under NIS2.'),
        ('Regulatory exposure', 'GDPR and sector-specific regulation.'),
        ('Transformation initiative', 'Active digital transformation programme.'),
        ('AI interest signals', 'CTO quoted on AI in trade media Q3 2024.'),
    )
    tailored_body = (
        'Dear [CIO Name],\n\n'
        'Your organisation\'s position as an essential entity under NIS2 means that '
        'the infrastructure decision is also a compliance decision.\n\n'
        'Best regards,\n[Name]\nEnterprise Account Director, HP Inc'
    )
    account_tailored = {
        'account_name': 'Example Enterprise SA',
        'account_rationale': [
            {'dimension': dimension, 'detail': detail}
            for dimension, detail in rationale_rows
        ],
        'email_body': tailored_body,
        'talking_points': list(map(_make_talking_point, range(1, 7))),
    }

    payload = {
        'generic_template': generic_template,
        'account_tailored': account_tailored,
    }
    validated = ABMEnablement.model_validate(payload)
    _assert_renders('abm_enablement', validated.model_dump())