hp-studios-ai-content-agent/backend/tests/test_renderer_schema_alignment.py
DJP 72c8a0d0fe Initial import — HP Studios AI Content Agent
Full-stack app that turns HP customer briefs (master asset + regional
supporting docs) into a set of branded Word deliverables via a RAG +
agent pipeline.

Stack
- FastAPI + SQLAlchemy + pgvector + RQ (backend, Python 3.12)
- React + Vite + TypeScript + Tailwind + TanStack Query (frontend)
- Claude Opus 4.7 (generation) + Haiku 4.5 (translation/OCR)
- Voyage voyage-3 or OpenAI text-embedding-3-small (embeddings)
- python-docx (branded Word output, Montserrat + HP blue)
- Docker Compose (5 services)

Features
- 6 built-in deliverable types (leadership themes, regional enrichment,
  LinkedIn posts, webinar spec, infographic specs, ABM enablement)
- Data-driven deliverable types: admins add new types at runtime via
  prompt + JSON schema + template_json — no code, no deploy
- Generic schema-driven review form + generic Word template renderer
- Document ingestion pipeline with translation, chunking, pgvector RAG
- Pluggable auth provider (password now, Entra SSO later); admin/user roles
- Re-roll / retry on every deliverable; cascading delete; brief editing;
  inline document upload; progress hints; router-level ErrorBoundary
- Admin panel with test-render preview for new deliverable types
- Help page at /help with architecture overview and usage guide

82 backend tests passing, 18 skipped (gated live-API tests).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 17:11:25 -04:00

386 lines
16 KiB
Python

"""
Schema-renderer alignment tests.
For each deliverable type:
1. Build a pydantic-VALID payload using model_validate (with all required fields
satisfying schema constraints such as exact list lengths).
2. Serialise to dict with model.model_dump().
3. Pass the dict to render_to_bytes.
4. Assert no exception is raised and valid .docx bytes are returned.
This is the key contract: a pydantic-valid payload MUST NOT raise in the renderer.
"""
from __future__ import annotations
import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import pytest
from app.hp_branding.render import render_to_bytes
from app.schemas import (
LeadershipThemes,
RegionalEnrichment,
LinkedInPosts,
WebinarSpec,
InfographicSpecs,
ABMEnablement,
)
# ---------------------------------------------------------------------------
# Helper: assert render succeeds and returns valid .docx bytes
# ---------------------------------------------------------------------------
def _assert_renders(deliverable_type: str, payload: dict) -> None:
    """Render *payload* and assert the output is a structurally sound .docx.

    Args:
        deliverable_type: Renderer key passed straight to ``render_to_bytes``
            (for example ``'leadership_themes'``).
        payload: Plain-dict payload handed to the renderer.

    Raises:
        AssertionError: If the renderer output is not ``bytes``, lacks the ZIP
            ``PK`` signature, is 2048 bytes or smaller, cannot be opened as a
            ZIP archive, contains a corrupt member, or is missing the
            ``[Content_Types].xml`` part.
        Exception: Anything raised by ``render_to_bytes`` propagates unchanged,
            which is what makes the calling test fail.
    """
    # Function-scope imports keep this helper self-contained; both modules are
    # standard library.
    import io
    import zipfile

    result = render_to_bytes(deliverable_type, payload)
    assert isinstance(result, bytes), f'{deliverable_type}: result is not bytes'
    assert result[:2] == b'PK', f'{deliverable_type}: not a valid docx (missing PK header)'
    assert len(result) > 2048, f'{deliverable_type}: result suspiciously small ({len(result)} bytes)'

    # A two-byte prefix only proves the data *starts* like a ZIP. A .docx is an
    # Open Packaging Conventions ZIP, so also verify the archive opens, every
    # member passes its CRC check, and the mandatory content-types part exists.
    buffer = io.BytesIO(result)
    assert zipfile.is_zipfile(buffer), f'{deliverable_type}: not a readable ZIP archive'
    with zipfile.ZipFile(buffer) as archive:
        corrupt_member = archive.testzip()
        assert corrupt_member is None, (
            f'{deliverable_type}: corrupt archive member {corrupt_member!r}'
        )
        assert '[Content_Types].xml' in archive.namelist(), (
            f'{deliverable_type}: not a valid docx (missing [Content_Types].xml)'
        )
# ---------------------------------------------------------------------------
# LeadershipThemes — requires exactly 4 tracks, each with exactly 4 pillars
# and at least 5 content_outputs; cross_track.tone_guidelines >= 4
# ---------------------------------------------------------------------------
def _make_messaging_pillars():
return [
{'name': f'Pillar {i}', 'message': f'Message for pillar {i}.'}
for i in range(1, 5)
]
def _make_content_outputs():
types = ['anchor_article', 'data_brief', 'linkedin', 'webinar', 'visual_concept']
return [
{'type': t, 'title': f'{t.title()} title', 'description': f'Description for {t}.'}
for t in types
]
def _make_track(number: int):
    """Build one leadership-theme track dict for track ``number``.

    The track reuses the shared messaging-pillar and content-output fixtures
    and fills every other field with text that embeds the track number.
    """
    suffix = f'for track {number}'

    track = {'number': number}
    track['theme'] = f'Track {number} Theme'
    track['narrative'] = f'Strategic narrative {suffix}.'
    track['messaging_pillars'] = _make_messaging_pillars()
    track['data_points'] = [f'Data point {letter} {suffix}' for letter in ('A', 'B')]
    track['audience_hook'] = f'Audience hook paragraph {suffix}.'
    track['content_outputs'] = _make_content_outputs()
    track['dos'] = [f'Do {what} {suffix}' for what in ('this', 'that')]
    track['donts'] = [f"Don't do {what} {suffix}" for what in ('this', 'that')]
    return track
def _make_proof_point_matrix():
return [
{'claim': 'Claim 1', 'evidence': 'Evidence 1', 'source': 'Source Report 2024'},
{'claim': 'Claim 2', 'evidence': 'Evidence 2', 'source': 'Source Report 2024'},
]
def _make_cadence():
return [
{'week': 1, 'focus': 'Track 1 launch', 'outputs': ['Anchor article', 'LinkedIn post #1']},
{'week': 2, 'focus': 'Track 2 launch', 'outputs': ['Data brief', 'LinkedIn post #2']},
]
def _make_tone_guidelines():
return [
'Confident, not arrogant.',
'Evidence-led.',
'Local-first.',
'Empathetic.',
]
def test_leadership_themes_schema_renders():
    """A schema-valid LeadershipThemes payload must render to a .docx.

    The payload uses four tracks and four tone guidelines, then goes through
    ``model_validate`` -> ``model_dump`` -> the renderer.
    """
    cross_track = {
        'unified_positioning': 'HP empowers IT leaders to drive business growth through AI.',
        'proof_point_matrix': _make_proof_point_matrix(),
        'cadence': _make_cadence(),
        'tone_guidelines': _make_tone_guidelines(),
    }
    payload = {
        'title': 'HP — Four Thought-Leadership Content Tracks',
        'subtitle': "Derived from IT's Moment to Lead",
        'tracks': list(map(_make_track, range(1, 5))),
        'cross_track': cross_track,
    }

    validated = LeadershipThemes.model_validate(payload)
    _assert_renders('leadership_themes', validated.model_dump())
# ---------------------------------------------------------------------------
# RegionalEnrichment — same track structure as LeadershipThemes but with
# local_data_points, scenarios (>= 3), and enriched cross_track.
# ---------------------------------------------------------------------------
def _make_enriched_track(number: int):
    """Build one regionally enriched track dict for track ``number``.

    Compared with the plain track fixture this adds ``local_data_points`` and
    three ``scenarios`` (the file's schema notes give three as the minimum).
    """
    def _scenario(index):
        # One localised scenario; city and industry are keyed by the index.
        return {
            'title': f'Scenario {index} for track {number}',
            'body': f'Body text for scenario {index} in track {number}, set in City {index}.',
            'city': f'City {index}',
            'industry': f'Industry {index}',
        }

    suffix = f'for track {number}'

    track = {'number': number}
    track['theme'] = f'Track {number} Theme'
    track['narrative'] = f'Localised strategic narrative {suffix}.'
    track['messaging_pillars'] = _make_messaging_pillars()
    track['data_points'] = [f'Global data point {suffix}']
    track['local_data_points'] = [f'Local data point {suffix} — Local Survey 2024']
    track['audience_hook'] = f'Localised audience hook {suffix}.'
    track['scenarios'] = [_scenario(index) for index in (1, 2, 3)]
    track['content_outputs'] = _make_content_outputs()
    track['dos'] = [f'Localised do {suffix}', f'Another do {suffix}']
    track['donts'] = [f'Localised dont {suffix}', f'Another dont {suffix}']
    return track
def test_regional_enrichment_schema_renders():
    """A schema-valid RegionalEnrichment payload must render to a .docx.

    The payload is assembled section by section (market context, sector
    matrix, cross-track), validated with ``model_validate``, dumped to a dict
    and handed to the renderer.
    """
    adoption_table = {
        'heading': 'AI Adoption Status',
        'headers': ['Indicator', 'Data'],
        'rows': [['Planning AI', '41%'], ['In production', '10%']],
    }
    market_context = {
        'narrative': 'The Polish AI market is growing steadily.',
        'data_tables': [adoption_table],
    }

    sector_rows = (
        ('Banking', 'High NIS2 exposure and AI readiness.', [1, 4]),
        ('Energy', 'Critical infrastructure, large fleets.', [3, 4]),
    )
    sector_matrix = [
        {'sector': sector, 'relevance': relevance, 'priority_tracks': priority}
        for sector, relevance, priority in sector_rows
    ]

    cross_track = {
        'unified_positioning': 'HP empowers Polish IT leaders.',
        'unifying_theme': 'From AI ambition to business results.',
        'proof_point_matrix': _make_proof_point_matrix(),
        'cadence': _make_cadence(),
        'tone_guidelines': _make_tone_guidelines(),
        'source_attribution': [
            {
                'title': 'Polish AI Survey',
                'description': 'HP Inc Polska & Computerworld, N=101, 2024',
            },
        ],
    }

    payload = {
        'title': 'HP Poland — Enriched Leadership Themes',
        'subtitle': 'Local Market Edition',
        'region': 'Poland',
        'currency': 'PLN',
        'regulatory_frameworks': ['GDPR', 'NIS2 Directive', 'EU AI Act'],
        'market_context': market_context,
        'tracks': list(map(_make_enriched_track, range(1, 5))),
        'sector_matrix': sector_matrix,
        'cross_track': cross_track,
    }

    validated = RegionalEnrichment.model_validate(payload)
    _assert_renders('regional_enrichment', validated.model_dump())
# ---------------------------------------------------------------------------
# LinkedInPosts — exactly 5 posts (min/max=5), 5-7 hashtags per post,
# visual_pairing_metadata list (no length constraint but
# should have one per post for completeness).
# ---------------------------------------------------------------------------
def _make_post(number: int, track):
return {
'number': number,
'title': f'Post {number} Title',
'track': track,
'audience': 'CIOs and IT Directors',
'post_text': (
f'Hook line for post {number} — stat-led and compelling.\n'
f'Second line builds on the hook with supporting detail.'
),
'cta': f'Download the report for post {number} [link]',
'hashtags': [f'#Hashtag{i}' for i in range(1, 6)], # exactly 5 hashtags
'visual_hint': f'Visual hint for post {number}',
}
def _make_visual_pairing(post_number: int):
return {
'post_number': post_number,
'concept': f'Visual concept for post {post_number}',
'colour_cue': 'HP Blue (#0096D6)',
'image_brief': f'Designer brief for post {post_number}: clean, data-forward composition.',
}
def test_linkedin_posts_schema_renders():
    """A schema-valid LinkedInPosts payload must render to a .docx.

    Five posts are generated, each paired with one visual-pairing entry; the
    posts reference tracks 1, 4, 2, 3 and ``'cross-track'`` in that order.
    """
    track_by_post = [1, 4, 2, 3, 'cross-track']

    posts = []
    pairings = []
    for post_no, track in enumerate(track_by_post, start=1):
        posts.append(_make_post(post_no, track))
        pairings.append(_make_visual_pairing(post_no))

    payload = {'posts': posts, 'visual_pairing_metadata': pairings}
    validated = LinkedInPosts.model_validate(payload)
    _assert_renders('linkedin_posts', validated.model_dump())
# ---------------------------------------------------------------------------
# WebinarSpec — 3-4 speakers, >= 2 narrative_arc phases,
# >= 4 post_webinar_activation steps, duration 20-120 min.
# ---------------------------------------------------------------------------
def _make_speaker(idx: int):
return {
'role': f'Speaker Role {idx}',
'name_placeholder': f'[Speaker {idx} Name]',
'bio': f'Speaker {idx} brings relevant expertise.',
'responsibilities': [f'Responsibility {idx}A', f'Responsibility {idx}B'],
}
def test_webinar_spec_schema_renders():
    """A schema-valid WebinarSpec payload must render to a .docx.

    The payload has three speakers, three narrative-arc phases, four
    post-webinar activation steps and a 40-minute duration.
    """
    overview = {
        'format': 'Virtual panel webinar',
        'target_audience': 'CIOs and IT Directors at Polish enterprises',
        'registration_goal': '150+ registrants',
        'promotion_channels': ['LinkedIn', 'Email', 'Partner networks'],
        'follow_up_assets': ['Recording', 'Executive summary PDF'],
        'rationale': 'Polish AI market shows a clear gap between ambition and production.',
    }

    # Tabular fixtures: each row is zipped with its field names below.
    agenda_fields = ('time_range', 'segment', 'description', 'duration_minutes')
    agenda_rows = (
        ('0:00-0:02', 'Welcome', 'Host opens the session.', 2),
        ('0:02-0:30', 'Panel Discussion', 'Moderated discussion covering three acts.', 28),
        ('0:30-0:40', 'Q&A and Close', 'Audience questions and session wrap.', 10),
    )
    arc_rows = (
        ('Act 1: Reality Check', 'Acknowledge the AI adoption gap.'),
        ('Act 2: The Bridge', 'Share paths from pilot to production.'),
        ('Act 3: Enablement Layer', 'Connect strategy to infrastructure.'),
    )
    activation_fields = ('when', 'action', 'owner')
    activation_rows = (
        ('Within 24 hours', 'Send thank-you email with recording link.', 'Marketing Ops'),
        ('Within 48 hours', 'Sales team follows up with priority accounts.', 'Sales'),
        ('Week 2', 'Publish key takeaways as LinkedIn post.', 'Content team'),
        ('Week 3', 'Re-send to non-openers with new subject line.', 'Marketing Ops'),
    )

    payload = {
        'topic': 'From Pilots to Productivity: A Polish Enterprise Webinar',
        'duration_minutes': 40,
        'overview': overview,
        'agenda': [dict(zip(agenda_fields, row)) for row in agenda_rows],
        'speakers': [_make_speaker(idx) for idx in (1, 2, 3)],
        'narrative_arc': [
            {'phase': phase, 'description': description}
            for phase, description in arc_rows
        ],
        'post_webinar_activation': [
            dict(zip(activation_fields, row)) for row in activation_rows
        ],
    }

    validated = WebinarSpec.model_validate(payload)
    _assert_renders('webinar_spec', validated.model_dump())
# ---------------------------------------------------------------------------
# InfographicSpecs — exactly 3 infographics (min/max=3).
# ---------------------------------------------------------------------------
def _make_infographic(idx: int):
return {
'title': f'Infographic {idx} Title',
'subtitle': f'Infographic {idx} Subtitle',
'format_options': ['Vertical scroll (digital)', 'A3 (print)', 'LinkedIn carousel'],
'colors': ['#0096D6', '#005A8C', '#FFFFFF'],
'typography_notes': 'Montserrat Bold for headlines, Montserrat Regular for body.',
'copy': {
'headline': f'Headline for infographic {idx}.',
'subhead': f'Subhead for infographic {idx}.',
'supporting_text': f'Supporting text for infographic {idx}.',
},
'data_points': [
{'stat': f'{10 * idx}%', 'label': f'Label for stat {idx}', 'source': 'Research 2024'},
],
'cta': f'CTA line for infographic {idx}.',
'source_attribution': ['HP Research 2024'],
}
def test_infographic_specs_schema_renders():
    """A schema-valid InfographicSpecs payload (three infographics) must render."""
    infographics = []
    for idx in (1, 2, 3):
        infographics.append(_make_infographic(idx))

    validated = InfographicSpecs.model_validate({'infographics': infographics})
    _assert_renders('infographic_specs', validated.model_dump())
# ---------------------------------------------------------------------------
# ABMEnablement — GenericTemplate: exactly 3 subject_lines;
# AccountTailored: 5-6 rationale rows, exactly 6 talking points.
# ---------------------------------------------------------------------------
def _make_talking_point(idx: int):
return {
'topic': f'Topic {idx}',
'angle': f'HP angle for topic {idx}.',
'supporting_data': f'Key data point {idx} supporting this topic.',
'hp_capability': f'HP Capability {idx}',
}
def test_abm_enablement_schema_renders():
    """A schema-valid ABMEnablement payload must render to a .docx.

    The generic template carries three subject lines; the account-tailored
    section carries five rationale rows and six talking points.
    """
    # Email bodies are written as paragraphs and joined with blank lines.
    generic_email = '\n\n'.join([
        'Dear [First Name],',
        'I am reaching out because [Company Name] is at the centre of the AI shift '
        'in [industry].',
        'Best regards,\n[Name]\n[Title], HP Inc Polska',
        'P.S. See attached report.',
    ])
    generic_template = {
        'subject_lines': [
            'Subject line option 1 — insight-led',
            'Subject line option 2 — data-led',
            'Subject line option 3 — question-led',
        ],
        'email_body': generic_email,
        'placeholders': ['[Company Name]', '[First Name]', '[industry]'],
        'follow_up_protocol': [
            'Day 3: Follow up with local data point.',
            'Day 7: Connect on LinkedIn.',
            'Day 14: Share relevant content.',
            'Log all activity in CRM.',
        ],
    }

    rationale_rows = (
        ('Fleet size', '10,000+ endpoints across 5 countries.'),
        ('NIS2 classification', 'Essential entity under NIS2.'),
        ('Regulatory exposure', 'GDPR and sector-specific regulation.'),
        ('Transformation initiative', 'Active digital transformation programme.'),
        ('AI interest signals', 'CTO quoted on AI in trade media Q3 2024.'),
    )
    tailored_email = '\n\n'.join([
        'Dear [CIO Name],',
        "Your organisation's position as an essential entity under NIS2 means that "
        'the infrastructure decision is also a compliance decision.',
        'Best regards,\n[Name]\nEnterprise Account Director, HP Inc',
    ])
    account_tailored = {
        'account_name': 'Example Enterprise SA',
        'account_rationale': [
            {'dimension': dimension, 'detail': detail}
            for dimension, detail in rationale_rows
        ],
        'email_body': tailored_email,
        'talking_points': list(map(_make_talking_point, range(1, 7))),
    }

    payload = {
        'generic_template': generic_template,
        'account_tailored': account_tailored,
    }
    validated = ABMEnablement.model_validate(payload)
    _assert_renders('abm_enablement', validated.model_dump())