# rackham-meeting-analyzer/backend/tests/test_validation.py

"""
Tests for JSON schema validation.
"""
import pytest
import json
from pathlib import Path
from app.services.validation import validate_analysis, first_error

# Load the schema once at import time. JSON text is UTF-8 by specification, so
# the encoding is stated explicitly instead of relying on the platform default
# (which is not UTF-8 on every system).
SCHEMA_PATH = Path(__file__).parent.parent / "app" / "schemas" / "video_analysis.schema.json"
with open(SCHEMA_PATH, encoding="utf-8") as f:
    SCHEMA = json.load(f)


def test_valid_minimal_analysis():
    """Check that a small, fully populated payload is accepted.

    The payload is assembled piece by piece (transcript, one participant,
    timeline, metrics, feedback) and handed to ``validate_analysis``, which
    is expected to report it as valid and return no error.
    """
    transcript = {
        "duration_sec": 120.0,
        "speakers": ["S1", "S2"],
        "utterances": [
            {"speaker": "S1", "start_sec": 0.0, "end_sec": 5.0, "text": "Hello, how are you?"},
            {"speaker": "S2", "start_sec": 5.5, "end_sec": 10.0, "text": "I'm good, thanks!"},
        ],
    }

    # Every behavior starts at zero; the one timeline entry below tags S1's
    # utterance as an open question, so that counter is bumped to 1.
    behavior_counts = dict.fromkeys(
        (
            "open_question",
            "closed_question",
            "testing_understanding",
            "summarizing",
            "bringing_in",
            "proposing",
            "giving_info_fact",
            "giving_info_opinion",
            "disagreeing",
            "defending_attacking",
            "shutting_out_interrupting",
        ),
        0,
    )
    behavior_counts["open_question"] = 1

    participant_s1 = {
        "id": "S1",
        "behavior_counts": behavior_counts,
        "speaking_time_sec": 5.0,
        "pull_push": {"pull_count": 1, "push_count": 0, "ratio": 1.0},
        "filler_per_min": 0.0,
        "question_quality": {"open": 1, "closed": 0, "ratio": 1.0},
        "scores": {"clarity": 85.0, "impact": 70.0, "inclusion": 60.0},
        "action_items": [
            {
                "title": "Increase Pull behaviors",
                "why": "To gather more information",
                "how": "Ask more open-ended questions",
                "example_utterance_id": 0,
            },
            {
                "title": "Test understanding",
                "why": "To ensure alignment",
                "how": "Summarize key points",
                "example_utterance_id": 0,
            },
        ],
    }

    timeline = [
        {
            "utterance_id": 0,
            "speaker": "S1",
            "behavior": "open_question",
            "start_sec": 0.0,
            "end_sec": 5.0,
        },
    ]

    metrics = {
        "speaking_time": [
            {"speaker": "S1", "seconds": 5.0},
            {"speaker": "S2", "seconds": 5.0},
        ],
        "pull_push_transitions": [],
        "alerts": [],
    }

    feedback = {
        "overall": {
            "strengths": ["Good question"],
            "opportunities": ["Ask more questions"],
        },
        "by_participant": [
            {"id": "S1", "notes": ["Strong opener"]},
        ],
    }

    payload = {
        "version": "v1",
        "transcript": transcript,
        "analysis": {
            "participants": [participant_s1],
            "timeline": timeline,
            "metrics": metrics,
            "feedback": feedback,
        },
    }

    ok, message = validate_analysis(payload)

    # Surface the validator's message in the failure output to ease debugging.
    assert ok, f"Validation failed: {message}"
    assert message is None


def test_missing_required_field():
    """Check that a payload without the top-level "analysis" key is rejected.

    The returned message must mention either "required" or "analysis"
    (case-insensitively).
    """
    transcript = {"duration_sec": 120.0, "speakers": ["S1"], "utterances": []}
    # The "analysis" key is deliberately left out of the payload.
    payload = {"version": "v1", "transcript": transcript}

    ok, message = validate_analysis(payload)

    assert not ok
    assert message is not None
    lowered = message.lower()
    assert "required" in lowered or "analysis" in lowered


def test_invalid_speaker_pattern():
    """Check that a speaker label not shaped like "S1" is rejected."""
    # "Speaker1" is the offending value; the expected form is "S1".
    transcript = {
        "duration_sec": 120.0,
        "speakers": ["Speaker1"],
        "utterances": [],
    }
    # The analysis section is left empty so the speaker label is the only
    # unusual value in the payload.
    analysis = {
        "participants": [],
        "timeline": [],
        "metrics": {"speaking_time": [], "pull_push_transitions": [], "alerts": []},
        "feedback": {
            "overall": {"strengths": [], "opportunities": []},
            "by_participant": [],
        },
    }
    payload = {"version": "v1", "transcript": transcript, "analysis": analysis}

    ok, message = validate_analysis(payload)

    assert not ok
    assert message is not None


def test_invalid_score_range():
    """Check that a participant score above 100 is rejected."""
    zeroed_behaviors = dict.fromkeys(
        (
            "open_question",
            "closed_question",
            "testing_understanding",
            "summarizing",
            "bringing_in",
            "proposing",
            "giving_info_fact",
            "giving_info_opinion",
            "disagreeing",
            "defending_attacking",
            "shutting_out_interrupting",
        ),
        0,
    )

    # Two placeholder action items; each reuses its label for every text field.
    action_items = [
        {"title": label, "why": label, "how": label, "example_utterance_id": 0}
        for label in ("Test", "Test2")
    ]

    participant = {
        "id": "S1",
        "behavior_counts": zeroed_behaviors,
        "speaking_time_sec": 0,
        "pull_push": {"pull_count": 0, "push_count": 0, "ratio": 0},
        "filler_per_min": 0,
        "question_quality": {"open": 0, "closed": 0, "ratio": 0},
        # clarity is the value under test: 150.0 is above the 100 ceiling.
        "scores": {"clarity": 150.0, "impact": 70.0, "inclusion": 60.0},
        "action_items": action_items,
    }

    payload = {
        "version": "v1",
        "transcript": {"duration_sec": 120.0, "speakers": ["S1"], "utterances": []},
        "analysis": {
            "participants": [participant],
            "timeline": [],
            "metrics": {"speaking_time": [], "pull_push_transitions": [], "alerts": []},
            "feedback": {
                "overall": {"strengths": [], "opportunities": []},
                "by_participant": [],
            },
        },
    }

    ok, message = validate_analysis(payload)

    assert not ok
    assert message is not None


def test_first_error_returns_none_for_valid():
    """Check that ``first_error`` yields ``None`` for an empty-but-complete payload."""
    transcript = {"duration_sec": 120.0, "speakers": ["S1"], "utterances": []}
    metrics = {"speaking_time": [], "pull_push_transitions": [], "alerts": []}
    feedback = {
        "overall": {"strengths": [], "opportunities": []},
        "by_participant": [],
    }
    payload = {
        "version": "v1",
        "transcript": transcript,
        "analysis": {
            "participants": [],
            "timeline": [],
            "metrics": metrics,
            "feedback": feedback,
        },
    }

    assert first_error(payload) is None


def test_action_items_count():
    """Check that a participant with a single action item is rejected.

    The expected count is 2-3 items; only the too-few case is exercised here.
    """
    zeroed_behaviors = dict.fromkeys(
        (
            "open_question",
            "closed_question",
            "testing_understanding",
            "summarizing",
            "bringing_in",
            "proposing",
            "giving_info_fact",
            "giving_info_opinion",
            "disagreeing",
            "defending_attacking",
            "shutting_out_interrupting",
        ),
        0,
    )

    # Only one item is supplied, which is below the expected minimum of two.
    lone_action_item = {
        "title": "Only one",
        "why": "Not enough",
        "how": "Need more",
        "example_utterance_id": 0,
    }

    participant = {
        "id": "S1",
        "behavior_counts": zeroed_behaviors,
        "speaking_time_sec": 0,
        "pull_push": {"pull_count": 0, "push_count": 0, "ratio": 0},
        "filler_per_min": 0,
        "question_quality": {"open": 0, "closed": 0, "ratio": 0},
        "scores": {"clarity": 70.0, "impact": 70.0, "inclusion": 60.0},
        "action_items": [lone_action_item],
    }

    payload = {
        "version": "v1",
        "transcript": {"duration_sec": 120.0, "speakers": ["S1"], "utterances": []},
        "analysis": {
            "participants": [participant],
            "timeline": [],
            "metrics": {"speaking_time": [], "pull_push_transitions": [], "alerts": []},
            "feedback": {
                "overall": {"strengths": [], "opportunities": []},
                "by_participant": [],
            },
        },
    }

    ok, message = validate_analysis(payload)

    assert not ok
    assert message is not None