ppt-tool/backend/tests/test_content_intelligence.py
Vadym Samoilenko bdf6e4b4d0 Fix Docker build, test suite, and runtime issues for local deployment
- Fix UV index strategy: mark PyTorch CPU index as explicit with name
- Add --index-strategy unsafe-best-match to Dockerfile uv pip install
- Fix redis version constraint (>=5.0,<6) for ARQ compatibility
- Fix Anthropic model name (claude-sonnet-4-5-20250929)
- Fix IMAGE_PROVIDER enum value (gemini_flash, not google)
- Resolve middlewares.py vs middlewares/ package conflict
- Fix worker import paths (models.sql.presentation, models.sql.slide, utils split)
- Fix seed script FK resolution by importing all related models
- Fix test suite: async fixture scoping, greenlet dep, regex patterns, fixture params
- Fix frontend TypeScript error (Boolean cast for layout.react_code)
- Regenerate package-lock.json with i18n packages
- Add initial Alembic migration (autogenerated from all models)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 17:56:30 +00:00

142 lines
3.9 KiB
Python

"""Tests for content classification regex patterns.
Patterns are duplicated here to avoid importing the full service module,
which has heavy transitive dependencies (google.genai, etc.).
These regexes must stay in sync with services/content_intelligence_service.py.
"""
import re
import pytest
# --- Duplicated from services/content_intelligence_service.py ---
# Metric detector. Two alternatives:
#   1. A bare figure: a currency-prefixed number with an optional K/M/B/T or %
#      suffix, a number followed by %, or a number followed by a K/M/B/T
#      suffix that ends on a word boundary.
#   2. A metric keyword (grew, revenue, roi, ...) followed, after at most 30
#      lazily-matched characters, by a number with optional currency prefix
#      and optional suffix. `.` does not match newlines here (no DOTALL), so
#      keyword and number must be on the same line. The keywords carry no
#      word-boundary anchors, so they also match inside longer words.
# Compiled case-insensitive and in verbose mode (pattern whitespace ignored).
_METRIC_RE = re.compile(
r"""
(?:
[\$€£¥]\s?\d[\d,.]*[KMBTkmbt%]? |
\d[\d,.]*\s?% |
\d[\d,.]*\s?[KMBTkmbt]\b
)
|
(?:
(?:grew|growth|increased?|decreased?|rose|fell|dropped|declined|revenue|profit|margin|roi|cagr|arpu)
.{0,30}?
[\$€£¥]?\d[\d,.]*[KMBTkmbt%]?
)
""",
re.IGNORECASE | re.VERBOSE,
)
# Quotation detector: an opening straight or curly double quote, 15-300
# characters (lazy; DOTALL lets the span cross newlines), a closing straight or
# curly double quote, then an optional attribution made of a hyphen, em dash or
# en dash followed by 2-60 characters. Either quote class accepts any of the
# three quote characters, so opening/closing styles are not required to pair.
_QUOTE_RE = re.compile(
r'["\u201c\u201d].{15,300}?["\u201c\u201d]'
r"(?:\s*[-\u2014\u2013]\s*.{2,60})?",
re.DOTALL,
)
# Table-row detector: a whole line (MULTILINE) that starts and ends with `|`
# and has at least one character in between.
_TABLE_RE = re.compile(r"^\|.+\|$", re.MULTILINE)
# Timeline-marker detector (case-insensitive): a four-digit number starting
# with 19 or 20, a quarter label Q1-Q4, or a month name/abbreviation followed
# by whitespace and four digits. There are no word-boundary anchors.
_TIMELINE_RE = re.compile(
r"(?:(?:19|20)\d{2}|Q[1-4]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*\s+\d{4})",
re.IGNORECASE,
)
# Comparison-phrase detector (case-insensitive, word-bounded): "vs"/"vs.",
# "versus", "compare(d) to", "in contrast", "on the other hand", "whereas",
# "alternatively".
_COMPARISON_RE = re.compile(
r"\b(?:vs\.?|versus|compared?\s+to|in\s+contrast|on\s+the\s+other\s+hand|whereas|alternatively)\b",
re.IGNORECASE,
)
# Bullet-line detector (MULTILINE): optional leading whitespace, one of `-`,
# `*` or `•`, at least one whitespace character, then the item text.
_LIST_RE = re.compile(r"^[\s]*[-*•]\s+.+", re.MULTILINE)
# Image-reference detector (case-insensitive): a Markdown image opener `![`,
# "see figure/image/diagram/chart/photo", "attached image", or a file
# extension .png/.jpg/.jpeg/.gif/.webp/.svg ending on a word boundary.
_IMAGE_REF_RE = re.compile(
r"(?:!\[|see\s+(?:figure|image|diagram|chart|photo)|attached\s+image|\.(?:png|jpg|jpeg|gif|webp|svg)\b)",
re.IGNORECASE,
)
class TestMetricRegex:
    """Exercise ``_METRIC_RE`` against figures it should and should not flag."""

    # Inputs containing a currency amount, a percentage, a magnitude-suffixed
    # number, or a metric keyword followed by a number.
    _WITH_METRIC = (
        "$2.3M revenue",
        "45% growth",
        "1,200K units",
        "revenue grew 45%",
        "profit increased by $2M",
        "ROI of 340%",
        "CAGR 12%",
    )

    # Ordinary sentences with no digits at all.
    _WITHOUT_METRIC = (
        "The cat sat on the mat",
        "We had a meeting yesterday",
    )

    @pytest.mark.parametrize("text", _WITH_METRIC)
    def test_detects_metrics(self, text):
        """Every sample carrying a figure must produce a match."""
        found = _METRIC_RE.search(text)
        assert found is not None, f"Failed to detect metric: {text}"

    @pytest.mark.parametrize("text", _WITHOUT_METRIC)
    def test_rejects_non_metrics(self, text):
        """Digit-free prose must not produce a match."""
        found = _METRIC_RE.search(text)
        assert found is None
class TestQuoteRegex:
    """Exercise ``_QUOTE_RE`` with straight quotes, curly quotes and a too-short quote."""

    def test_detects_quoted_text(self):
        """A straight-quoted sentence followed by a dash attribution matches."""
        sample = '"Innovation is the ability to see change as an opportunity" — John Doe'
        hit = _QUOTE_RE.search(sample)
        assert hit is not None

    def test_detects_smart_quotes(self):
        """Curly opening/closing quotes are accepted as delimiters."""
        sample = '\u201cThis is a quoted statement\u201d'
        hit = _QUOTE_RE.search(sample)
        assert hit is not None

    def test_rejects_short_quotes(self):
        """A quote shorter than the 15-character minimum is ignored."""
        sample = '"Hi"'
        hit = _QUOTE_RE.search(sample)
        assert hit is None
class TestTableRegex:
    """Exercise ``_TABLE_RE`` against a Markdown table and plain prose."""

    def test_detects_markdown_table(self):
        """Pipe-delimited rows are recognised as table lines."""
        markdown = "| Name | Value |\n| --- | --- |\n| A | 1 |"
        row = _TABLE_RE.search(markdown)
        assert row is not None

    def test_rejects_non_table(self):
        """A sentence without pipe delimiters is not a table line."""
        prose = "This is just normal text"
        row = _TABLE_RE.search(prose)
        assert row is None
class TestTimelineRegex:
    """Exercise ``_TIMELINE_RE`` with year, quarter and month-year markers.

    Includes negative cases so that an over-permissive pattern (one that
    matches arbitrary text or short numbers) fails the suite.
    """

    @pytest.mark.parametrize("text", [
        "In 2023, we launched the product",
        "Q1 results were strong",
        "January 2024 earnings",
    ])
    def test_detects_timeline(self, text):
        """Years, quarter labels and month-year phrases must match."""
        assert _TIMELINE_RE.search(text), f"Failed to detect timeline marker: {text}"

    @pytest.mark.parametrize("text", [
        "The cat sat on the mat",
        "We had a meeting yesterday",
        # Short numbers are not four-digit 19xx/20xx years.
        "Order 42 shipped to 7 customers",
    ])
    def test_rejects_non_timeline(self, text):
        """Text with no year, quarter or month-year phrase must not match."""
        assert not _TIMELINE_RE.search(text), f"Unexpected timeline match: {text}"
class TestComparisonRegex:
    """Exercise every alternative of ``_COMPARISON_RE`` plus negative cases.

    The positive list covers each phrase in the pattern's alternation so a
    typo in any single branch is caught; the negative list guards against the
    pattern matching ordinary prose.
    """

    @pytest.mark.parametrize("text", [
        "Plan A vs. Plan B",
        "Plan A versus Plan B",
        "compared to last year",
        "compare to the baseline",
        "in contrast to competitors",
        "on the other hand, they chose",
        "whereas the rival declined",
        "Alternatively, we could wait",
    ])
    def test_detects_comparison(self, text):
        """Each comparison phrase must be found (case-insensitively)."""
        assert _COMPARISON_RE.search(text), f"Failed to detect comparison: {text}"

    @pytest.mark.parametrize("text", [
        "The cat sat on the mat",
        "We had a meeting yesterday",
        "Revenue grew 45%",
    ])
    def test_rejects_non_comparison(self, text):
        """Sentences without a comparison phrase must not match."""
        assert not _COMPARISON_RE.search(text), f"Unexpected comparison match: {text}"
class TestListRegex:
    """Exercise ``_LIST_RE`` with each bullet character and with non-list text."""

    def test_detects_bullet_list(self):
        """Each hyphen-bulleted line yields exactly one match."""
        text = "- Item one\n- Item two\n- Item three"
        matches = _LIST_RE.findall(text)
        assert len(matches) == 3

    def test_detects_asterisk_list(self):
        """Asterisk bullets are recognised."""
        text = "* First\n* Second"
        assert _LIST_RE.search(text)

    def test_detects_dot_bullet_list(self):
        """The Unicode bullet character is recognised."""
        text = "• First\n• Second"
        assert _LIST_RE.search(text)

    def test_detects_indented_bullet(self):
        """Leading whitespace before the bullet is allowed."""
        text = "  - nested item"
        assert _LIST_RE.search(text)

    @pytest.mark.parametrize("text", [
        "This is just normal text",
        # A dash in the middle of a line is not a bullet.
        "A well-known fact - with a dash",
        # The bullet must be followed by whitespace.
        "-no space after dash",
    ])
    def test_rejects_non_list(self, text):
        """Lines that do not start with a bullet plus whitespace must not match."""
        assert not _LIST_RE.search(text), f"Unexpected list match: {text}"
class TestImageRefRegex:
    """Exercise ``_IMAGE_REF_RE`` with image references and with plain prose.

    Negative cases are included so that a pattern which matches any text
    (or any use of the word "see") fails the suite.
    """

    @pytest.mark.parametrize("text", [
        "See figure 1 below",
        "see diagram for details",
        "image.png",
        "![alt text](photo.jpg)",
        "attached image shows",
    ])
    def test_detects_image_references(self, text):
        """Phrases, Markdown image syntax and image file names must match."""
        assert _IMAGE_REF_RE.search(text), f"Failed to detect image reference: {text}"

    @pytest.mark.parametrize("text", [
        "The cat sat on the mat",
        # "see" alone, without figure/image/diagram/chart/photo, is not a reference.
        "We will see the results tomorrow",
    ])
    def test_rejects_non_image_references(self, text):
        """Prose with no image phrase, Markdown image or image extension must not match."""
        assert not _IMAGE_REF_RE.search(text), f"Unexpected image reference match: {text}"