# pahvalentines/backend/app/services/profanity.py

"""Profanity detection service using better-profanity library and custom banned words list."""

import re
from pathlib import Path

from better_profanity import profanity

# Populate the shared better-profanity filter's word list a single time, at
# import, so the check functions below never have to reload it per call.
profanity.load_censor_words()

# Load custom banned words list
_banned_words_path = Path(__file__).resolve().parent.parent.parent / "banned_words.txt"
_banned_words: frozenset[str] = frozenset()
_banned_pattern: re.Pattern | None = None

if _banned_words_path.exists():
    _raw = _banned_words_path.read_text(encoding="utf-8")
    _banned_words = frozenset(
        line.lower()
        for line in (l.strip() for l in _raw.splitlines())
        if line
    )
    if _banned_words:
        _escaped = sorted((re.escape(w) for w in _banned_words), key=len, reverse=True)
        _banned_pattern = re.compile(
            "(?:" + "|".join(_escaped) + ")",
            re.IGNORECASE,
        )


def contains_banned_word(text: str) -> bool:
    """Report whether the custom banned-words pattern matches inside ``text``.

    The precompiled module-level pattern is searched anywhere in ``text``.

    Args:
        text: The text to scan.

    Returns:
        True if the pattern finds a match; False if there is no match, if
        ``text`` is empty, or if no banned-words pattern was compiled.
    """
    if text and _banned_pattern is not None:
        return _banned_pattern.search(text) is not None
    return False


def contains_profanity(text: str) -> bool:
    """Decide whether ``text`` is flagged by either profanity check.

    The better-profanity filter is consulted first; the custom banned-words
    check only runs when the library does not flag the text.

    Args:
        text: The text to check for profanity.

    Returns:
        True if either check flags the text, False otherwise. Empty text is
        never flagged.
    """
    if not text:
        return False
    library_hit = profanity.contains_profanity(text)
    return library_hit or contains_banned_word(text)


def check_fields(fields: dict[str, str | None]) -> dict[str, bool]:
    """Check multiple fields for profanity.

    Args:
        fields: Dictionary of field_name -> text to check.
                None values are skipped.

    Returns:
        Dictionary of field_name -> has_profanity (bool).
        Only includes fields that were checked (non-None values).
    """
    results = {}
    for field_name, text in fields.items():
        if text is not None:
            results[field_name] = contains_profanity(text)
    return results