Remove word-boundary anchors so banned words are caught even when embedded in larger strings (e.g. "xBatmanx" now matches "Batman"). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
67 lines
2 KiB
Python
67 lines
2 KiB
Python
"""Profanity detection service using better-profanity library and custom banned words list."""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
|
|
from better_profanity import profanity
|
|
|
|
# Load better-profanity's default word list once, at import time, so that
# every later contains_profanity() call reuses the already-initialized filter.
profanity.load_censor_words()

# Optional custom banned words: one entry per line in banned_words.txt, located
# two directories above the directory that contains this module.
_banned_words_path = Path(__file__).resolve().parent.parent.parent / "banned_words.txt"
_banned_words: frozenset[str] = frozenset()
_banned_pattern: re.Pattern[str] | None = None

# A missing file is tolerated: the defaults above stay in place and only the
# better-profanity check is effective.
if _banned_words_path.exists():
    # "utf-8-sig" decodes exactly like "utf-8" but drops a leading byte-order
    # mark. With plain "utf-8" a BOM would stay glued to the first entry
    # (str.strip() does not remove U+FEFF), so that word could never match.
    _raw = _banned_words_path.read_text(encoding="utf-8-sig")
    # Trim surrounding whitespace, skip blank lines, and lower-case so entries
    # that differ only by case collapse into a single one.
    _banned_words = frozenset(
        entry.lower()
        for entry in (raw_line.strip() for raw_line in _raw.splitlines())
        if entry
    )
    if _banned_words:
        # Escape every entry so it is matched literally, and put the longest
        # entries first in the alternation.
        _escaped = sorted((re.escape(w) for w in _banned_words), key=len, reverse=True)
        # No word-boundary anchors on purpose: an entry also matches when it is
        # embedded inside a longer string. Matching is case-insensitive.
        _banned_pattern = re.compile(
            "(?:" + "|".join(_escaped) + ")",
            re.IGNORECASE,
        )
|
|
|
|
|
|
def contains_banned_word(text: str) -> bool:
    """Report whether any entry of the custom banned words list occurs in text.

    Args:
        text: The text to scan.

    Returns:
        False when ``text`` is empty or no banned-word pattern was compiled;
        otherwise True if the compiled pattern matches anywhere in ``text``.
    """
    # The emptiness test comes first so the pattern is only consulted for
    # non-empty input.
    if text and _banned_pattern is not None:
        hit = _banned_pattern.search(text)
        return hit is not None
    return False
|
|
|
|
|
|
def contains_profanity(text: str) -> bool:
    """Decide whether the given text is flagged as profane.

    The text is flagged when either the better-profanity filter or the custom
    banned words check reports a hit.

    Args:
        text: The text to check for profanity.

    Returns:
        True if profanity is detected, False otherwise (including for empty
        text).
    """
    if text:
        # The library check runs first; the custom list is only consulted
        # when the library reports nothing.
        flagged_by_library = profanity.contains_profanity(text)
        return flagged_by_library or contains_banned_word(text)
    return False
|
|
|
|
|
|
def check_fields(fields: dict[str, str | None]) -> dict[str, bool]:
|
|
"""Check multiple fields for profanity.
|
|
|
|
Args:
|
|
fields: Dictionary of field_name -> text to check.
|
|
None values are skipped.
|
|
|
|
Returns:
|
|
Dictionary of field_name -> has_profanity (bool).
|
|
Only includes fields that were checked (non-None values).
|
|
"""
|
|
results = {}
|
|
for field_name, text in fields.items():
|
|
if text is not None:
|
|
results[field_name] = contains_profanity(text)
|
|
return results
|