"""Profanity filtering service for user-generated content. This module provides validation for display names and other user-provided text to filter inappropriate language. Uses the better-profanity library for detection with customizable word lists. Includes preprocessing to catch common bypass attempts like: - Numbers attached to words (shit123) - Leet-speak substitutions (sh1t, f@ck) - Special characters embedded in words (s.h.i.t) Example: from app.services.profanity_service import validate_display_name # In an API endpoint is_valid, error = validate_display_name("PlayerName") if not is_valid: raise HTTPException(400, error) """ import re from better_profanity import profanity # Initialize profanity filter with default word list # Can be customized with profanity.add_censor_words([...]) profanity.load_censor_words() # Leet-speak character mappings for normalization LEET_SUBSTITUTIONS: dict[str, str] = { "0": "o", "1": "i", "3": "e", "4": "a", "5": "s", "7": "t", "8": "b", "@": "a", "$": "s", "!": "i", "+": "t", } class ProfanityValidationError(Exception): """Error raised when content contains profanity.""" pass def _separate_letters_numbers(text: str) -> str: """Separate letter sequences from number sequences with spaces. Catches bypass attempts like "shit123" -> "shit 123". Args: text: The text to process. Returns: Text with spaces between letter and number sequences. """ result = re.sub(r"([a-zA-Z])(\d)", r"\1 \2", text) result = re.sub(r"(\d)([a-zA-Z])", r"\1 \2", result) return result def _apply_leet_substitutions(text: str) -> str: """Convert leet-speak characters to their letter equivalents. Catches bypass attempts like "sh1t", "f@ck", "$hit". Args: text: The text to process. Returns: Text with leet-speak characters replaced. """ result = text.lower() for leet, letter in LEET_SUBSTITUTIONS.items(): result = result.replace(leet, letter) return result def _remove_separators(text: str) -> str: """Remove separator characters used to break up words. Catches bypass attempts like "s.h.i.t", "f-u-c-k". Args: text: The text to process. Returns: Text with separator characters removed. """ return re.sub(r"[.\-_]", "", text) def contains_profanity(text: str) -> bool: """Check if text contains profanity. Applies multiple normalization strategies to catch bypass attempts: 1. Direct check on original text 2. Separate letters from numbers (shit123 -> shit 123) 3. Leet-speak substitution (sh1t -> shit) 4. Separator removal (s.h.i.t -> shit) Args: text: The text to check. Returns: True if profanity is detected, False otherwise. Example: if contains_profanity(username): reject_username() """ # Check original text first if profanity.contains_profanity(text): return True # Check with letters separated from numbers (shit123 -> shit 123) separated = _separate_letters_numbers(text) if profanity.contains_profanity(separated): return True # Check with leet-speak substitutions leet_normalized = _apply_leet_substitutions(text) if profanity.contains_profanity(leet_normalized): return True # Check with separators removed (s.h.i.t -> shit) no_separators = _remove_separators(text) if profanity.contains_profanity(no_separators): return True # Check combined: leet + separators removed combined = _apply_leet_substitutions(_remove_separators(text)) return bool(profanity.contains_profanity(combined)) def validate_display_name(name: str) -> tuple[bool, str | None]: """Validate a display name for profanity. Uses enhanced profanity detection that catches bypass attempts like leet-speak (sh1t) and number suffixes (shit123). Args: name: The display name to validate. Returns: Tuple of (is_valid, error_message). If valid, returns (True, None). If invalid, returns (False, "error message"). Example: is_valid, error = validate_display_name("BadWord123") if not is_valid: raise HTTPException(400, error) """ if contains_profanity(name): return False, "Display name contains inappropriate language" return True, None def validate_text(text: str, field_name: str = "text") -> tuple[bool, str | None]: """Validate arbitrary text for profanity. Generic validation function for any user-provided text field. Uses enhanced profanity detection that catches bypass attempts. Args: text: The text to validate. field_name: Name of the field for error messages. Returns: Tuple of (is_valid, error_message). Example: is_valid, error = validate_text(bio, "bio") """ if contains_profanity(text): return False, f"{field_name.title()} contains inappropriate language" return True, None def censor_text(text: str, censor_char: str = "*") -> str: """Censor profanity in text by replacing with censor characters. Useful for displaying user content that may contain profanity rather than rejecting it entirely. Args: text: The text to censor. censor_char: Character to use for censoring (default: *). Returns: Text with profanity replaced by censor characters. Example: safe_text = censor_text("some bad words") # Returns: "some *** words" """ return profanity.censor(text, censor_char) def add_custom_words(words: list[str]) -> None: """Add custom words to the profanity filter. Use this to add game-specific or community-specific terms that should be blocked. Args: words: List of words to add to the filter. Example: add_custom_words(["customterm", "anotherterm"]) """ profanity.add_censor_words(words)