mantimon-tcg/backend/app/services/profanity_service.py
Cal Corum cd3efcb528 Implement ProfilePage and profanity filter for display names (F1-006)
ProfilePage implementation:
- Full profile page with avatar, editable display name, session count
- LinkedAccountCard and DisplayNameEditor components
- useProfile composable wrapping user store operations
- Support for linking/unlinking OAuth providers
- Logout and logout-all-devices functionality

Profanity service with bypass detection:
- Uses better-profanity library for base detection
- Enhanced to catch common bypass attempts:
  - Number suffixes/prefixes (shit123, 69fuck)
  - Leet-speak substitutions (sh1t, f@ck, $hit)
  - Separator characters (s.h.i.t, f-u-c-k)
- Integrated into PATCH /api/users/me endpoint (see the sketch after the commit details)
- 17 unit tests covering all normalization strategies

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 16:06:42 -06:00
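
The PATCH /api/users/me wiring lives outside this file. Below is a minimal sketch of how that handler might call validate_display_name, assuming FastAPI with Pydantic; the route path comes from the commit message, while the schema, auth dependency, and persistence details are hypothetical.

from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from types import SimpleNamespace

from app.services.profanity_service import validate_display_name

router = APIRouter()


class ProfileUpdate(BaseModel):
    display_name: str | None = None


async def get_current_user() -> SimpleNamespace:
    # Hypothetical stand-in for the project's real auth dependency.
    return SimpleNamespace(display_name=None)


@router.patch("/api/users/me")
async def update_profile(payload: ProfileUpdate, user=Depends(get_current_user)):
    if payload.display_name is not None:
        is_valid, error = validate_display_name(payload.display_name)
        if not is_valid:
            # e.g. "Display name contains inappropriate language"
            raise HTTPException(status_code=400, detail=error)
        user.display_name = payload.display_name
        # ... persist the change via the user store/repository (omitted)
    return {"display_name": user.display_name}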


"""Profanity filtering service for user-generated content.
This module provides validation for display names and other user-provided
text to filter inappropriate language.
Uses the better-profanity library for detection with customizable word lists.
Includes preprocessing to catch common bypass attempts like:
- Numbers attached to words (shit123)
- Leet-speak substitutions (sh1t, f@ck)
- Special characters embedded in words (s.h.i.t)
Example:
from app.services.profanity_service import validate_display_name
# In an API endpoint
is_valid, error = validate_display_name("PlayerName")
if not is_valid:
raise HTTPException(400, error)
"""
import re

from better_profanity import profanity

# Initialize profanity filter with default word list
# Can be customized with profanity.add_censor_words([...])
profanity.load_censor_words()

# Leet-speak character mappings for normalization
LEET_SUBSTITUTIONS: dict[str, str] = {
    "0": "o",
    "1": "i",
    "3": "e",
    "4": "a",
    "5": "s",
    "7": "t",
    "8": "b",
    "@": "a",
    "$": "s",
    "!": "i",
    "+": "t",
}


class ProfanityValidationError(Exception):
    """Error raised when content contains profanity."""

    pass


def _separate_letters_numbers(text: str) -> str:
    """Separate letter sequences from number sequences with spaces.

    Catches bypass attempts like "shit123" -> "shit 123".

    Args:
        text: The text to process.

    Returns:
        Text with spaces between letter and number sequences.
    """
    result = re.sub(r"([a-zA-Z])(\d)", r"\1 \2", text)
    result = re.sub(r"(\d)([a-zA-Z])", r"\1 \2", result)
    return result


def _apply_leet_substitutions(text: str) -> str:
    """Convert leet-speak characters to their letter equivalents.

    Catches bypass attempts like "sh1t", "f@ck", "$hit".

    Args:
        text: The text to process.

    Returns:
        Text with leet-speak characters replaced.
    """
    result = text.lower()
    for leet, letter in LEET_SUBSTITUTIONS.items():
        result = result.replace(leet, letter)
    return result


def _remove_separators(text: str) -> str:
    """Remove separator characters used to break up words.

    Catches bypass attempts like "s.h.i.t", "f-u-c-k".

    Args:
        text: The text to process.

    Returns:
        Text with separator characters removed.
    """
    return re.sub(r"[.\-_]", "", text)


def contains_profanity(text: str) -> bool:
    """Check if text contains profanity.

    Applies multiple normalization strategies to catch bypass attempts:

    1. Direct check on original text
    2. Separate letters from numbers (shit123 -> shit 123)
    3. Leet-speak substitution (sh1t -> shit)
    4. Separator removal (s.h.i.t -> shit)

    Args:
        text: The text to check.

    Returns:
        True if profanity is detected, False otherwise.

    Example:
        if contains_profanity(username):
            reject_username()
    """
    # Check original text first
    if profanity.contains_profanity(text):
        return True

    # Check with letters separated from numbers (shit123 -> shit 123)
    separated = _separate_letters_numbers(text)
    if profanity.contains_profanity(separated):
        return True

    # Check with leet-speak substitutions
    leet_normalized = _apply_leet_substitutions(text)
    if profanity.contains_profanity(leet_normalized):
        return True

    # Check with separators removed (s.h.i.t -> shit)
    no_separators = _remove_separators(text)
    if profanity.contains_profanity(no_separators):
        return True

    # Check combined: leet + separators removed
    combined = _apply_leet_substitutions(_remove_separators(text))
    return bool(profanity.contains_profanity(combined))
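
# Note: the combined pass above also resolves mixed bypass attempts, e.g.
# "$h.1.t" -> _remove_separators -> "$h1t" -> _apply_leet_substitutions -> "shit".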


def validate_display_name(name: str) -> tuple[bool, str | None]:
    """Validate a display name for profanity.

    Uses enhanced profanity detection that catches bypass attempts
    like leet-speak (sh1t) and number suffixes (shit123).

    Args:
        name: The display name to validate.

    Returns:
        Tuple of (is_valid, error_message).
        If valid, returns (True, None).
        If invalid, returns (False, "error message").

    Example:
        is_valid, error = validate_display_name("BadWord123")
        if not is_valid:
            raise HTTPException(400, error)
    """
    if contains_profanity(name):
        return False, "Display name contains inappropriate language"
    return True, None


def validate_text(text: str, field_name: str = "text") -> tuple[bool, str | None]:
    """Validate arbitrary text for profanity.

    Generic validation function for any user-provided text field.
    Uses enhanced profanity detection that catches bypass attempts.

    Args:
        text: The text to validate.
        field_name: Name of the field for error messages.

    Returns:
        Tuple of (is_valid, error_message).

    Example:
        is_valid, error = validate_text(bio, "bio")
    """
    if contains_profanity(text):
        return False, f"{field_name.title()} contains inappropriate language"
    return True, None


def censor_text(text: str, censor_char: str = "*") -> str:
    """Censor profanity in text by replacing with censor characters.

    Useful for displaying user content that may contain profanity
    rather than rejecting it entirely.

    Args:
        text: The text to censor.
        censor_char: Character to use for censoring (default: *).

    Returns:
        Text with profanity replaced by censor characters.

    Example:
        safe_text = censor_text("some bad words")
        # Returns: "some *** words"
    """
    return profanity.censor(text, censor_char)


def add_custom_words(words: list[str]) -> None:
    """Add custom words to the profanity filter.

    Use this to add game-specific or community-specific terms
    that should be blocked.

    Args:
        words: List of words to add to the filter.

    Example:
        add_custom_words(["customterm", "anotherterm"])
    """
    profanity.add_censor_words(words)
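
The "17 unit tests covering all normalization strategies" mentioned in the commit message live in a separate test module not shown here. As a rough sketch, the documented bypass cases could be exercised with pytest like this (test names, file layout, and case selection are illustrative assumptions):

import pytest

from app.services.profanity_service import contains_profanity, validate_display_name


@pytest.mark.parametrize(
    "name",
    [
        "shit123",   # number suffix
        "69fuck",    # number prefix
        "sh1t",      # leet digit substitution
        "$hit",      # leet symbol substitution
        "s.h.i.t",   # dot separators
        "f-u-c-k",   # dash separators
    ],
)
def test_bypass_attempts_are_rejected(name):
    assert contains_profanity(name)
    is_valid, error = validate_display_name(name)
    assert not is_valid
    assert error == "Display name contains inappropriate language"


def test_clean_display_name_is_accepted():
    assert validate_display_name("PlayerName") == (True, None)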