ProfilePage implementation:
- Full profile page with avatar, editable display name, session count
- LinkedAccountCard and DisplayNameEditor components
- useProfile composable wrapping user store operations
- Support for linking/unlinking OAuth providers
- Logout and logout-all-devices functionality

Profanity service with bypass detection:
- Uses better-profanity library for base detection
- Enhanced to catch common bypass attempts:
  - Number suffixes/prefixes (shit123, 69fuck)
  - Leet-speak substitutions (sh1t, f@ck, $hit)
  - Separator characters (s.h.i.t, f-u-c-k)
- Integrated into PATCH /api/users/me endpoint
- 17 unit tests covering all normalization strategies

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
219 lines
5.9 KiB
Python
219 lines
5.9 KiB
Python
"""Profanity filtering service for user-generated content.
|
|
|
|
This module provides validation for display names and other user-provided
|
|
text to filter inappropriate language.
|
|
|
|
Uses the better-profanity library for detection with customizable word lists.
|
|
Includes preprocessing to catch common bypass attempts like:
|
|
- Numbers attached to words (shit123)
|
|
- Leet-speak substitutions (sh1t, f@ck)
|
|
- Special characters embedded in words (s.h.i.t)
|
|
|
|
Example:
|
|
from app.services.profanity_service import validate_display_name
|
|
|
|
# In an API endpoint
|
|
is_valid, error = validate_display_name("PlayerName")
|
|
if not is_valid:
|
|
raise HTTPException(400, error)
|
|
"""
|
|
|
|
import re
|
|
|
|
from better_profanity import profanity
|
|
|
|
# Initialize profanity filter with default word list.
# This runs once at import time on the shared module-level `profanity`
# object, which is the same object every function in this module queries.
# Can be customized with profanity.add_censor_words([...])
profanity.load_censor_words()
|
|
|
|
# Leet-speak character mappings for normalization.
# Each key is a single look-alike character and each value is the lowercase
# letter it stands in for. Several keys may share a letter ("4" and "@" both
# map to "a"). No value is itself a key, so substitutions never chain.
LEET_SUBSTITUTIONS: dict[str, str] = {
    # Digits used as letters
    "0": "o",
    "1": "i",
    "3": "e",
    "4": "a",
    "5": "s",
    "7": "t",
    "8": "b",
    # Symbols used as letters
    "@": "a",
    "$": "s",
    "!": "i",
    "+": "t",
}
|
|
|
|
|
|
class ProfanityValidationError(Exception):
    """Signal that a piece of user-provided content contains profanity.

    Plain ``Exception`` subclass with no extra state; any message passed to
    the constructor is carried in the usual way.
    """
|
|
|
|
|
|
def _separate_letters_numbers(text: str) -> str:
|
|
"""Separate letter sequences from number sequences with spaces.
|
|
|
|
Catches bypass attempts like "shit123" -> "shit 123".
|
|
|
|
Args:
|
|
text: The text to process.
|
|
|
|
Returns:
|
|
Text with spaces between letter and number sequences.
|
|
"""
|
|
result = re.sub(r"([a-zA-Z])(\d)", r"\1 \2", text)
|
|
result = re.sub(r"(\d)([a-zA-Z])", r"\1 \2", result)
|
|
return result
|
|
|
|
|
|
def _apply_leet_substitutions(text: str) -> str:
    """Lowercase the text and map leet-speak characters back to letters.

    Turns disguised spellings such as "sh1t", "f@ck" or "$hit" into their
    plain-letter form using the ``LEET_SUBSTITUTIONS`` table. Characters
    that are not in the table pass through unchanged (after lowercasing).

    Args:
        text: The text to process.

    Returns:
        The lowercased text with every mapped character replaced.
    """
    lowered = text.lower()
    return "".join(LEET_SUBSTITUTIONS.get(char, char) for char in lowered)
|
|
|
|
|
|
def _remove_separators(text: str) -> str:
|
|
"""Remove separator characters used to break up words.
|
|
|
|
Catches bypass attempts like "s.h.i.t", "f-u-c-k".
|
|
|
|
Args:
|
|
text: The text to process.
|
|
|
|
Returns:
|
|
Text with separator characters removed.
|
|
"""
|
|
return re.sub(r"[.\-_]", "", text)
|
|
|
|
|
|
def contains_profanity(text: str) -> bool:
    """Check if text contains profanity, including common bypass spellings.

    The text is checked as-is and after several normalizations. Each
    normalization is applied both to the raw text and to the text with
    separator characters removed, so combined tricks are also covered:

    1. Original text
    2. Letters separated from numbers (shit123 -> shit 123)
    3. Leet-speak substitution (sh1t -> shit)
    4. Separator removal (s.h.i.t -> shit)
    5. Separator removal + number separation (s.h.i.t123 -> shit 123)
    6. Separator removal + leet-speak substitution

    Args:
        text: The text to check.

    Returns:
        True if any variant is flagged by the profanity filter, False
        otherwise.

    Example:
        if contains_profanity(username):
            reject_username()
    """
    without_separators = _remove_separators(text)

    # dict.fromkeys de-duplicates while preserving order: the untouched text
    # is still checked first, and variants that a normalization left
    # unchanged are not scanned a second time.
    candidates = dict.fromkeys(
        variant
        for base in (text, without_separators)
        for variant in (
            base,
            _separate_letters_numbers(base),
            _apply_leet_substitutions(base),
        )
    )

    # any() stops at the first flagged variant and always yields a real bool.
    return any(profanity.contains_profanity(candidate) for candidate in candidates)
|
|
|
|
|
|
def validate_display_name(name: str) -> tuple[bool, str | None]:
    """Check a display name against the profanity filter.

    Delegates to ``contains_profanity``, so bypass attempts such as
    leet-speak (sh1t) and number suffixes (shit123) are covered as well.

    Args:
        name: The display name to validate.

    Returns:
        ``(True, None)`` when the name is acceptable, otherwise
        ``(False, message)`` where ``message`` is a user-facing error string.

    Example:
        is_valid, error = validate_display_name("BadWord123")
        if not is_valid:
            raise HTTPException(400, error)
    """
    if not contains_profanity(name):
        return True, None
    return False, "Display name contains inappropriate language"
|
|
|
|
|
|
def validate_text(text: str, field_name: str = "text") -> tuple[bool, str | None]:
    """Check an arbitrary text field against the profanity filter.

    General-purpose counterpart of the display-name check: it delegates to
    ``contains_profanity`` and names the offending field in the error.

    Args:
        text: The text to validate.
        field_name: Name of the field; it is title-cased (``str.title``) and
            used as the first word of the error message, e.g. "bio" -> "Bio".

    Returns:
        ``(True, None)`` when the text is acceptable, otherwise
        ``(False, message)`` with a user-facing error string.

    Example:
        is_valid, error = validate_text(bio, "bio")
    """
    if not contains_profanity(text):
        return True, None

    label = field_name.title()
    return False, f"{label} contains inappropriate language"
|
|
|
|
|
|
def censor_text(text: str, censor_char: str = "*") -> str:
    """Mask profanity in text instead of rejecting the text outright.

    The text is handed unchanged to better-profanity's ``censor``; the
    bypass normalizations used by ``contains_profanity`` are not applied
    here, so only what the library itself recognises is masked.

    Args:
        text: The text to censor.
        censor_char: Character used to mask profane words (default: *).

    Returns:
        The text with each detected profane word replaced by a run of
        ``censor_char``. The length of that run is chosen by the library.

    Example:
        safe_text = censor_text(user_comment)
        safe_text = censor_text(user_comment, censor_char="#")
    """
    censored = profanity.censor(text, censor_char)
    return censored
|
|
|
|
|
|
def add_custom_words(words: list[str]) -> None:
    """Register extra blocked words with the shared profanity filter.

    Intended for game-specific or community-specific terms that the
    default word list does not cover. The words are added to the
    module-level ``profanity`` object, so they affect every later check
    made through this module.

    Args:
        words: Words to add to the filter.

    Example:
        add_custom_words(["customterm", "anotherterm"])
    """
    extra_terms = words
    profanity.add_censor_words(extra_terms)
|