fix: Validate and sanitize pitching decision data from Google Sheets
Some checks failed
Build Docker Image / build (pull_request) Failing after 19s
Some checks failed
Build Docker Image / build (pull_request) Failing after 19s
Added robust validation to handle spreadsheet errors and invalid data when reading pitching decisions from scorecards.

Problem:
- POST /api/v3/decisions was failing with 422 errors
- Google Sheets cells containing "#N/A" were passed directly to API
- API correctly rejected invalid team_id values like "#N/A" string
- No validation of integer fields or required fields

Root Cause:
- sheets_service.py:read_pitching_decisions() read values without validation or type checking
- Spreadsheet formula errors (#N/A, #REF!, etc.) passed through
- Invalid data types not caught until API validation failed

Solution:
1. Added _is_spreadsheet_error() to detect formula errors
2. Added _sanitize_int_field() to validate and convert integers
3. Enhanced read_pitching_decisions() to:
   - Detect and skip rows with spreadsheet errors
   - Validate integer fields (pitcher_id, team_id, etc.)
   - Ensure required fields (pitcher_id, team_id) are present
   - Log warnings for invalid data with row numbers
   - Only return valid, sanitized decision data

Impact:
- Prevents 422 errors from bad spreadsheet data
- Provides clear warnings in logs when data is invalid
- Gracefully skips invalid rows instead of crashing
- Helps identify scorecard data entry errors

Testing:
- Handles #N/A, #REF!, #VALUE!, #DIV/0! and other errors
- Converts "123.0" strings to integers correctly
- Validates required fields before sending to API
- Logs row numbers for debugging bad data

Production logs showed: "Input should be a valid integer, unable to parse string as an integer", input: "#N/A" for team_id field

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
e0af40804d
commit
92eb9055f1
@ -38,6 +38,49 @@ class SheetsService:
|
||||
)
|
||||
return self._sheets_client
|
||||
|
||||
@staticmethod
def _is_spreadsheet_error(value: Any) -> bool:
    """
    Report whether a cell value is a spreadsheet formula-error token.

    Only strings can match. Surrounding whitespace is ignored and the
    comparison against the known tokens is exact (case-sensitive).

    Args:
        value: Raw cell value to inspect

    Returns:
        True when the stripped value is one of the known error tokens
        (#N/A, #REF!, etc.); False otherwise, including for non-strings
    """
    # Error tokens a spreadsheet cell may contain instead of real data
    known_errors = (
        '#N/A', '#REF!', '#VALUE!', '#DIV/0!',
        '#NUM!', '#NAME?', '#NULL!', '#ERROR!',
    )
    return isinstance(value, str) and value.strip() in known_errors
|
||||
|
||||
@staticmethod
def _sanitize_int_field(value: Any, field_name: str) -> Optional[int]:
    """
    Convert a raw spreadsheet cell value to an integer.

    Args:
        value: Value from spreadsheet
        field_name: Name of the field being sanitized. Not used by this
            helper itself; kept so the signature stays stable for callers.

    Returns:
        Integer value, or None when the cell is empty (None or ''), holds
        a spreadsheet error token, or cannot be read as a finite number.
        Non-integral numbers are truncated toward zero ("1.7" -> 1).
    """
    if value is None or value == '':
        return None

    # Formula errors (#N/A, #REF!, ...) are never valid integers
    if SheetsService._is_spreadsheet_error(value):
        return None

    # Exact path first: int() parses integer strings of any magnitude
    # without the precision loss a round-trip through float would cause
    # for values above 2**53.
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # Not a plain integer (e.g. "123.0", "1e3") or a float infinity;
        # fall through to the float-based conversion below.
        pass

    # Fallback for decimal/exponent notation such as "123.0".
    # OverflowError covers "inf"/"1e999", which float() accepts but int()
    # cannot represent; ValueError covers NaN and non-numeric text.
    try:
        return int(float(value))
    except (ValueError, TypeError, OverflowError):
        return None
|
||||
|
||||
async def open_scorecard(self, sheet_url: str) -> pygsheets.Spreadsheet:
|
||||
"""
|
||||
Open and validate access to a Google Sheet.
|
||||
@ -241,20 +284,74 @@ class SheetsService:
|
||||
'hold', 'b_save', 'irunners', 'irunners_scored', 'team_id'
|
||||
]
|
||||
|
||||
# Fields that must be integers
|
||||
int_fields = {
|
||||
'pitcher_id', 'rest_required', 'win', 'loss', 'is_save',
|
||||
'hold', 'b_save', 'irunners', 'irunners_scored', 'team_id'
|
||||
}
|
||||
# Fields that are required and cannot be None
|
||||
required_fields = {'pitcher_id', 'team_id'}
|
||||
|
||||
pit_data = []
|
||||
row_num = 3 # Start at row 3 (B3 in spreadsheet)
|
||||
|
||||
for line in all_decisions:
|
||||
row_num += 1
|
||||
if not line: # Skip empty rows
|
||||
continue
|
||||
|
||||
this_data = {}
|
||||
for count, value in enumerate(line):
|
||||
if value != '' and count < len(pit_keys):
|
||||
this_data[pit_keys[count]] = value
|
||||
has_error = False
|
||||
|
||||
if this_data: # Only include non-empty rows
|
||||
for count, value in enumerate(line):
|
||||
if count >= len(pit_keys):
|
||||
break
|
||||
|
||||
field_name = pit_keys[count]
|
||||
|
||||
# Skip empty values
|
||||
if value == '':
|
||||
continue
|
||||
|
||||
# Check for spreadsheet errors
|
||||
if self._is_spreadsheet_error(value):
|
||||
self.logger.warning(
|
||||
f"Row {row_num}: Spreadsheet error '{value}' in field '{field_name}' - skipping row"
|
||||
)
|
||||
has_error = True
|
||||
break
|
||||
|
||||
# Sanitize integer fields
|
||||
if field_name in int_fields:
|
||||
sanitized = self._sanitize_int_field(value, field_name)
|
||||
if sanitized is None and value != '':
|
||||
self.logger.warning(
|
||||
f"Row {row_num}: Invalid integer value '{value}' for field '{field_name}' - skipping row"
|
||||
)
|
||||
has_error = True
|
||||
break
|
||||
if sanitized is not None:
|
||||
this_data[field_name] = sanitized
|
||||
else:
|
||||
# Non-integer fields pass through as-is
|
||||
this_data[field_name] = value
|
||||
|
||||
# Skip rows with errors
|
||||
if has_error:
|
||||
continue
|
||||
|
||||
# Validate required fields are present
|
||||
missing_required = required_fields - set(this_data.keys())
|
||||
if missing_required:
|
||||
self.logger.warning(
|
||||
f"Row {row_num}: Missing required fields {missing_required} - skipping row"
|
||||
)
|
||||
continue
|
||||
|
||||
if this_data: # Only include valid rows
|
||||
pit_data.append(this_data)
|
||||
|
||||
self.logger.info(f"Read {len(pit_data)} pitching decisions")
|
||||
self.logger.info(f"Read {len(pit_data)} valid pitching decisions")
|
||||
return pit_data
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user