From 92eb9055f174a420f39b6c3d752e7cab5bc41d79 Mon Sep 17 00:00:00 2001 From: Cal Corum Date: Thu, 5 Feb 2026 21:44:06 -0600 Subject: [PATCH] fix: Validate and sanitize pitching decision data from Google Sheets Added robust validation to handle spreadsheet errors and invalid data when reading pitching decisions from scorecards. Problem: - POST /api/v3/decisions was failing with 422 errors - Google Sheets cells containing "#N/A" were passed directly to API - API correctly rejected invalid team_id values like "#N/A" string - No validation of integer fields or required fields Root Cause: - sheets_service.py:read_pitching_decisions() read values without validation or type checking - Spreadsheet formula errors (#N/A, #REF!, etc.) passed through - Invalid data types not caught until API validation failed Solution: 1. Added _is_spreadsheet_error() to detect formula errors 2. Added _sanitize_int_field() to validate and convert integers 3. Enhanced read_pitching_decisions() to: - Detect and skip rows with spreadsheet errors - Validate integer fields (pitcher_id, team_id, etc.) - Ensure required fields (pitcher_id, team_id) are present - Log warnings for invalid data with row numbers - Only return valid, sanitized decision data Impact: - Prevents 422 errors from bad spreadsheet data - Provides clear warnings in logs when data is invalid - Gracefully skips invalid rows instead of crashing - Helps identify scorecard data entry errors Testing: - Handles #N/A, #REF!, #VALUE!, #DIV/0! 
and other errors - Converts "123.0" strings to integers correctly - Validates required fields before sending to API - Logs row numbers for debugging bad data Production logs showed: "Input should be a valid integer, unable to parse string as an integer", input: "#N/A" for team_id field Co-Authored-By: Claude Sonnet 4.5 --- services/sheets_service.py | 107 +++++++++++++++++++++++++++++++++++-- 1 file changed, 102 insertions(+), 5 deletions(-) diff --git a/services/sheets_service.py b/services/sheets_service.py index 8104718..7671e88 100644 --- a/services/sheets_service.py +++ b/services/sheets_service.py @@ -38,6 +38,49 @@ class SheetsService: ) return self._sheets_client + @staticmethod + def _is_spreadsheet_error(value: Any) -> bool: + """ + Check if a value is a spreadsheet error. + + Args: + value: Value to check + + Returns: + True if value is a spreadsheet error (#N/A, #REF!, etc.) + """ + if not isinstance(value, str): + return False + + # Common spreadsheet errors + error_values = ['#N/A', '#REF!', '#VALUE!', '#DIV/0!', '#NUM!', '#NAME?', '#NULL!', '#ERROR!'] + return value.strip() in error_values + + @staticmethod + def _sanitize_int_field(value: Any, field_name: str) -> Optional[int]: + """ + Sanitize a value that should be an integer. + + Args: + value: Value from spreadsheet + field_name: Field name for logging + + Returns: + Integer value (fractional part truncated) or None if invalid + """ + if value is None or value == '': + return None + + # Check for spreadsheet errors + if SheetsService._is_spreadsheet_error(value): + return None + + # Try to convert to int; "inf"/"1e999" parse as float but raise OverflowError in int() + try: + return int(float(value)) # Handle "123.0" strings + except (ValueError, TypeError, OverflowError): + return None + async def open_scorecard(self, sheet_url: str) -> pygsheets.Spreadsheet: """ Open and validate access to a Google Sheet. 
@@ -241,20 +284,74 @@ class SheetsService: 'hold', 'b_save', 'irunners', 'irunners_scored', 'team_id' ] + # Fields that must be integers + int_fields = { + 'pitcher_id', 'rest_required', 'win', 'loss', 'is_save', + 'hold', 'b_save', 'irunners', 'irunners_scored', 'team_id' + } + # Fields that are required and cannot be None + required_fields = {'pitcher_id', 'team_id'} + pit_data = [] + row_num = 3 # Start at row 3 (B3 in spreadsheet) + for line in all_decisions: + row_num += 1 if not line: # Skip empty rows continue this_data = {} - for count, value in enumerate(line): - if value != '' and count < len(pit_keys): - this_data[pit_keys[count]] = value + has_error = False - if this_data: # Only include non-empty rows + for count, value in enumerate(line): + if count >= len(pit_keys): + break + + field_name = pit_keys[count] + + # Skip empty values + if value == '': + continue + + # Check for spreadsheet errors + if self._is_spreadsheet_error(value): + self.logger.warning( + f"Row {row_num}: Spreadsheet error '{value}' in field '{field_name}' - skipping row" + ) + has_error = True + break + + # Sanitize integer fields + if field_name in int_fields: + sanitized = self._sanitize_int_field(value, field_name) + if sanitized is None and value != '': + self.logger.warning( + f"Row {row_num}: Invalid integer value '{value}' for field '{field_name}' - skipping row" + ) + has_error = True + break + if sanitized is not None: + this_data[field_name] = sanitized + else: + # Non-integer fields pass through as-is + this_data[field_name] = value + + # Skip rows with errors + if has_error: + continue + + # Validate required fields are present + missing_required = required_fields - set(this_data.keys()) + if missing_required: + self.logger.warning( + f"Row {row_num}: Missing required fields {missing_required} - skipping row" + ) + continue + + if this_data: # Only include valid rows pit_data.append(this_data) - self.logger.info(f"Read {len(pit_data)} pitching decisions") + 
self.logger.info(f"Read {len(pit_data)} valid pitching decisions") return pit_data except Exception as e: