diff --git a/services/sheets_service.py b/services/sheets_service.py
index 8104718..7671e88 100644
--- a/services/sheets_service.py
+++ b/services/sheets_service.py
@@ -38,6 +38,49 @@ class SheetsService:
             )
         return self._sheets_client
 
+    @staticmethod
+    def _is_spreadsheet_error(value: Any) -> bool:
+        """
+        Check if a value is a spreadsheet error marker.
+
+        Args:
+            value: Cell value to check; non-string values are never errors
+
+        Returns:
+            True if value, ignoring surrounding whitespace, is #N/A, #REF!, etc.
+        """
+        if not isinstance(value, str):
+            return False
+
+        # Error markers recognised (exact match after stripping whitespace)
+        error_values = ['#N/A', '#REF!', '#VALUE!', '#DIV/0!', '#NUM!', '#NAME?', '#NULL!', '#ERROR!']
+        return value.strip() in error_values
+
+    @staticmethod
+    def _sanitize_int_field(value: Any, field_name: str) -> Optional[int]:
+        """
+        Convert a spreadsheet cell to an int, or None if it is not usable.
+
+        Args:
+            value: Raw cell value from the spreadsheet
+            field_name: Field name (not used by the conversion itself)
+
+        Returns:
+            Integer value (fractions are truncated), or None if empty/invalid
+        """
+        if value is None or value == '':
+            return None
+
+        # Spreadsheet error markers (#N/A, #REF!, ...) are never valid numbers
+        if SheetsService._is_spreadsheet_error(value):
+            return None
+
+        # float() first so "123.0" parses; int() raises OverflowError for "inf"
+        try:
+            return int(float(value))
+        except (ValueError, TypeError, OverflowError):
+            return None
+
     async def open_scorecard(self, sheet_url: str) -> pygsheets.Spreadsheet:
         """
         Open and validate access to a Google Sheet.
@@ -241,20 +284,74 @@ class SheetsService: 'hold', 'b_save', 'irunners', 'irunners_scored', 'team_id' ] + # Fields that must be integers + int_fields = { + 'pitcher_id', 'rest_required', 'win', 'loss', 'is_save', + 'hold', 'b_save', 'irunners', 'irunners_scored', 'team_id' + } + # Fields that are required and cannot be None + required_fields = {'pitcher_id', 'team_id'} + pit_data = [] + row_num = 3 # Start at row 3 (B3 in spreadsheet) + for line in all_decisions: + row_num += 1 if not line: # Skip empty rows continue this_data = {} - for count, value in enumerate(line): - if value != '' and count < len(pit_keys): - this_data[pit_keys[count]] = value + has_error = False - if this_data: # Only include non-empty rows + for count, value in enumerate(line): + if count >= len(pit_keys): + break + + field_name = pit_keys[count] + + # Skip empty values + if value == '': + continue + + # Check for spreadsheet errors + if self._is_spreadsheet_error(value): + self.logger.warning( + f"Row {row_num}: Spreadsheet error '{value}' in field '{field_name}' - skipping row" + ) + has_error = True + break + + # Sanitize integer fields + if field_name in int_fields: + sanitized = self._sanitize_int_field(value, field_name) + if sanitized is None and value != '': + self.logger.warning( + f"Row {row_num}: Invalid integer value '{value}' for field '{field_name}' - skipping row" + ) + has_error = True + break + if sanitized is not None: + this_data[field_name] = sanitized + else: + # Non-integer fields pass through as-is + this_data[field_name] = value + + # Skip rows with errors + if has_error: + continue + + # Validate required fields are present + missing_required = required_fields - set(this_data.keys()) + if missing_required: + self.logger.warning( + f"Row {row_num}: Missing required fields {missing_required} - skipping row" + ) + continue + + if this_data: # Only include valid rows pit_data.append(this_data) - self.logger.info(f"Read {len(pit_data)} pitching decisions") + 
self.logger.info(f"Read {len(pit_data)} valid pitching decisions") return pit_data except Exception as e: