"""
Google Sheets Service

Handles reading data from Google Sheets scorecards for game submission.
"""

import asyncio
from typing import Dict, List, Any, Optional

import pygsheets

from utils.logging import get_contextual_logger
from exceptions import SheetsException


class SheetsService:
    """Google Sheets integration for scorecard reading.

    pygsheets is synchronous, so every public coroutine pushes its blocking
    calls onto the event loop's default executor via ``run_in_executor``.
    All public readers raise ``SheetsException`` on failure.
    """

    # Cell values that indicate a broken spreadsheet formula.
    _SPREADSHEET_ERRORS = frozenset(
        {
            "#N/A",
            "#REF!",
            "#VALUE!",
            "#DIV/0!",
            "#NUM!",
            "#NAME?",
            "#NULL!",
            "#ERROR!",
        }
    )

    # Playtable field names in column order, B..BW
    # (from old bot lines 1621-1632).
    _PLAY_KEYS = (
        "play_num",
        "batter_id",
        "batter_pos",
        "pitcher_id",
        "on_base_code",
        "inning_half",
        "inning_num",
        "batting_order",
        "starting_outs",
        "away_score",
        "home_score",
        "on_first_id",
        "on_first_final",
        "on_second_id",
        "on_second_final",
        "on_third_id",
        "on_third_final",
        "batter_final",
        "pa",
        "ab",
        "run",
        "e_run",
        "hit",
        "rbi",
        "double",
        "triple",
        "homerun",
        "bb",
        "so",
        "hbp",
        "sac",
        "ibb",
        "gidp",
        "bphr",
        "bpfo",
        "bp1b",
        "bplo",
        "sb",
        "cs",
        "outs",
        "pitcher_rest_outs",
        "wpa",
        "catcher_id",
        "defender_id",
        "runner_id",
        "check_pos",
        "error",
        "wild_pitch",
        "passed_ball",
        "pick_off",
        "balk",
        "is_go_ahead",
        "is_tied",
        "is_new_inning",
        "inherited_runners",
        "inherited_scored",
        "on_hook_for_loss",
        "run_differential",
        "unused-manager",
        "unused-pitcherpow",
        "unused-pitcherrestip",
        "unused-runners",
        "unused-fatigue",
        "unused-roundedip",
        "unused-elitestart",
        "unused-scenario",
        "unused-winxaway",
        "unused-winxhome",
        "unused-pinchrunner",
        "unused-order",
        "hand_batting",
        "hand_pitching",
        "re24_primary",
        "re24_running",
    )

    # Pitcherstats field names in column order, B..O
    # (from old bot lines 1688-1691).
    _PITCHING_KEYS = (
        "pitcher_id",
        "rest_ip",
        "is_start",
        "base_rest",
        "extra_rest",
        "rest_required",
        "win",
        "loss",
        "is_save",
        "hold",
        "b_save",
        "irunners",
        "irunners_scored",
        "team_id",
    )

    # Pitcherstats fields that must be integers.
    _PITCHING_INT_FIELDS = frozenset(
        {
            "pitcher_id",
            "rest_required",
            "win",
            "loss",
            "is_save",
            "hold",
            "b_save",
            "irunners",
            "irunners_scored",
            "team_id",
        }
    )

    # Pitcherstats fields that are required and cannot be missing.
    _PITCHING_REQUIRED_FIELDS = ("pitcher_id", "team_id")

    def __init__(self, credentials_path: Optional[str] = None):
        """
        Initialize sheets service.

        Args:
            credentials_path: Path to service account credentials JSON
                If None, will use path from config
        """
        if credentials_path is None:
            from config import get_config

            credentials_path = get_config().sheets_credentials_path

        self.credentials_path = credentials_path
        self.logger = get_contextual_logger(f"{__name__}.SheetsService")
        # Created lazily by _get_client() on first use.
        self._sheets_client = None

    def _get_client(self) -> pygsheets.client.Client:
        """Get or create pygsheets client."""
        if self._sheets_client is None:
            self._sheets_client = pygsheets.authorize(
                service_file=self.credentials_path
            )
        return self._sheets_client

    @staticmethod
    def _is_spreadsheet_error(value: Any) -> bool:
        """
        Check if a value is a spreadsheet error.

        Args:
            value: Value to check

        Returns:
            True if value is a string that, after stripping whitespace, is
            one of the known spreadsheet errors (#N/A, #REF!, etc.)
        """
        if not isinstance(value, str):
            return False
        return value.strip() in SheetsService._SPREADSHEET_ERRORS

    @staticmethod
    def _sanitize_int_field(value: Any, field_name: str) -> Optional[int]:
        """
        Sanitize a value that should be an integer.

        Args:
            value: Value from spreadsheet
            field_name: Field name for logging (not used by this method)

        Returns:
            Integer value, or None if the value is empty, a spreadsheet
            error, or not convertible. Fractional values are truncated
            toward zero.
        """
        if value is None or value == "":
            return None

        # Check for spreadsheet errors
        if SheetsService._is_spreadsheet_error(value):
            return None

        try:
            # Go through float so strings such as "123.0" are accepted.
            return int(float(value))
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float("inf")) and overflowing literals
            # such as "1e400" cannot be represented as an int.
            return None

    async def open_scorecard(self, sheet_url: str) -> pygsheets.Spreadsheet:
        """
        Open and validate access to a Google Sheet.

        Args:
            sheet_url: Full URL to Google Sheet

        Returns:
            Opened spreadsheet object

        Raises:
            SheetsException: If sheet cannot be accessed
        """
        try:
            # Run in thread pool since pygsheets is synchronous
            loop = asyncio.get_running_loop()
            sheets = await loop.run_in_executor(None, self._get_client)
            scorecard = await loop.run_in_executor(
                None, sheets.open_by_url, sheet_url
            )

            self.logger.info(f"Opened scorecard: {scorecard.title}")
            return scorecard
        except Exception as e:
            self.logger.error(f"Failed to open scorecard {sheet_url}: {e}")
            raise SheetsException(
                "Unable to access scorecard. Is it publicly readable?"
            ) from e

    async def read_setup_data(
        self, scorecard: pygsheets.Spreadsheet
    ) -> Dict[str, Any]:
        """
        Read game metadata from Setup tab.

        Cell mappings:
        - V35: Scorecard version
        - C3:D7: Game data block, of which the following cells are used:
            - C4: week
            - C5: game number
            - C6 / D6: away team abbreviation / away manager name
            - C7 / D7: home team abbreviation / home manager name

        Returns:
            Dictionary with keys:
            - version: str
            - week: int
            - game_num: int
            - away_team_abbrev: str
            - home_team_abbrev: str
            - away_manager_name: str
            - home_manager_name: str

        Raises:
            SheetsException: If the tab or any expected cell cannot be read
                or converted
        """
        try:
            loop = asyncio.get_running_loop()

            # Get Setup tab
            setup_tab = await loop.run_in_executor(
                None, scorecard.worksheet_by_title, "Setup"
            )

            # Read version
            version = await loop.run_in_executor(
                None, setup_tab.get_value, "V35"
            )

            # Read game data (C3:D7); g_data[0] is the C3:D3 row, unused here
            g_data = await loop.run_in_executor(
                None, setup_tab.get_values, "C3", "D7"
            )

            return {
                "version": version,
                "week": int(g_data[1][0]),
                "game_num": int(g_data[2][0]),
                "away_team_abbrev": g_data[3][0],
                "home_team_abbrev": g_data[4][0],
                "away_manager_name": g_data[3][1],
                "home_manager_name": g_data[4][1],
            }
        except Exception as e:
            self.logger.error(f"Failed to read setup data: {e}")
            raise SheetsException("Unable to read game setup data") from e

    async def read_playtable_data(
        self, scorecard: pygsheets.Spreadsheet
    ) -> List[Dict[str, Any]]:
        """
        Read all plays from Playtable tab.

        Reads range B3:BW300, which spans up to 298 rows of play data with
        74 columns per row (one per entry in the play field list).

        Returns:
            List of play dictionaries with field names mapped. Empty cells
            are omitted, and rows with 5 or fewer populated fields are
            dropped.

        Raises:
            SheetsException: If the tab or range cannot be read
        """
        try:
            loop = asyncio.get_running_loop()

            # Get Playtable tab
            playtable = await loop.run_in_executor(
                None, scorecard.worksheet_by_title, "Playtable"
            )

            # Read play data
            all_plays = await loop.run_in_executor(
                None, playtable.get_values, "B3", "BW300"
            )

            p_data = []
            for line in all_plays:
                # zip() stops at the shorter input, so cells beyond the
                # known columns are ignored.
                this_data = {
                    key: value
                    for key, value in zip(self._PLAY_KEYS, line)
                    if value != ""
                }

                # Only include rows with meaningful data (>5 fields)
                if len(this_data) > 5:
                    p_data.append(this_data)

            self.logger.info(f"Read {len(p_data)} plays from scorecard")
            return p_data
        except Exception as e:
            self.logger.error(f"Failed to read playtable data: {e}")
            raise SheetsException("Unable to read play-by-play data") from e

    def _parse_decision_row(
        self, line: List[Any], row_num: int
    ) -> Optional[Dict[str, Any]]:
        """
        Convert one Pitcherstats row into a decision dictionary.

        Args:
            line: Cell values for the row, in column order
            row_num: Spreadsheet row number, used in messages

        Returns:
            Decision dictionary, or None if the row should be skipped
            (invalid integer value or missing required field)

        Raises:
            SheetsException: If any cell contains a spreadsheet formula error
        """
        this_data: Dict[str, Any] = {}

        # zip() stops at the shorter input, so extra columns are ignored.
        for field_name, value in zip(self._PITCHING_KEYS, line):
            # Skip empty values
            if value == "":
                continue

            # Formula errors abort the whole read with actionable guidance.
            if self._is_spreadsheet_error(value):
                raise SheetsException(
                    f"❌ Spreadsheet Error Detected\n\n"
                    f"**Location:** Row {row_num}, Column '{field_name}'\n"
                    f"**Value Found:** `{value}`\n\n"
                    f"This cell contains a formula error that must be fixed before submission.\n\n"
                    f"**Common Error Types:**\n"
                    f"• `#REF!` - Invalid cell reference (deleted row/column)\n"
                    f"• `#N/A` - Lookup formula couldn't find a match\n"
                    f"• `#VALUE!` - Wrong data type in formula\n"
                    f"• `#DIV/0!` - Division by zero\n"
                    f"• `#NAME?` - Unrecognized formula name\n\n"
                    f"**Action Required:** Fix cell {field_name} in row {row_num} and resubmit."
                )

            if field_name not in self._PITCHING_INT_FIELDS:
                # Non-integer fields pass through as-is
                this_data[field_name] = value
                continue

            sanitized = self._sanitize_int_field(value, field_name)
            if sanitized is None:
                self.logger.warning(
                    f"Row {row_num}: Invalid integer value '{value}' for field '{field_name}' - skipping row"
                )
                return None
            this_data[field_name] = sanitized

        # Validate required fields are present
        missing_required = {
            name
            for name in self._PITCHING_REQUIRED_FIELDS
            if name not in this_data
        }
        if missing_required:
            self.logger.warning(
                f"Row {row_num}: Missing required fields {missing_required} - skipping row"
            )
            return None

        return this_data

    async def read_pitching_decisions(
        self, scorecard: pygsheets.Spreadsheet
    ) -> List[Dict[str, Any]]:
        """
        Read pitching decisions from Pitcherstats tab.

        Reads range B3:O30, which spans up to 28 rows of pitcher data with
        14 columns per row.

        Returns:
            List of decision dictionaries with field names mapped. Rows that
            are empty, contain an invalid integer, or lack a required field
            are skipped.

        Raises:
            SheetsException: With a detailed message if a cell contains a
                spreadsheet formula error, or with a generic message if the
                tab or range cannot be read
        """
        try:
            loop = asyncio.get_running_loop()

            # Get Pitcherstats tab
            pitching = await loop.run_in_executor(
                None, scorecard.worksheet_by_title, "Pitcherstats"
            )

            # Read decision data
            all_decisions = await loop.run_in_executor(
                None, pitching.get_values, "B3", "O30"
            )

            pit_data = []
            # The range starts at B3, so the first returned row is sheet row 3.
            for row_num, line in enumerate(all_decisions, start=3):
                if not line:  # Skip empty rows
                    continue

                this_data = self._parse_decision_row(line, row_num)
                if this_data:  # Only include valid rows
                    pit_data.append(this_data)

            self.logger.info(f"Read {len(pit_data)} valid pitching decisions")
            return pit_data
        except SheetsException as e:
            # Already carries a user-facing explanation; re-raise unchanged
            # so the generic handler below does not mask it.
            self.logger.error(f"Failed to read pitching decisions: {e}")
            raise
        except Exception as e:
            self.logger.error(f"Failed to read pitching decisions: {e}")
            raise SheetsException("Unable to read pitching decisions") from e

    async def read_box_score(
        self, scorecard: pygsheets.Spreadsheet
    ) -> Dict[str, List[int]]:
        """
        Read box score from Scorecard or Box Score tab.

        Tries 'Scorecard' tab first (BW8:BY9), falls back to 'Box Score'
        tab (T6:V7) only when the 'Scorecard' tab does not exist.

        Returns:
            Dictionary with 'away' and 'home' keys, each containing
            [runs, hits, errors]

        Raises:
            SheetsException: If neither tab can be read or a value is not
                an integer
        """
        try:
            loop = asyncio.get_running_loop()

            # Try Scorecard tab first
            try:
                sc_tab = await loop.run_in_executor(
                    None, scorecard.worksheet_by_title, "Scorecard"
                )
                score_table = await loop.run_in_executor(
                    None, sc_tab.get_values, "BW8", "BY9"
                )
            except pygsheets.WorksheetNotFound:
                # Fallback to Box Score tab
                sc_tab = await loop.run_in_executor(
                    None, scorecard.worksheet_by_title, "Box Score"
                )
                score_table = await loop.run_in_executor(
                    None, sc_tab.get_values, "T6", "V7"
                )

            return {
                "away": [int(x) for x in score_table[0]],  # [R, H, E]
                "home": [int(x) for x in score_table[1]],  # [R, H, E]
            }
        except Exception as e:
            self.logger.error(f"Failed to read box score: {e}")
            raise SheetsException("Unable to read box score") from e