**Problem:** The `_is_spreadsheet_error()` check was logging a warning and silently skipping rows with formula errors (`#REF!`, `#N/A`, etc.). This could lead to incomplete game data being submitted without the user knowing.

**Solution:** Raise `SheetsException` immediately when spreadsheet errors are detected, providing:
- Exact row number and field name
- Actual error value found in the cell
- Common error type explanations
- Clear action required to fix

**Impact:**
- Users get immediate feedback about spreadsheet errors
- No partial/incomplete data submitted to API
- Clear instructions on what needs to be fixed
- Better data integrity

**Example Error Message:**
```
❌ Spreadsheet Error Detected

**Location:** Row 7, Column 'pitcher_id'
**Value Found:** `#REF!`

This cell contains a formula error that must be fixed before submission.

**Action Required:** Fix cell pitcher_id in row 7 and resubmit.
```

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
421 lines
14 KiB
Python
421 lines
14 KiB
Python
"""
|
|
Google Sheets Service
|
|
|
|
Handles reading data from Google Sheets scorecards for game submission.
|
|
"""
|
|
import asyncio
|
|
from typing import Dict, List, Any, Optional
|
|
import pygsheets
|
|
|
|
from utils.logging import get_contextual_logger
|
|
from exceptions import SheetsException
|
|
|
|
|
|
class SheetsService:
    """Google Sheets integration for scorecard reading.

    pygsheets is synchronous, so every coroutine on this class hands its
    sheet calls to the event loop's default executor via ``_run_blocking``.
    """

    # Literal strings a cell displays when its formula fails to evaluate.
    _SPREADSHEET_ERRORS = frozenset({
        '#N/A', '#REF!', '#VALUE!', '#DIV/0!',
        '#NUM!', '#NAME?', '#NULL!', '#ERROR!',
    })

    # Playtable field names in sheet order, starting at column B
    # (from old bot lines 1621-1632).
    _PLAY_KEYS = (
        'play_num', 'batter_id', 'batter_pos', 'pitcher_id',
        'on_base_code', 'inning_half', 'inning_num', 'batting_order',
        'starting_outs', 'away_score', 'home_score', 'on_first_id',
        'on_first_final', 'on_second_id', 'on_second_final',
        'on_third_id', 'on_third_final', 'batter_final', 'pa', 'ab',
        'run', 'e_run', 'hit', 'rbi', 'double', 'triple', 'homerun',
        'bb', 'so', 'hbp', 'sac', 'ibb', 'gidp', 'bphr', 'bpfo',
        'bp1b', 'bplo', 'sb', 'cs', 'outs', 'pitcher_rest_outs',
        'wpa', 'catcher_id', 'defender_id', 'runner_id', 'check_pos',
        'error', 'wild_pitch', 'passed_ball', 'pick_off', 'balk',
        'is_go_ahead', 'is_tied', 'is_new_inning', 'inherited_runners',
        'inherited_scored', 'on_hook_for_loss', 'run_differential',
        'unused-manager', 'unused-pitcherpow', 'unused-pitcherrestip',
        'unused-runners', 'unused-fatigue', 'unused-roundedip',
        'unused-elitestart', 'unused-scenario', 'unused-winxaway',
        'unused-winxhome', 'unused-pinchrunner', 'unused-order',
        'hand_batting', 'hand_pitching', 're24_primary', 're24_running',
    )

    # Pitcherstats field names in sheet order, starting at column B
    # (from old bot lines 1688-1691).
    _PITCHING_KEYS = (
        'pitcher_id', 'rest_ip', 'is_start', 'base_rest',
        'extra_rest', 'rest_required', 'win', 'loss', 'is_save',
        'hold', 'b_save', 'irunners', 'irunners_scored', 'team_id',
    )

    # Pitcherstats fields that must be integers.
    _PITCHING_INT_FIELDS = frozenset({
        'pitcher_id', 'rest_required', 'win', 'loss', 'is_save',
        'hold', 'b_save', 'irunners', 'irunners_scored', 'team_id',
    })

    # Pitcherstats fields that are required and cannot be missing.
    _PITCHING_REQUIRED_FIELDS = frozenset({'pitcher_id', 'team_id'})

    # Sheet row of the first line returned for the range B3:O30.
    _PITCHING_FIRST_ROW = 3

    def __init__(self, credentials_path: Optional[str] = None):
        """
        Initialize sheets service.

        Args:
            credentials_path: Path to service account credentials JSON.
                If None, will use path from config.
        """
        if credentials_path is None:
            # Imported lazily so config is only loaded when it is needed.
            from config import get_config
            credentials_path = get_config().sheets_credentials_path

        self.credentials_path = credentials_path
        self.logger = get_contextual_logger(f'{__name__}.SheetsService')
        self._sheets_client = None

    def _get_client(self) -> "pygsheets.client.Client":
        """Get or create the pygsheets client (authorized on first use)."""
        if self._sheets_client is None:
            self._sheets_client = pygsheets.authorize(
                service_file=self.credentials_path
            )
        return self._sheets_client

    @staticmethod
    async def _run_blocking(func, *args):
        """
        Run a synchronous callable in the loop's default executor.

        Args:
            func: Blocking callable (pygsheets is synchronous)
            *args: Positional arguments passed to ``func``

        Returns:
            Whatever ``func(*args)`` returns
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, func, *args)

    @staticmethod
    def _is_spreadsheet_error(value: Any) -> bool:
        """
        Check if a value is a spreadsheet error.

        Args:
            value: Value to check

        Returns:
            True if value is a string that, after stripping surrounding
            whitespace, equals a spreadsheet error (#N/A, #REF!, etc.)
        """
        if not isinstance(value, str):
            return False
        return value.strip() in SheetsService._SPREADSHEET_ERRORS

    @staticmethod
    def _sanitize_int_field(value: Any, field_name: str) -> Optional[int]:
        """
        Sanitize a value that should be an integer.

        Args:
            value: Value from spreadsheet
            field_name: Field name of the cell (kept for callers; not
                used by the conversion itself)

        Returns:
            Integer value, or None if the value is empty, a spreadsheet
            error, or not convertible. Fractional values are truncated
            toward zero.
        """
        if value is None or value == '':
            return None

        if SheetsService._is_spreadsheet_error(value):
            return None

        try:
            return int(float(value))  # Handle "123.0" strings
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float('inf')) - treat like any other bad cell.
            return None

    @staticmethod
    def _format_cell_error(row_num: int, field_name: str, value: Any) -> str:
        """
        Build the user-facing message for a cell holding a formula error.

        Args:
            row_num: Sheet row number of the offending cell
            field_name: Field name mapped to the offending column
            value: Error value found in the cell

        Returns:
            Multi-line message describing the location and how to fix it
        """
        return (
            f"❌ Spreadsheet Error Detected\n\n"
            f"**Location:** Row {row_num}, Column '{field_name}'\n"
            f"**Value Found:** `{value}`\n\n"
            f"This cell contains a formula error that must be fixed before submission.\n\n"
            f"**Common Error Types:**\n"
            f"• `#REF!` - Invalid cell reference (deleted row/column)\n"
            f"• `#N/A` - Lookup formula couldn't find a match\n"
            f"• `#VALUE!` - Wrong data type in formula\n"
            f"• `#DIV/0!` - Division by zero\n"
            f"• `#NAME?` - Unrecognized formula name\n\n"
            f"**Action Required:** Fix cell {field_name} in row {row_num} and resubmit."
        )

    async def open_scorecard(self, sheet_url: str) -> "pygsheets.Spreadsheet":
        """
        Open and validate access to a Google Sheet.

        Args:
            sheet_url: Full URL to Google Sheet

        Returns:
            Opened spreadsheet object

        Raises:
            SheetsException: If sheet cannot be accessed
        """
        try:
            sheets = await self._run_blocking(self._get_client)
            scorecard = await self._run_blocking(sheets.open_by_url, sheet_url)

            self.logger.info(f"Opened scorecard: {scorecard.title}")
            return scorecard

        except Exception as e:
            self.logger.error(f"Failed to open scorecard {sheet_url}: {e}")
            raise SheetsException(
                "Unable to access scorecard. Is it publicly readable?"
            ) from e

    async def read_setup_data(
        self,
        scorecard: "pygsheets.Spreadsheet"
    ) -> Dict[str, Any]:
        """
        Read game metadata from Setup tab.

        Cell mappings:
        - V35: Scorecard version
        - C3:D7: Game data (week, game_num, teams, managers)

        Returns:
            Dictionary with keys:
            - version: str
            - week: int
            - game_num: int
            - away_team_abbrev: str
            - home_team_abbrev: str
            - away_manager_name: str
            - home_manager_name: str

        Raises:
            SheetsException: If the tab or any expected cell cannot be read
        """
        try:
            setup_tab = await self._run_blocking(
                scorecard.worksheet_by_title, 'Setup'
            )
            version = await self._run_blocking(setup_tab.get_value, 'V35')
            g_data = await self._run_blocking(
                setup_tab.get_values, 'C3', 'D7'
            )

            # g_data rows map to sheet rows 3-7; row index 0 (C3:D3) is not read.
            return {
                'version': version,
                'week': int(g_data[1][0]),
                'game_num': int(g_data[2][0]),
                'away_team_abbrev': g_data[3][0],
                'home_team_abbrev': g_data[4][0],
                'away_manager_name': g_data[3][1],
                'home_manager_name': g_data[4][1]
            }

        except Exception as e:
            self.logger.error(f"Failed to read setup data: {e}")
            raise SheetsException("Unable to read game setup data") from e

    async def read_playtable_data(
        self,
        scorecard: "pygsheets.Spreadsheet"
    ) -> List[Dict[str, Any]]:
        """
        Read all plays from Playtable tab.

        Reads range B3:BW300 which contains up to 298 rows of play data
        with 74 columns per row.

        Returns:
            List of play dictionaries with field names mapped. Empty cells
            are omitted, and rows with 5 or fewer populated fields are
            dropped.

        Raises:
            SheetsException: If the tab or range cannot be read
        """
        try:
            playtable = await self._run_blocking(
                scorecard.worksheet_by_title, 'Playtable'
            )
            all_plays = await self._run_blocking(
                playtable.get_values, 'B3', 'BW300'
            )

            p_data = []
            for line in all_plays:
                # zip() stops at the shorter side, so extra columns are ignored.
                this_data = {
                    key: value
                    for key, value in zip(self._PLAY_KEYS, line)
                    if value != ''
                }

                # Only include rows with meaningful data (>5 fields)
                if len(this_data) > 5:
                    p_data.append(this_data)

            self.logger.info(f"Read {len(p_data)} plays from scorecard")
            return p_data

        except Exception as e:
            self.logger.error(f"Failed to read playtable data: {e}")
            raise SheetsException("Unable to read play-by-play data") from e

    def _parse_pitching_rows(
        self,
        rows: List[List[Any]]
    ) -> List[Dict[str, Any]]:
        """
        Convert raw Pitcherstats rows into decision dictionaries.

        Args:
            rows: Row values as read from B3:O30, first row first

        Returns:
            One dictionary per valid row. Rows with an unparseable integer
            field or a missing required field are logged and skipped.

        Raises:
            SheetsException: If any cell holds a spreadsheet formula error
        """
        pit_data = []

        # enumerate() keeps row_num equal to the real sheet row, so the
        # first line (B3) is reported as row 3.
        for row_num, line in enumerate(rows, start=self._PITCHING_FIRST_ROW):
            # Skip empty rows, including rows made up only of blank cells.
            if not line or all(value == '' for value in line):
                continue

            this_data = {}
            has_error = False

            for field_name, value in zip(self._PITCHING_KEYS, line):
                # Skip empty values
                if value == '':
                    continue

                # Formula errors abort the whole read so the user can fix them.
                if self._is_spreadsheet_error(value):
                    raise SheetsException(
                        self._format_cell_error(row_num, field_name, value)
                    )

                if field_name not in self._PITCHING_INT_FIELDS:
                    # Non-integer fields pass through as-is
                    this_data[field_name] = value
                    continue

                sanitized = self._sanitize_int_field(value, field_name)
                if sanitized is None:
                    self.logger.warning(
                        f"Row {row_num}: Invalid integer value '{value}' for field '{field_name}' - skipping row"
                    )
                    has_error = True
                    break
                this_data[field_name] = sanitized

            if has_error:
                continue

            missing_required = set(
                self._PITCHING_REQUIRED_FIELDS.difference(this_data)
            )
            if missing_required:
                self.logger.warning(
                    f"Row {row_num}: Missing required fields {missing_required} - skipping row"
                )
                continue

            pit_data.append(this_data)

        return pit_data

    async def read_pitching_decisions(
        self,
        scorecard: "pygsheets.Spreadsheet"
    ) -> List[Dict[str, Any]]:
        """
        Read pitching decisions from Pitcherstats tab.

        Reads range B3:O30 which contains up to 28 rows of pitcher data
        with 14 columns per row.

        Returns:
            List of decision dictionaries with field names mapped

        Raises:
            SheetsException: With a detailed row/column message if a cell
                holds a formula error, or a generic message if the tab
                cannot be read
        """
        try:
            pitching = await self._run_blocking(
                scorecard.worksheet_by_title, 'Pitcherstats'
            )
            all_decisions = await self._run_blocking(
                pitching.get_values, 'B3', 'O30'
            )

            pit_data = self._parse_pitching_rows(all_decisions)

            self.logger.info(f"Read {len(pit_data)} valid pitching decisions")
            return pit_data

        except SheetsException as e:
            # Re-raise untouched: wrapping it in the generic error below
            # would discard the row/column details meant for the user.
            self.logger.error(f"Failed to read pitching decisions: {e}")
            raise

        except Exception as e:
            self.logger.error(f"Failed to read pitching decisions: {e}")
            raise SheetsException("Unable to read pitching decisions") from e

    async def read_box_score(
        self,
        scorecard: "pygsheets.Spreadsheet"
    ) -> Dict[str, List[int]]:
        """
        Read box score from Scorecard or Box Score tab.

        Tries 'Scorecard' tab first (BW8:BY9), falls back to
        'Box Score' tab (T6:V7).

        Returns:
            Dictionary with 'away' and 'home' keys, each containing
            [runs, hits, errors]

        Raises:
            SheetsException: If neither tab can be read or a value is
                not an integer
        """
        try:
            try:
                sc_tab = await self._run_blocking(
                    scorecard.worksheet_by_title, 'Scorecard'
                )
                score_table = await self._run_blocking(
                    sc_tab.get_values, 'BW8', 'BY9'
                )
            except pygsheets.WorksheetNotFound:
                # Fallback to Box Score tab
                sc_tab = await self._run_blocking(
                    scorecard.worksheet_by_title, 'Box Score'
                )
                score_table = await self._run_blocking(
                    sc_tab.get_values, 'T6', 'V7'
                )

            return {
                'away': [int(x) for x in score_table[0]],  # [R, H, E]
                'home': [int(x) for x in score_table[1]]   # [R, H, E]
            }

        except Exception as e:
            self.logger.error(f"Failed to read box score: {e}")
            raise SheetsException("Unable to read box score") from e