From 5b75a3d38fb9a891de60108e31fe9c488edc6b88 Mon Sep 17 00:00:00 2001 From: Cal Corum Date: Thu, 18 Dec 2025 16:39:38 -0600 Subject: [PATCH] Implement CLI wrappers for live-series, retrosheet, scouting, upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrated all major card creation workflows to pd-cards CLI: live-series: - update: Full FanGraphs/BBRef card generation with CLI options - status: Show cardset status from database retrosheet: - process: Historical Retrosheet data processing - arms: Generate outfield arm ratings from play-by-play - validate: Check for position anomalies in cardsets - defense: Fetch defensive stats from Baseball Reference scouting: - batters: Generate batting scouting reports - pitchers: Generate pitching scouting reports - all: Generate all reports at once upload: - s3: Upload card images to AWS S3 - check: Validate cards without uploading - refresh: Re-generate and re-upload card images Updated CLAUDE.md with comprehensive CLI documentation. Legacy scripts remain available but CLI is now the primary interface. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- CLAUDE.md | 94 ++++++++- pd_cards/commands/live_series.py | 250 +++++++++++++++++++++++- pd_cards/commands/retrosheet.py | 321 ++++++++++++++++++++++++++++--- pd_cards/commands/scouting.py | 186 +++++++++++++++--- pd_cards/commands/upload.py | 185 +++++++++++++++--- 5 files changed, 947 insertions(+), 89 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 2cbab58..ac93d49 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -32,14 +32,98 @@ This is a baseball card creation system for Paper Dynasty, a sports card simulat 3. **Output**: Generated card data is POSTed directly to Paper Dynasty API; cards rendered on-demand when URLs accessed ### Entry Points -- **live_series_update.py**: Main script for live season card updates (in-season cards) -- **retrosheet_data.py**: Main script for historical replay cardsets +- **pd_cards/**: CLI package (`pd-cards` command) for all card creation operations +- **live_series_update.py**: Legacy script for live season card updates (use `pd-cards live-series` instead) +- **retrosheet_data.py**: Legacy script for historical replay cardsets (use `pd-cards retrosheet` instead) - **refresh_cards.py**: Updates existing player card images and metadata - **check_cards.py**: Validates card data and generates test outputs -- **check_cards_and_upload.py**: Fetches card images from API and uploads to AWS S3 with cache-busting URLs -- **scouting_batters.py** / **scouting_pitchers.py**: Generate scouting reports and ratings comparisons +- **check_cards_and_upload.py**: Legacy S3 upload script (use `pd-cards upload` instead) +- **scouting_batters.py** / **scouting_pitchers.py**: Legacy scouting scripts (use `pd-cards scouting` instead) -## Common Commands +## pd-cards CLI + +The primary interface is the `pd-cards` CLI tool. Install it with: + +```bash +uv pip install -e . # Install in development mode +``` + +### Custom Characters (YAML Profiles) + +Custom fictional players are defined as YAML profiles in `pd_cards/custom/profiles/`. + +```bash +# List all custom character profiles +pd-cards custom list + +# Preview a character's calculated ratings +pd-cards custom preview kalin_young + +# Submit a character to the database +pd-cards custom submit kalin_young --dry-run # Preview first +pd-cards custom submit kalin_young # Actually submit + +# Create a new character profile template +pd-cards custom new --name "Player Name" --type batter --hand R --target-ops 0.800 +pd-cards custom new --name "Pitcher Name" --type pitcher --hand L --target-ops 0.650 +``` + +### Live Series Updates + +```bash +# Update live series cards from FanGraphs/Baseball Reference data +pd-cards live-series update --cardset "2025 Season" --games 81 --dry-run +pd-cards live-series update --cardset "2025 Season" --games 162 + +# Show cardset status +pd-cards live-series status +``` + +### Retrosheet Processing + +```bash +# Process historical Retrosheet data +pd-cards retrosheet process 2005 --cardset-id 27 --description Live --dry-run +pd-cards retrosheet process 2005 --cardset-id 27 --description Live + +# Generate outfield arm ratings +pd-cards retrosheet arms 2005 --events data-input/retrosheet/retrosheets_events_2005.csv + +# Validate positions for a cardset +pd-cards retrosheet validate 27 + +# Fetch defensive stats from Baseball Reference +pd-cards retrosheet defense 2005 --output "data-input/2005 Live Cardset/" +``` + +### Scouting Reports + +```bash +# Generate batting scouting reports +pd-cards scouting batters --cardset-id 27 + +# Generate pitching scouting reports +pd-cards scouting pitchers --cardset-id 27 + +# Generate all scouting reports +pd-cards scouting all --cardset-id 27 +``` + +### S3 Upload + +```bash +# Upload card images to S3 +pd-cards upload s3 --cardset "2005 Live" --dry-run +pd-cards upload s3 --cardset "2005 Live" --limit 10 + +# Check cards without uploading +pd-cards upload check --cardset "2005 Live" --limit 10 + +# Refresh card images +pd-cards upload refresh --cardset "2005 Live" +``` + +## Legacy Commands (Still Available) ### Testing ```bash diff --git a/pd_cards/commands/live_series.py b/pd_cards/commands/live_series.py index ba99554..a3e9b25 100644 --- a/pd_cards/commands/live_series.py +++ b/pd_cards/commands/live_series.py @@ -4,10 +4,14 @@ Live series card update commands. Commands for generating cards from current season FanGraphs/Baseball Reference data. """ +import asyncio +import datetime from pathlib import Path +from typing import Optional import typer from rich.console import Console +from rich.progress import Progress, SpinnerColumn, TextColumn app = typer.Typer(no_args_is_help=True) console = Console() @@ -15,16 +19,26 @@ console = Console() @app.command() def update( - cardset: str = typer.Option(..., "--cardset", "-c", help="Target cardset name (e.g., '2025 Live')"), - season: int = typer.Option(None, "--season", "-s", help="Season year (defaults to current)"), - games_played: int = typer.Option(None, "--games", "-g", help="Number of games played (for prorating)"), + cardset: str = typer.Option(..., "--cardset", "-c", help="Target cardset name (e.g., '2025 Season')"), + season: int = typer.Option(None, "--season", "-s", help="Season year (defaults to cardset year)"), + games_played: int = typer.Option(162, "--games", "-g", help="Number of games played (1-162)"), + description: str = typer.Option(None, "--description", "-d", help="Player description (defaults to year)"), + pull_fielding: bool = typer.Option(True, "--pull-fielding/--no-pull-fielding", help="Pull fielding stats from Baseball Reference"), + post_batters: bool = typer.Option(True, "--post-batters/--skip-batters", help="Post batting cards and ratings"), + post_pitchers: bool = typer.Option(True, "--post-pitchers/--skip-pitchers", help="Post pitching cards and ratings"), + post_fielders: bool = typer.Option(True, "--post-fielders/--skip-fielders", help="Post card positions"), + post_players: bool = typer.Option(True, "--post-players/--skip-players", help="Post player updates"), + is_live: bool = typer.Option(True, "--live/--not-live", help="Look up current MLB clubs from statsapi"), ignore_limits: bool = typer.Option(False, "--ignore-limits", help="Ignore minimum PA/TBF requirements"), dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Preview without saving to database"), ): """ Update live series cards from FanGraphs/Baseball Reference data. - Reads CSV files from data-input/ and generates batting/pitching cards. + Reads CSV files from data-input/{cardset} Cardset/ and generates batting/pitching cards. + + Example: + pd-cards live-series update --cardset "2025 Season" --games 81 """ console.print() console.print("=" * 70) @@ -33,13 +47,185 @@ def update( if dry_run: console.print("[yellow]DRY RUN - no changes will be made[/yellow]") + console.print() - # TODO: Migrate logic from live_series_update.py - console.print() - console.print("[yellow]Not yet implemented - run legacy script:[/yellow]") - console.print(" python live_series_update.py") + # Validate games_played + if games_played < 1 or games_played > 162: + console.print(f"[red]Error: games_played must be between 1 and 162, got {games_played}[/red]") + raise typer.Exit(1) - raise typer.Exit(0) + season_pct = games_played / 162 + + # Import the necessary modules + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + + import batters.creation + import pitchers.creation + import pandas as pd + from creation_helpers import pd_players_df, pd_positions_df + from db_calls import db_get, db_patch, DB_URL + from exceptions import logger + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + console.print("Make sure you're running from the card-creation directory") + raise typer.Exit(1) + + CARD_BASE_URL = f'{DB_URL}/v2/players' + + async def run_update(): + # Look up cardset + console.print(f"Searching for cardset: {cardset}") + c_query = await db_get('cardsets', params=[('name', cardset)]) + if c_query is None or c_query['count'] == 0: + console.print(f"[red]Cardset '{cardset}' not found[/red]") + raise typer.Exit(1) + + cardset_data = c_query['cardsets'][0] + input_path = f'data-input/{cardset_data["name"]} Cardset/' + + # Determine season from cardset name if not provided + actual_season = season + if actual_season is None: + # Try to extract year from cardset name + import re + match = re.search(r'\b(19|20)\d{2}\b', cardset) + if match: + actual_season = int(match.group()) + else: + actual_season = datetime.datetime.now().year + + # Determine description + actual_description = description if description else str(actual_season) + + console.print(f"Cardset ID: {cardset_data['id']} / Season: {actual_season}") + console.print(f"Game count: {games_played} / Season %: {season_pct:.2%}") + console.print(f"Description: {actual_description}") + console.print() + + if dry_run: + console.print("[green]Validation passed - ready to run[/green]") + console.print() + console.print("Would execute:") + console.print(f" - Input path: {input_path}") + if post_batters: + console.print(" - Process batting cards") + if post_pitchers: + console.print(" - Process pitching cards") + if post_fielders: + console.print(" - Process card positions") + if post_players: + console.print(" - Update player records") + return + + start_time = datetime.datetime.now() + release_directory = f'{start_time.year}-{start_time.month}-{start_time.day}' + + # Run batters + console.print("[bold]Processing batters...[/bold]") + data = await batters.creation.run_batters( + cardset_data, input_path, post_players, CARD_BASE_URL, release_directory, + actual_description, season_pct, post_batters, pull_fielding, actual_season, + is_live, ignore_limits + ) + + batter_time = datetime.datetime.now() + batter_runtime = batter_time - start_time + console.print(f"[green]✓ Batter updates complete[/green]") + console.print(f" Total batting cards: {data['tot_batters']}") + console.print(f" New cardset batters: {data['new_batters']}") + console.print(f" Runtime: {round(batter_runtime.total_seconds())} seconds") + console.print() + + # Run pitchers + console.print("[bold]Processing pitchers...[/bold]") + data = await pitchers.creation.run_pitchers( + cardset_data, input_path, CARD_BASE_URL, actual_season, release_directory, + actual_description, season_pct, post_players, post_pitchers, is_live, ignore_limits + ) + pitching_stats = data['pitching_stats'] + + pitcher_time = datetime.datetime.now() + pitcher_runtime = pitcher_time - batter_time + console.print(f"[green]✓ Pitcher updates complete[/green]") + console.print(f" Total pitching cards: {data['tot_pitchers']}") + console.print(f" New cardset pitchers: {data['new_pitchers']}") + console.print(f" Runtime: {round(pitcher_runtime.total_seconds())} seconds") + console.print() + + # Run player position updates + if 'promos' not in cardset.lower(): + console.print("[bold]Processing player positions...[/bold]") + all_pos = await pd_positions_df(cardset_data['id']) + all_players = await pd_players_df(cardset_data['id']) + + player_updates = {} + + def set_all_positions(df_data): + pos_series = all_pos.query(f'player_id == {df_data["player_id"]}')['position'] + pos_updates = [] + count = 1 + for this_pos in pos_series: + if this_pos == 'P': + try: + this_pitcher = pitching_stats.loc[df_data['bbref_id']] + except KeyError: + pos_updates.append((f'pos_{count}', 'RP')) + count += 1 + break + + if this_pitcher['starter_rating'] > 3: + pos_updates.append((f'pos_{count}', 'SP')) + count += 1 + if this_pitcher['relief_rating'] > 1 or not pd.isna(this_pitcher['closer_rating']): + pos_updates.append((f'pos_{count}', 'RP')) + count += 1 + else: + pos_updates.append((f'pos_{count}', 'RP')) + count += 1 + + if not pd.isna(this_pitcher['closer_rating']): + pos_updates.append((f'pos_{count}', 'CP')) + count += 1 + else: + pos_updates.append((f'pos_{count}', this_pos)) + count += 1 + + if count == 1: + pos_updates.append(('pos_1', 'DH')) + count += 1 + + while count <= 9: + pos_updates.append((f'pos_{count}', 'False')) + count += 1 + + if len(pos_updates) > 0: + if df_data.player_id not in player_updates.keys(): + player_updates[df_data.player_id] = pos_updates + else: + player_updates[df_data.player_id].extend(pos_updates) + + all_players.apply(set_all_positions, axis=1) + + console.print(f"Sending {len(player_updates)} player updates to database...") + if post_players: + for player_id in player_updates: + await db_patch('players', object_id=player_id, params=player_updates[player_id]) + + position_time = datetime.datetime.now() + position_runtime = position_time - pitcher_time + console.print(f"[green]✓ Player position updates complete[/green]") + console.print(f" Runtime: {round(position_runtime.total_seconds())} seconds") + console.print() + + total_runtime = datetime.datetime.now() - start_time + console.print("=" * 70) + console.print(f"[bold green]✓ LIVE SERIES UPDATE COMPLETE[/bold green]") + console.print(f"Total runtime: {round(total_runtime.total_seconds())} seconds") + console.print("=" * 70) + + asyncio.run(run_update()) @app.command() @@ -48,4 +234,48 @@ def status( ): """Show status of live series cardsets.""" console.print() - console.print("[yellow]Not yet implemented[/yellow]") + + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + from db_calls import db_get + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + raise typer.Exit(1) + + async def get_status(): + params = [] + if cardset: + params.append(('name', cardset)) + + result = await db_get('cardsets', params=params if params else None) + + if result is None or result['count'] == 0: + if cardset: + console.print(f"[yellow]No cardset found matching '{cardset}'[/yellow]") + else: + console.print("[yellow]No cardsets found[/yellow]") + return + + from rich.table import Table + table = Table(title="Cardsets") + table.add_column("ID", justify="right") + table.add_column("Name") + table.add_column("Season", justify="right") + table.add_column("Players", justify="right") + + for cs in result['cardsets']: + # Get player count for each cardset + players = await db_get('players', params=[('cardset_id', cs['id'])]) + player_count = players['count'] if players else 0 + + table.add_row( + str(cs['id']), + cs['name'], + str(cs.get('season', '-')), + str(player_count) + ) + + console.print(table) + + asyncio.run(get_status()) diff --git a/pd_cards/commands/retrosheet.py b/pd_cards/commands/retrosheet.py index 0c37bac..07a4e0e 100644 --- a/pd_cards/commands/retrosheet.py +++ b/pd_cards/commands/retrosheet.py @@ -4,6 +4,7 @@ Retrosheet historical data processing commands. Commands for generating cards from historical Retrosheet play-by-play data. """ +import asyncio from pathlib import Path from typing import Optional @@ -18,40 +19,107 @@ console = Console() def process( year: int = typer.Argument(..., help="Season year to process (e.g., 2005)"), cardset_id: int = typer.Option(..., "--cardset-id", "-c", help="Target cardset ID"), - description: str = typer.Option("Live", "--description", "-d", help="Player description (e.g., 'Live', 'June PotM')"), - start_date: Optional[str] = typer.Option(None, "--start", help="Start date YYYYMMDD (defaults to season start)"), - end_date: Optional[str] = typer.Option(None, "--end", help="End date YYYYMMDD (defaults to season end)"), - events_file: Optional[Path] = typer.Option(None, "--events", "-e", help="Retrosheet events CSV file"), + description: str = typer.Option("Live", "--description", "-d", help="Player description ('Live' or 'Month PotM')"), + start_date: Optional[str] = typer.Option(None, "--start", help="Start date YYYYMMDD (defaults to March 1)"), + end_date: Optional[str] = typer.Option(None, "--end", help="End date YYYYMMDD (defaults to Oct 2)"), + events_file: Optional[str] = typer.Option(None, "--events", "-e", help="Retrosheet events CSV filename"), + data_input: Optional[str] = typer.Option(None, "--input", "-i", help="Data input directory path"), + season_pct: float = typer.Option(1.0, "--season-pct", help="Season percentage (0.0-1.0)"), + min_pa_vl: int = typer.Option(None, "--min-pa-vl", help="Minimum PA vs LHP (default: 20 Live, 1 PotM)"), + min_pa_vr: int = typer.Option(None, "--min-pa-vr", help="Minimum PA vs RHP (default: 40 Live, 1 PotM)"), + post_data: bool = typer.Option(True, "--post/--no-post", help="Post data to database"), dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Preview without saving to database"), ): """ Process Retrosheet data and create player cards. Generates batting and pitching cards from historical play-by-play data. + + Example: + pd-cards retrosheet process 2005 --cardset-id 27 --description Live """ console.print() console.print("=" * 70) console.print(f"[bold]RETROSHEET PROCESSING - {year}[/bold]") console.print("=" * 70) + # Calculate defaults based on description + is_live = 'live' in description.lower() + + if min_pa_vl is None: + min_pa_vl = 20 if is_live else 1 + if min_pa_vr is None: + min_pa_vr = 40 if is_live else 1 + + if start_date is None: + start_date = f"{year}0301" + if end_date is None: + end_date = f"{year}1002" + + if events_file is None: + events_file = f"retrosheets_events_{year}.csv" + + if data_input is None: + data_input = f"data-input/{year} Live Cardset/" + console.print(f"Cardset ID: {cardset_id}") console.print(f"Description: {description}") - if start_date: - console.print(f"Start Date: {start_date}") - if end_date: - console.print(f"End Date: {end_date}") + console.print(f"Date Range: {start_date} - {end_date}") + console.print(f"Season %: {season_pct:.0%}") + console.print(f"Min PA: vL={min_pa_vl}, vR={min_pa_vr}") + console.print(f"Events: {events_file}") + console.print(f"Input: {data_input}") + console.print() if dry_run: console.print("[yellow]DRY RUN - no changes will be made[/yellow]") + console.print() + console.print("[green]Validation passed - ready to run[/green]") + console.print() + console.print("To run for real, remove --dry-run flag") + raise typer.Exit(0) - # TODO: Migrate logic from retrosheet_data.py - console.print() - console.print("[yellow]Not yet implemented - run legacy script:[/yellow]") - console.print(" python retrosheet_data.py") - console.print() - console.print("[dim]Configure settings in retrosheet_data.py before running[/dim]") + # Import and configure the retrosheet module + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) - raise typer.Exit(0) + import retrosheet_data as rd + + # Configure the module's globals + rd.CARDSET_ID = cardset_id + rd.PLAYER_DESCRIPTION = description + rd.START_DATE = int(start_date) + rd.END_DATE = int(end_date) + rd.SEASON_PCT = season_pct + rd.MIN_PA_VL = min_pa_vl + rd.MIN_PA_VR = min_pa_vr + rd.MIN_TBF_VL = min_pa_vl + rd.MIN_TBF_VR = min_pa_vr + rd.POST_DATA = post_data + rd.EVENTS_FILENAME = events_file + rd.DATA_INPUT_FILE_PATH = data_input + + console.print("[bold]Starting Retrosheet processing...[/bold]") + console.print() + + # Run the main function + asyncio.run(rd.main()) + + console.print() + console.print("=" * 70) + console.print(f"[bold green]✓ RETROSHEET PROCESSING COMPLETE[/bold green]") + console.print("=" * 70) + + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + console.print("Make sure you're running from the card-creation directory") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + import traceback + traceback.print_exc() + raise typer.Exit(1) @app.command() @@ -59,29 +127,103 @@ def arms( year: int = typer.Argument(..., help="Season year"), events_file: Path = typer.Option(..., "--events", "-e", help="Retrosheet events CSV file"), output: Optional[Path] = typer.Option(None, "--output", "-o", help="Output CSV file"), + season_pct: float = typer.Option(1.0, "--season-pct", help="Season percentage for min sample"), ): """ Generate outfield arm ratings from Retrosheet data. Analyzes play-by-play events to calculate OF arm strength ratings. + + Example: + pd-cards retrosheet arms 2005 --events data-input/retrosheet/retrosheets_events_2005.csv """ console.print() console.print("=" * 70) console.print(f"[bold]OUTFIELD ARM RATINGS - {year}[/bold]") console.print("=" * 70) + if not events_file.exists(): + console.print(f"[red]Events file not found: {events_file}[/red]") + raise typer.Exit(1) + if output is None: output = Path(f"data-output/retrosheet_arm_ratings_{year}.csv") console.print(f"Events file: {events_file}") console.print(f"Output: {output}") - - # TODO: Migrate logic from generate_arm_ratings_csv.py + console.print(f"Season %: {season_pct:.0%}") console.print() - console.print("[yellow]Not yet implemented - run legacy script:[/yellow]") - console.print(f" python generate_arm_ratings_csv.py --year {year} --events {events_file}") - raise typer.Exit(0) + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + + import pandas as pd + from defenders.retrosheet_arm_calculator import calculate_of_arms_from_retrosheet + + console.print("Loading Retrosheet events...") + df_events = pd.read_csv(events_file) + console.print(f" Loaded {len(df_events)} events") + + console.print("Calculating arm ratings...") + arm_ratings = calculate_of_arms_from_retrosheet(df_events, season_pct=season_pct) + + # Convert to DataFrame for CSV output + rows = [] + for key, rating in arm_ratings.items(): + player_id, position = key.rsplit('_', 1) + rows.append({ + 'player_id': player_id, + 'position': position.upper(), + 'season': year, + 'arm_rating': rating.get('arm_rating', 0), + 'balls_fielded': rating.get('balls_fielded', 0), + 'total_assists': rating.get('total_assists', 0), + 'assist_rate': rating.get('assist_rate', 0), + 'z_score': rating.get('z_score', 0), + }) + + df_output = pd.DataFrame(rows) + df_output = df_output.sort_values(['position', 'arm_rating']) + + # Ensure output directory exists + output.parent.mkdir(parents=True, exist_ok=True) + + df_output.to_csv(output, index=False) + console.print() + console.print(f"[green]✓ Saved {len(df_output)} arm ratings to {output}[/green]") + + # Show distribution summary + from rich.table import Table + table = Table(title="Arm Rating Distribution") + table.add_column("Position") + table.add_column("Count", justify="right") + table.add_column("Avg Rating", justify="right") + table.add_column("Min", justify="right") + table.add_column("Max", justify="right") + + for pos in ['LF', 'CF', 'RF']: + pos_df = df_output[df_output['position'] == pos] + if len(pos_df) > 0: + table.add_row( + pos, + str(len(pos_df)), + f"{pos_df['arm_rating'].mean():.1f}", + str(int(pos_df['arm_rating'].min())), + str(int(pos_df['arm_rating'].max())) + ) + + console.print() + console.print(table) + + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + import traceback + traceback.print_exc() + raise typer.Exit(1) @app.command() @@ -93,15 +235,144 @@ def validate( Validate positions for a cardset. Checks for anomalous DH counts and missing outfield positions. + + Example: + pd-cards retrosheet validate 27 """ console.print() console.print("=" * 70) console.print(f"[bold]POSITION VALIDATION - Cardset {cardset_id}[/bold]") console.print("=" * 70) - # TODO: Migrate logic from scripts/check_positions.sh - console.print() - console.print("[yellow]Not yet implemented - run legacy script:[/yellow]") - console.print(f" ./scripts/check_positions.sh {cardset_id} {api_url}") + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + from db_calls import db_get - raise typer.Exit(0) + async def run_validation(): + # Get all players in cardset + console.print(f"Fetching players from cardset {cardset_id}...") + players = await db_get('players', params=[('cardset_id', cardset_id)]) + + if players is None or players['count'] == 0: + console.print(f"[yellow]No players found in cardset {cardset_id}[/yellow]") + return + + console.print(f"Found {players['count']} players") + console.print() + + # Count positions + pos_counts = {} + dh_players = [] + + for player in players['players']: + pos_1 = player.get('pos_1', '') + if pos_1: + pos_counts[pos_1] = pos_counts.get(pos_1, 0) + 1 + if pos_1 == 'DH': + dh_players.append(f"{player.get('p_name', 'Unknown')} (ID: {player['player_id']})") + + # Display position counts + from rich.table import Table + table = Table(title="Position Distribution") + table.add_column("Position") + table.add_column("Count", justify="right") + + for pos in sorted(pos_counts.keys()): + count = pos_counts[pos] + style = "red" if pos == 'DH' and count > 5 else None + table.add_row(pos, str(count), style=style) + + console.print(table) + console.print() + + # Check for anomalies + issues = [] + + dh_count = pos_counts.get('DH', 0) + if dh_count > 5: + issues.append(f"[red]⚠ Anomalous DH count: {dh_count} (should be <5 for full-season cards)[/red]") + console.print("DH Players:") + for p in dh_players[:10]: + console.print(f" - {p}") + if len(dh_players) > 10: + console.print(f" ... and {len(dh_players) - 10} more") + console.print() + + for pos in ['LF', 'CF', 'RF']: + if pos_counts.get(pos, 0) == 0: + issues.append(f"[red]⚠ Missing {pos} positions (indicates defensive calculation failures)[/red]") + + if issues: + console.print("[bold]Issues Found:[/bold]") + for issue in issues: + console.print(f" {issue}") + else: + console.print("[green]✓ No position anomalies detected[/green]") + + asyncio.run(run_validation()) + + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + raise typer.Exit(1) + + +@app.command() +def defense( + year: int = typer.Argument(..., help="Season year to fetch defense stats for"), + output_dir: Path = typer.Option(None, "--output", "-o", help="Output directory for CSV files"), +): + """ + Fetch and store defensive statistics from Baseball Reference. + + Downloads fielding stats for all positions and saves to CSV files. + + Example: + pd-cards retrosheet defense 2005 --output "data-input/2005 Live Cardset/" + """ + console.print() + console.print("=" * 70) + console.print(f"[bold]FETCH DEFENSIVE STATS - {year}[/bold]") + console.print("=" * 70) + + if output_dir is None: + output_dir = Path(f"data-input/{year} Live Cardset/") + + console.print(f"Output directory: {output_dir}") + console.print() + + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + + import defenders.calcs_defense as cde + + positions = ['c', '1b', '2b', '3b', 'ss', 'lf', 'cf', 'rf', 'of', 'p'] + + output_dir.mkdir(parents=True, exist_ok=True) + + async def fetch_defense(): + for position in positions: + console.print(f"Fetching {position.upper()} defensive stats...") + pos_df = cde.get_bbref_fielding_df(position, year) + output_file = output_dir / f"defense_{position}.csv" + pos_df.to_csv(output_file) + console.print(f" [green]✓ Saved {len(pos_df)} records to {output_file}[/green]") + await asyncio.sleep(8) # Rate limiting + + console.print() + console.print(f"[green]✓ All defensive stats saved to {output_dir}[/green]") + + asyncio.run(fetch_defense()) + + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + import traceback + traceback.print_exc() + raise typer.Exit(1) diff --git a/pd_cards/commands/scouting.py b/pd_cards/commands/scouting.py index 0312be8..c103bfe 100644 --- a/pd_cards/commands/scouting.py +++ b/pd_cards/commands/scouting.py @@ -4,8 +4,9 @@ Scouting report generation commands. Commands for generating scouting reports and ratings comparisons. """ +import asyncio from pathlib import Path -from typing import Optional +from typing import Optional, List import typer from rich.console import Console @@ -16,58 +17,199 @@ console = Console() @app.command() def batters( - cardset: str = typer.Option(..., "--cardset", "-c", help="Cardset name"), - output: Optional[Path] = typer.Option(None, "--output", "-o", help="Output directory"), + cardset_ids: Optional[List[int]] = typer.Option(None, "--cardset-id", "-c", help="Cardset ID(s) to include (can specify multiple)"), + output_dir: Path = typer.Option(Path("scouting"), "--output", "-o", help="Output directory"), ): """ Generate batting scouting reports. Creates CSV files with batting ratings and comparisons. + + Example: + pd-cards scouting batters --cardset-id 27 --cardset-id 29 """ console.print() console.print("=" * 70) - console.print(f"[bold]BATTING SCOUTING REPORT - {cardset}[/bold]") + console.print("[bold]BATTING SCOUTING REPORT[/bold]") console.print("=" * 70) - # TODO: Migrate logic from scouting_batters.py - console.print() - console.print("[yellow]Not yet implemented - run legacy script:[/yellow]") - console.print(" python scouting_batters.py") + if cardset_ids: + console.print(f"Cardset IDs: {cardset_ids}") + else: + console.print("Cardset IDs: All") - raise typer.Exit(0) + console.print(f"Output: {output_dir}") + console.print() + + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + + import scouting_batters as sb + import copy + + async def run_scouting(): + # Ensure output directory exists + output_dir.mkdir(parents=True, exist_ok=True) + + console.print("Pulling scouting data...") + batting_dfs = await sb.get_scouting_dfs(cardset_ids or []) + console.print(f" Received {len(batting_dfs)} rows") + + console.print("Generating basic scouting report...") + await sb.post_calc_basic(copy.deepcopy(batting_dfs)) + console.print(f" [green]✓ Saved to {output_dir}/batting-basic.csv[/green]") + + console.print("Generating ratings guide...") + await sb.post_calc_ratings(copy.deepcopy(batting_dfs)) + console.print(f" [green]✓ Saved to {output_dir}/batting-ratings.csv[/green]") + + console.print() + console.print("[green]✓ Batting scouting complete[/green]") + + asyncio.run(run_scouting()) + + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + import traceback + traceback.print_exc() + raise typer.Exit(1) @app.command() def pitchers( - cardset: str = typer.Option(..., "--cardset", "-c", help="Cardset name"), - output: Optional[Path] = typer.Option(None, "--output", "-o", help="Output directory"), + cardset_ids: Optional[List[int]] = typer.Option(None, "--cardset-id", "-c", help="Cardset ID(s) to include (can specify multiple)"), + output_dir: Path = typer.Option(Path("scouting"), "--output", "-o", help="Output directory"), ): """ Generate pitching scouting reports. Creates CSV files with pitching ratings and comparisons. + + Example: + pd-cards scouting pitchers --cardset-id 27 --cardset-id 29 """ console.print() console.print("=" * 70) - console.print(f"[bold]PITCHING SCOUTING REPORT - {cardset}[/bold]") + console.print("[bold]PITCHING SCOUTING REPORT[/bold]") console.print("=" * 70) - # TODO: Migrate logic from scouting_pitchers.py - console.print() - console.print("[yellow]Not yet implemented - run legacy script:[/yellow]") - console.print(" python scouting_pitchers.py") + if cardset_ids: + console.print(f"Cardset IDs: {cardset_ids}") + else: + console.print("Cardset IDs: All") - raise typer.Exit(0) + console.print(f"Output: {output_dir}") + console.print() + + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + + import scouting_pitchers as sp + import copy + + async def run_scouting(): + # Ensure output directory exists + output_dir.mkdir(parents=True, exist_ok=True) + + console.print("Pulling scouting data...") + pitching_dfs = await sp.get_scouting_dfs(cardset_ids or []) + console.print(f" Received {len(pitching_dfs)} rows") + + console.print("Generating basic scouting report...") + await sp.post_calc_basic(copy.deepcopy(pitching_dfs)) + console.print(f" [green]✓ Saved to {output_dir}/pitching-basic.csv[/green]") + + console.print("Generating ratings guide...") + await sp.post_calc_ratings(copy.deepcopy(pitching_dfs)) + console.print(f" [green]✓ Saved to {output_dir}/pitching-ratings.csv[/green]") + + console.print() + console.print("[green]✓ Pitching scouting complete[/green]") + + asyncio.run(run_scouting()) + + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + import traceback + traceback.print_exc() + raise typer.Exit(1) @app.command() -def upload( - cardset: str = typer.Option(..., "--cardset", "-c", help="Cardset name"), +def all( + cardset_ids: Optional[List[int]] = typer.Option(None, "--cardset-id", "-c", help="Cardset ID(s) to include"), + output_dir: Path = typer.Option(Path("scouting"), "--output", "-o", help="Output directory"), ): """ - Upload scouting reports to database. + Generate all scouting reports (batters and pitchers). - Uploads generated scouting CSV data to Paper Dynasty API. + Example: + pd-cards scouting all --cardset-id 27 """ console.print() - console.print("[yellow]Not yet implemented[/yellow]") + console.print("=" * 70) + console.print("[bold]FULL SCOUTING REPORT[/bold]") + console.print("=" * 70) + console.print() + + # Run batters + console.print("[bold]Phase 1: Batters[/bold]") + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + + import scouting_batters as sb + import scouting_pitchers as sp + import copy + + async def run_all_scouting(): + output_dir.mkdir(parents=True, exist_ok=True) + + # Batters + console.print("Pulling batting data...") + batting_dfs = await sb.get_scouting_dfs(cardset_ids or []) + console.print(f" Received {len(batting_dfs)} batter rows") + + await sb.post_calc_basic(copy.deepcopy(batting_dfs)) + console.print(f" [green]✓ batting-basic.csv[/green]") + + await sb.post_calc_ratings(copy.deepcopy(batting_dfs)) + console.print(f" [green]✓ batting-ratings.csv[/green]") + + console.print() + console.print("[bold]Phase 2: Pitchers[/bold]") + + # Pitchers + console.print("Pulling pitching data...") + pitching_dfs = await sp.get_scouting_dfs(cardset_ids or []) + console.print(f" Received {len(pitching_dfs)} pitcher rows") + + await sp.post_calc_basic(copy.deepcopy(pitching_dfs)) + console.print(f" [green]✓ pitching-basic.csv[/green]") + + await sp.post_calc_ratings(copy.deepcopy(pitching_dfs)) + console.print(f" [green]✓ pitching-ratings.csv[/green]") + + console.print() + console.print("=" * 70) + console.print("[bold green]✓ ALL SCOUTING REPORTS COMPLETE[/bold green]") + console.print("=" * 70) + + asyncio.run(run_all_scouting()) + + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + import traceback + traceback.print_exc() + raise typer.Exit(1) diff --git a/pd_cards/commands/upload.py b/pd_cards/commands/upload.py index 4f38552..a2ec8e8 100644 --- a/pd_cards/commands/upload.py +++ b/pd_cards/commands/upload.py @@ -4,6 +4,7 @@ Card image upload commands. Commands for uploading card images to AWS S3. """ +import asyncio from pathlib import Path from typing import Optional @@ -20,12 +21,19 @@ def s3( start_id: Optional[int] = typer.Option(None, "--start-id", help="Player ID to start from (for resuming)"), limit: Optional[int] = typer.Option(None, "--limit", "-l", help="Limit number of cards to process"), html: bool = typer.Option(False, "--html", help="Upload HTML preview cards instead of PNG"), + skip_batters: bool = typer.Option(False, "--skip-batters", help="Skip batting cards"), + skip_pitchers: bool = typer.Option(False, "--skip-pitchers", help="Skip pitching cards"), + upload: bool = typer.Option(True, "--upload/--no-upload", help="Upload to S3"), + update_urls: bool = typer.Option(True, "--update-urls/--no-update-urls", help="Update player URLs in database"), dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Preview without uploading"), ): """ Upload card images to AWS S3. Fetches card images from Paper Dynasty API and uploads to S3 bucket. + + Example: + pd-cards upload s3 --cardset "2005 Live" --limit 10 """ console.print() console.print("=" * 70) @@ -39,53 +47,176 @@ def s3( console.print(f"Limit: {limit} cards") if html: console.print("Mode: HTML preview cards") + if skip_batters: + console.print("Skipping: Batting cards") + if skip_pitchers: + console.print("Skipping: Pitching cards") + console.print(f"Upload to S3: {upload and not dry_run}") + console.print(f"Update URLs: {update_urls and not dry_run}") + console.print() if dry_run: console.print("[yellow]DRY RUN - no uploads will be made[/yellow]") + console.print() + console.print("[green]Validation passed - ready to run[/green]") + raise typer.Exit(0) - # TODO: Migrate logic from check_cards_and_upload.py - console.print() - console.print("[yellow]Not yet implemented - run legacy script:[/yellow]") - console.print(" python check_cards_and_upload.py") + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) - raise typer.Exit(0) + import check_cards_and_upload as ccu + # Configure the module's globals + ccu.CARDSET_NAME = cardset + ccu.START_ID = start_id + ccu.TEST_COUNT = limit if limit else 9999 + ccu.HTML_CARDS = html + ccu.SKIP_BATS = skip_batters + ccu.SKIP_ARMS = skip_pitchers + ccu.UPLOAD_TO_S3 = upload + ccu.UPDATE_PLAYER_URLS = update_urls -@app.command() -def migrate( - cardset: str = typer.Option(..., "--cardset", "-c", help="Cardset name"), - dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Preview without uploading"), -): - """ - Migrate all cards for a cardset to S3. + # Re-initialize S3 client if uploading + if upload: + import boto3 + ccu.s3_client = boto3.client('s3', region_name=ccu.AWS_REGION) + else: + ccu.s3_client = None - Bulk upload for initial cardset migration. - """ - console.print() - console.print("=" * 70) - console.print(f"[bold]S3 MIGRATION - {cardset}[/bold]") - console.print("=" * 70) + console.print("[bold]Starting S3 upload...[/bold]") + console.print() - if dry_run: - console.print("[yellow]DRY RUN - no uploads will be made[/yellow]") + asyncio.run(ccu.main([])) - # TODO: Migrate logic from migrate_all_cards_to_s3.py - console.print() - console.print("[yellow]Not yet implemented - run legacy script:[/yellow]") - console.print(" python migrate_all_cards_to_s3.py") + console.print() + console.print("=" * 70) + console.print(f"[bold green]✓ S3 UPLOAD COMPLETE[/bold green]") + console.print("=" * 70) - raise typer.Exit(0) + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + import traceback + traceback.print_exc() + raise typer.Exit(1) @app.command() def refresh( cardset: str = typer.Option(..., "--cardset", "-c", help="Cardset name"), + limit: Optional[int] = typer.Option(None, "--limit", "-l", help="Limit number of cards"), + dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Preview without refreshing"), ): """ Refresh card images for a cardset. Re-generates and re-uploads card images. + + Example: + pd-cards upload refresh --cardset "2005 Live" --limit 10 """ console.print() - console.print("[yellow]Not yet implemented - run legacy script:[/yellow]") - console.print(" python refresh_cards.py") + console.print("=" * 70) + console.print(f"[bold]CARD REFRESH - {cardset}[/bold]") + console.print("=" * 70) + + console.print(f"Cardset: {cardset}") + if limit: + console.print(f"Limit: {limit} cards") + + if dry_run: + console.print("[yellow]DRY RUN - no changes will be made[/yellow]") + console.print() + console.print("[green]Validation passed - ready to run[/green]") + raise typer.Exit(0) + + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + + import refresh_cards as rc + + # Configure the module + rc.CARDSET_NAME = cardset + rc.TEST_COUNT = limit if limit else 9999 + + console.print("[bold]Starting card refresh...[/bold]") + console.print() + + asyncio.run(rc.main([])) + + console.print() + console.print("=" * 70) + console.print(f"[bold green]✓ CARD REFRESH COMPLETE[/bold green]") + console.print("=" * 70) + + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + console.print("Try: python refresh_cards.py") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + import traceback + traceback.print_exc() + raise typer.Exit(1) + + +@app.command() +def check( + cardset: str = typer.Option(..., "--cardset", "-c", help="Cardset name"), + limit: Optional[int] = typer.Option(None, "--limit", "-l", help="Limit number of cards to check"), + output_dir: Path = typer.Option(Path("data-output"), "--output", "-o", help="Output directory"), +): + """ + Check and validate card images without uploading. + + Downloads card images and saves locally for review. + + Example: + pd-cards upload check --cardset "2005 Live" --limit 10 + """ + console.print() + console.print("=" * 70) + console.print(f"[bold]CARD CHECK - {cardset}[/bold]") + console.print("=" * 70) + + console.print(f"Cardset: {cardset}") + if limit: + console.print(f"Limit: {limit} cards") + console.print(f"Output: {output_dir}") + console.print() + + try: + import sys + sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + + import check_cards_and_upload as ccu + + # Configure for check-only mode + ccu.CARDSET_NAME = cardset + ccu.START_ID = None + ccu.TEST_COUNT = limit if limit else 9999 + ccu.HTML_CARDS = False + ccu.UPLOAD_TO_S3 = False + ccu.UPDATE_PLAYER_URLS = False + ccu.s3_client = None + + console.print("[bold]Starting card check...[/bold]") + console.print() + + asyncio.run(ccu.main([])) + + console.print() + console.print("[green]✓ Card check complete[/green]") + + except ImportError as e: + console.print(f"[red]Error importing modules: {e}[/red]") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error: {e}[/red]") + import traceback + traceback.print_exc() + raise typer.Exit(1)