paper-dynasty-card-creation/scripts/check_positions.sh
Cal Corum 5d7a0dd74b CLAUDE: Fix outfield position assignment bug and add validation script
Fixed critical bug where all outfielders were incorrectly assigned as DH
due to defense CSV column mismatch in retrosheet_data.py:

- Lines 889, 926: Changed column check from 'in row' to 'in pos_df.columns'
  to correctly detect bis_runs_total availability
- Line 947: Fixed fallback from non-existent 'tz_runs_outfield' to
  'tz_runs_total' which actually exists in Baseball Reference CSVs

Impact:
- Before: 57 DH players, 0 outfield positions
- After: 3 DH players, 62 outfielders (23 RF, 20 CF, 19 LF)

Added scripts/check_positions.sh:
- Validates position distribution after card generation
- Flags anomalous DH counts (warning when >5 and >10% of players; notice when >5)
- Verifies outfield positions exist in cardpositions table
- Provides quick smoke test for defensive calculations

Updated CLAUDE.md:
- Added Position Validation section with check_positions.sh usage
- Documented outfield position bug in Common Issues & Solutions
- Included code examples and verification steps

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-09 11:38:36 -06:00

107 lines
3.5 KiB
Bash
Executable File

#!/bin/bash
#
# check_positions.sh - smoke test for position assignments in a cardset.
#
# Fetches every player of a cardset from the API and reports:
#   * the distribution of primary positions (pos_1),
#   * whether the number of DH players looks anomalous,
#   * whether any outfielders (LF/CF/RF) exist, and
#   * whether the cardpositions endpoint contains outfield rows.
#
# Usage: ./scripts/check_positions.sh <cardset_id> [api_url]
# Example: ./scripts/check_positions.sh 27
# Example: ./scripts/check_positions.sh 27 https://pddev.manticorum.com/api
#
# Requires: curl, jq
# Exit status: 0 when the analysis ran (findings are reported as text only);
#              1 on usage errors, missing tools, or when no players could be
#              fetched.

# -u catches misspelled variable names; pipefail lets a failing curl surface
# through the `curl | jq` pipelines. -e is deliberately NOT set because
# `grep -c` exits 1 when the count is zero, which is a normal result here.
set -uo pipefail

CARDSET_ID=${1:-}
API_URL=${2:-"https://pd.manticorum.com/api"}

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Count players whose primary position (field 2) fully matches the ERE in $1.
# Always prints a number, including 0.
count_pos1() {
  cut -f2 "$TEMP_FILE" | grep -cxE -- "$1"
}

if [[ -z "$CARDSET_ID" ]]; then
  {
    echo "Error: Cardset ID required"
    echo "Usage: $0 <cardset_id> [api_url]"
    echo "Example: $0 27"
    echo "Example: $0 27 https://pddev.manticorum.com/api"
  } >&2
  exit 1
fi

for tool in curl jq; do
  command -v "$tool" >/dev/null 2>&1 || die "Error: required tool '$tool' not found"
done

echo "======================================"
echo "Position Analysis for Cardset $CARDSET_ID"
echo "API: $API_URL"
echo "======================================"
echo ""

# Fetch players into a temp file that is removed on every exit path.
TEMP_FILE=$(mktemp) || die "Error: could not create temporary file"
trap 'rm -f -- "$TEMP_FILE"' EXIT

# One tab-separated row per player: name, pos_1, pos_2, pos_3.
# Tabs (not commas) are used so a comma inside a player name cannot shift the
# position columns; missing positions are rendered as the literal "null".
# -f makes curl fail on HTTP errors instead of piping an error page into jq;
# -G/--data-urlencode builds the ?cardset_id=... query string safely.
if ! curl -sS -f -G --data-urlencode "cardset_id=$CARDSET_ID" "$API_URL/v2/players" \
  | jq -r '.players[] | [.p_name, .pos_1, .pos_2, .pos_3] | map(. // "null" | tostring) | @tsv' \
    > "$TEMP_FILE"; then
  die "Error: failed to fetch players for cardset $CARDSET_ID from $API_URL"
fi

# $(( )) normalizes the padded output some wc implementations produce.
TOTAL_PLAYERS=$(wc -l < "$TEMP_FILE")
TOTAL_PLAYERS=$((TOTAL_PLAYERS))
echo "Total players: $TOTAL_PLAYERS"
echo ""

# Nothing to analyze; this also protects the percentage division below.
if (( TOTAL_PLAYERS == 0 )); then
  die "Error: no players returned for cardset $CARDSET_ID"
fi

# Position distribution
echo "=== Position 1 Distribution ==="
cut -f2 "$TEMP_FILE" | sort | uniq -c | sort -rn
echo ""

# Count DHs
DH_COUNT=$(count_pos1 'DH')
echo "=== DH Analysis ==="
echo "Total DH players: $DH_COUNT"

# Warn when the DH count is high both absolutely (>5) and relatively (>10% of
# all players); a count above 5 alone only produces a notice.
DH_PERCENT=$((DH_COUNT * 100 / TOTAL_PLAYERS))
if (( DH_COUNT > 5 && DH_PERCENT > 10 )); then
  echo "⚠️ WARNING: Unusually high number of DH players ($DH_COUNT = ${DH_PERCENT}%)"
  echo " Expected: <5 for full season cards"
elif (( DH_COUNT > 5 )); then
  echo "⚠️ NOTICE: Above-average DH count ($DH_COUNT)"
else
  echo "✅ DH count is normal ($DH_COUNT)"
fi
echo ""

# Show DH players if count is suspicious. Only pos_1 is inspected so the list
# agrees with DH_COUNT (a secondary DH position must not be listed).
if (( DH_COUNT > 5 )); then
  echo "=== DH Players (should mostly be full-time DHs) ==="
  awk -F'\t' '$2 == "DH" { print $1 }' "$TEMP_FILE" | head -20
  if (( DH_COUNT > 20 )); then
    echo "... and $((DH_COUNT - 20)) more"
  fi
  echo ""
fi

# Count outfielders
OF_COUNT=$(count_pos1 'LF|CF|RF')
echo "=== Outfield Analysis ==="
echo "Total outfielders: $OF_COUNT"
echo " LF: $(count_pos1 'LF')"
echo " CF: $(count_pos1 'CF')"
echo " RF: $(count_pos1 'RF')"

# Flag if no outfielders (major bug)
if (( OF_COUNT == 0 )); then
  echo "🚨 CRITICAL: No outfielders found! Defensive positions likely failed."
elif (( OF_COUNT < 20 )); then
  echo "⚠️ WARNING: Very few outfielders ($OF_COUNT). Check defensive position calculations."
else
  echo "✅ Outfield count looks normal ($OF_COUNT)"
fi
echo ""

# Check cardpositions table for outfield positions
echo "=== CardPositions Table Check ==="
CARDPOS_OF_COUNT=$(
  curl -sS -f -G --data-urlencode "cardset_id=$CARDSET_ID" "$API_URL/v2/cardpositions" \
    | jq '[.positions[] | select((.position // "") | test("LF|CF|RF"))] | length'
)
echo "Outfield positions in cardpositions table: $CARDPOS_OF_COUNT"

# A failed request or unexpected payload leaves a non-numeric value; report
# that explicitly instead of letting it fall through to the success branch.
if ! [[ "$CARDPOS_OF_COUNT" =~ ^[0-9]+$ ]]; then
  echo "⚠️ WARNING: Could not read cardpositions from the API; check skipped."
elif (( CARDPOS_OF_COUNT == 0 )); then
  echo "🚨 CRITICAL: No outfield positions in database! Defensive calculations failed."
elif (( CARDPOS_OF_COUNT < OF_COUNT - 5 )); then
  echo "⚠️ WARNING: Fewer cardpositions than players with OF pos_1"
else
  echo "✅ CardPositions table looks good"
fi
echo ""

# Sample outfielders to verify (primary position only, matching OF_COUNT)
echo "=== Sample Outfielders (for manual verification) ==="
awk -F'\t' '$2 ~ /^(LF|CF|RF)$/' "$TEMP_FILE" | head -5 | column -t -s $'\t'
echo ""

# The temp file is removed by the EXIT trap installed above.
echo "======================================"
echo "Analysis complete!"
echo "======================================"