Fixed critical bug where all outfielders were incorrectly assigned as DH due to defense CSV column mismatch in retrosheet_data.py:
- Lines 889, 926: Changed column check from 'in row' to 'in pos_df.columns' to correctly detect bis_runs_total availability
- Line 947: Fixed fallback from non-existent 'tz_runs_outfield' to 'tz_runs_total' which actually exists in Baseball Reference CSVs

Impact:
- Before: 57 DH players, 0 outfield positions
- After: 3 DH players, 62 outfielders (23 RF, 20 CF, 19 LF)

Added scripts/check_positions.sh:
- Validates position distribution after card generation
- Flags anomalous DH counts (>5 or >10%)
- Verifies outfield positions exist in cardpositions table
- Provides quick smoke test for defensive calculations

Updated CLAUDE.md:
- Added Position Validation section with check_positions.sh usage
- Documented outfield position bug in Common Issues & Solutions
- Included code examples and verification steps

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
107 lines
3.5 KiB
Bash
Executable File
107 lines
3.5 KiB
Bash
Executable File
#!/bin/bash

# Usage: ./scripts/check_positions.sh <cardset_id> [api_url]
# Example: ./scripts/check_positions.sh 27
# Example: ./scripts/check_positions.sh 27 https://pddev.manticorum.com/api

# Positional arguments: the cardset to inspect (required) and the API base
# URL (optional; defaults to https://pd.manticorum.com/api).
CARDSET_ID=$1
API_URL=${2:-"https://pd.manticorum.com/api"}

# Without a cardset ID there is nothing to query: show usage and stop.
if [[ -z "$CARDSET_ID" ]]; then
  printf '%s\n' \
    "Error: Cardset ID required" \
    "Usage: $0 <cardset_id> [api_url]" \
    "Example: $0 27" \
    "Example: $0 27 https://pddev.manticorum.com/api"
  exit 1
fi
# Report banner: names the cardset and the API the figures below come from.
banner_rule="======================================"
printf '%s\n' \
  "$banner_rule" \
  "Position Analysis for Cardset $CARDSET_ID" \
  "API: $API_URL" \
  "$banner_rule" \
  ""
# Fetch players and save to temp file.
# Each line written to the temp file is "p_name,pos_1,pos_2,pos_3" for one
# entry of the response's .players array.
TEMP_FILE=$(mktemp) || {
  echo "Error: could not create temp file" >&2
  exit 1
}
# Remove the temp file whenever the script exits, including the early exits
# below. -f keeps this quiet if the file has already been removed.
trap 'rm -f -- "$TEMP_FILE"' EXIT

# -f makes curl fail on HTTP errors instead of handing an error body to jq;
# -S keeps curl's error message visible even though -s hides the progress meter.
if ! PLAYERS_JSON=$(curl -fsS "$API_URL/v2/players?cardset_id=$CARDSET_ID"); then
  echo "Error: failed to fetch players from $API_URL" >&2
  exit 1
fi

# The pipeline's status is jq's, so a response without a .players array (or
# one that is not JSON at all) stops the script here rather than producing a
# report built on an empty file.
if ! printf '%s\n' "$PLAYERS_JSON" \
  | jq -r '.players[] | "\(.p_name),\(.pos_1),\(.pos_2),\(.pos_3)"' > "$TEMP_FILE"; then
  echo "Error: unexpected response from $API_URL/v2/players" >&2
  exit 1
fi

# Arithmetic expansion strips any padding wc puts around the number.
TOTAL_PLAYERS=$(( $(wc -l < "$TEMP_FILE") ))
echo "Total players: $TOTAL_PLAYERS"
echo ""
# Position distribution: tally the second CSV field (pos_1) of every row and
# list the values from most to least frequent.
echo "=== Position 1 Distribution ==="
cut -d',' -f2 < "$TEMP_FILE" | sort | uniq -c | sort -rn
echo ""
# Count DHs: rows whose pos_1 (second CSV field) is exactly "DH".
DH_COUNT=$(cut -d',' -f2 "$TEMP_FILE" | grep -c '^DH$')
echo "=== DH Analysis ==="
echo "Total DH players: $DH_COUNT"

# DH share of the cardset as a whole-number percentage. With no players at
# all the division would abort with "division by 0", so report 0% instead.
if (( ${TOTAL_PLAYERS:-0} > 0 )); then
  DH_PERCENT=$(( DH_COUNT * 100 / TOTAL_PLAYERS ))
else
  DH_PERCENT=0
fi

# Flag anomalous DH counts: more than 5 DHs that also exceed 10% of the set
# is a warning; more than 5 on its own is only a notice.
if (( DH_COUNT > 5 && DH_PERCENT > 10 )); then
  echo "⚠️ WARNING: Unusually high number of DH players ($DH_COUNT = ${DH_PERCENT}%)"
  echo " Expected: <5 for full season cards"
elif (( DH_COUNT > 5 )); then
  echo "⚠️ NOTICE: Above-average DH count ($DH_COUNT)"
else
  echo "✅ DH count is normal ($DH_COUNT)"
fi
echo ""
# Show DH players if count is suspicious (more than 5).
if (( DH_COUNT > 5 )); then
  echo "=== DH Players (should mostly be full-time DHs) ==="
  # Compare the pos_1 field exactly so the listing agrees with DH_COUNT; a
  # plain text search for ",DH," would also match rows whose pos_2 is DH.
  awk -F',' '$2 == "DH" { print $1 }' "$TEMP_FILE" | head -20
  # Only the first 20 names are printed; say how many were left out.
  if (( DH_COUNT > 20 )); then
    echo "... and $((DH_COUNT - 20)) more"
  fi
  echo ""
fi
# Count outfielders: rows whose pos_1 is LF, CF or RF. A row has a single
# pos_1 value, so the overall total is the sum of the three counts.
lf_total=$(cut -d',' -f2 "$TEMP_FILE" | grep -c '^LF$')
cf_total=$(cut -d',' -f2 "$TEMP_FILE" | grep -c '^CF$')
rf_total=$(cut -d',' -f2 "$TEMP_FILE" | grep -c '^RF$')
OF_COUNT=$(( lf_total + cf_total + rf_total ))

echo "=== Outfield Analysis ==="
echo "Total outfielders: $OF_COUNT"
echo " LF: $lf_total"
echo " CF: $cf_total"
echo " RF: $rf_total"

# Fewer than 20 outfielders is suspicious; none at all is flagged as critical.
if (( OF_COUNT >= 20 )); then
  echo "✅ Outfield count looks normal ($OF_COUNT)"
elif (( OF_COUNT > 0 )); then
  echo "⚠️ WARNING: Very few outfielders ($OF_COUNT). Check defensive position calculations."
else
  echo "🚨 CRITICAL: No outfielders found! Defensive positions likely failed."
fi
echo ""
# Check cardpositions table for outfield positions: count the entries of the
# response's .positions array whose .position matches LF, CF or RF.
echo "=== CardPositions Table Check ==="
# -f makes curl fail on HTTP errors; -S still prints the reason despite -s.
CARDPOS_OF_COUNT=$(curl -fsS "$API_URL/v2/cardpositions?cardset_id=$CARDSET_ID" \
  | jq '[.positions[] | select(.position | test("LF|CF|RF"))] | length')
echo "Outfield positions in cardpositions table: ${CARDPOS_OF_COUNT:-unavailable}"

# When curl or jq fails the count is empty. Comparing an empty value
# numerically is an error that would fall through to the "looks good" branch,
# so anything that is not a plain number is reported as a failed lookup.
if [[ ! "$CARDPOS_OF_COUNT" =~ ^[0-9]+$ ]]; then
  echo "⚠️ WARNING: Could not read cardpositions from the API; check skipped"
elif (( CARDPOS_OF_COUNT == 0 )); then
  echo "🚨 CRITICAL: No outfield positions in database! Defensive calculations failed."
elif (( CARDPOS_OF_COUNT < ${OF_COUNT:-0} - 5 )); then
  # Allow a shortfall of up to 5 against the pos_1 outfielder count.
  echo "⚠️ WARNING: Fewer cardpositions than players with OF pos_1"
else
  echo "✅ CardPositions table looks good"
fi
echo ""
# Sample outfielders to verify: the first five rows containing ",LF,", ",CF,"
# or ",RF,", laid out as aligned columns for reading by eye.
echo "=== Sample Outfielders (for manual verification) ==="
grep -E ',(LF|CF|RF),' "$TEMP_FILE" | head -5 | column -t -s','
echo ""
# Clean up the temp file that held the player rows.
rm "$TEMP_FILE"

# Closing banner.
printf '%s\n' \
  "======================================" \
  "Analysis complete!" \
  "======================================"