paper-dynasty-card-creation/test_nan_handling.py
2025-11-08 16:57:35 -06:00

132 lines
4.7 KiB
Python

"""
Test NaN handling in rarity assignment.
"""
import pandas as pd
import numpy as np
def _fill_missing_rarity(total_ratings, default_rarity=5):
    """Ensure ``new_rarity_id`` exists on *total_ratings* and contains no NaN.

    This is the fix under test: a missing column is created with
    *default_rarity* in every row, and NaN entries in an existing column are
    replaced with *default_rarity*. A column with no NaN is left untouched, so
    its dtype is not changed.

    Args:
        total_ratings: DataFrame to patch; it is modified in place.
        default_rarity: Value written where no rarity is present.

    Returns:
        The same DataFrame object, for convenience.
    """
    if 'new_rarity_id' not in total_ratings.columns:
        total_ratings['new_rarity_id'] = default_rarity
    elif total_ratings['new_rarity_id'].isna().any():
        total_ratings['new_rarity_id'] = (
            total_ratings['new_rarity_id'].fillna(default_rarity)
        )
    return total_ratings


def test_nan_handling():
    """Test that NaN values in new_rarity_id are properly filled.

    Covers four scenarios: the column has some NaN values, the column is
    missing entirely, the column is already fully populated, and a cost
    calculation that converts the rarity to ``int`` (which raises
    ``ValueError`` if a NaN is still present).

    Raises:
        AssertionError: If any scenario produces an unexpected result.
    """
    print("Testing NaN Handling in Rarity Assignment")
    print("=" * 80)

    # Case 1: Column exists with some NaN values
    print("\nCase 1: Column exists with NaN values")
    total_ratings = pd.DataFrame({
        'player_id': [1, 2, 3, 4, 5],
        'new_rarity_id': [1, 2, np.nan, 3, np.nan],
    })
    print("Before fix:")
    print(f" new_rarity_id values: {total_ratings['new_rarity_id'].tolist()}")
    print(f" Has NaN: {total_ratings['new_rarity_id'].isna().any()}")

    _fill_missing_rarity(total_ratings)

    print("After fix:")
    print(f" new_rarity_id values: {total_ratings['new_rarity_id'].tolist()}")
    print(f" Has NaN: {total_ratings['new_rarity_id'].isna().any()}")
    assert not total_ratings['new_rarity_id'].isna().any(), "Should have no NaN values"
    assert total_ratings['new_rarity_id'].iloc[2] == 5, "NaN should be replaced with 5"
    assert total_ratings['new_rarity_id'].iloc[4] == 5, "NaN should be replaced with 5"
    # The non-NaN entries must survive the fill unchanged.
    assert total_ratings['new_rarity_id'].tolist() == [1, 2, 5, 3, 5], \
        "Existing values should be unchanged"
    print(" ✓ Pass")

    # Case 2: Column doesn't exist at all
    print("\nCase 2: Column doesn't exist")
    total_ratings = pd.DataFrame({
        'player_id': [1, 2, 3, 4, 5],
        'other_column': ['a', 'b', 'c', 'd', 'e'],
    })
    print("Before fix:")
    print(f" Columns: {total_ratings.columns.tolist()}")
    print(f" Has new_rarity_id: {'new_rarity_id' in total_ratings.columns}")

    _fill_missing_rarity(total_ratings)

    print("After fix:")
    print(f" Columns: {total_ratings.columns.tolist()}")
    print(f" new_rarity_id values: {total_ratings['new_rarity_id'].tolist()}")
    assert 'new_rarity_id' in total_ratings.columns, "Column should be created"
    assert (total_ratings['new_rarity_id'] == 5).all(), "All values should be 5"
    print(" ✓ Pass")

    # Case 3: Column exists with all valid values
    print("\nCase 3: Column exists with all valid values")
    total_ratings = pd.DataFrame({
        'player_id': [1, 2, 3, 4, 5],
        'new_rarity_id': [1, 2, 3, 4, 5],
    })
    print("Before fix:")
    print(f" new_rarity_id values: {total_ratings['new_rarity_id'].tolist()}")
    print(f" Has NaN: {total_ratings['new_rarity_id'].isna().any()}")

    _fill_missing_rarity(total_ratings)

    print("After fix:")
    print(f" new_rarity_id values: {total_ratings['new_rarity_id'].tolist()}")
    print(f" Has NaN: {total_ratings['new_rarity_id'].isna().any()}")
    assert total_ratings['new_rarity_id'].tolist() == [1, 2, 3, 4, 5], "Values should be unchanged"
    print(" ✓ Pass")

    # Case 4: Simulate get_player_updates with NaN
    print("\nCase 4: Test in get_player_updates context")
    base_costs = {1: 810, 2: 270, 3: 90, 4: 30, 5: 10, 99: 2400}
    average_ops = {1: 1.066, 2: 0.938, 3: 0.844, 4: 0.752, 5: 0.612}
    player_data = pd.DataFrame({
        'player_id': [1, 2, 3],
        'cost': [99999, 99999, 99999],
        'new_rarity_id': [1, 5, np.nan],
        'total_OPS': [1.1, 0.6, np.nan],
    })
    print("Player data before processing:")
    print(player_data)

    # Fix NaN rarity
    _fill_missing_rarity(player_data)
    print("\nAfter NaN fix:")
    print(player_data)

    # Now try the cost calculation. int() on a NaN rarity raises ValueError,
    # so completing this loop shows the fill took effect. itertuples keeps
    # each column's own dtype; iterrows would upcast player_id to float.
    new_costs = {}
    for row in player_data.itertuples(index=False):
        if row.cost != 99999 or pd.isna(row.new_rarity_id):
            continue
        rarity = int(row.new_rarity_id)
        # A missing OPS falls back to a neutral multiplier of 1.0.
        ops = 1.0 if pd.isna(row.total_OPS) else row.total_OPS
        new_cost = int(round(base_costs[rarity] * ops / average_ops[rarity]))
        new_costs[int(row.player_id)] = new_cost
        print(f" Player {int(row.player_id)}: rarity={rarity}, cost={new_cost}")

    assert new_costs == {1: 836, 2: 10, 3: 16}, "Unexpected cost calculation results"
    print(" ✓ Pass - No ValueError")
if __name__ == '__main__':
    # Script entry point: run the checks, then print a framed success banner.
    # Any failing assertion propagates and the banner is never printed.
    test_nan_handling()
    divider = "=" * 80
    print("\n" + divider)
    print("✅ All NaN handling tests passed!")
    print(divider)