#!/usr/bin/env python3
"""
Add resolution columns to the CSV files for manual review
"""

import csv
import logging
from typing import Callable, Dict, Optional

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('UpdateCSV')

# Default locations; every public function accepts overrides so the script
# can be pointed at other files (and exercised against temporary files).
UNMATCHED_INPUT = '/mnt/NV2/Development/major-domo/database/unmatched_players_for_review.csv'
UNMATCHED_OUTPUT = '/mnt/NV2/Development/major-domo/database/unmatched_players_for_review_updated.csv'
HIGH_RISK_INPUT = '/mnt/NV2/Development/major-domo/database/high_risk_player_matches.csv'
HIGH_RISK_OUTPUT = '/mnt/NV2/Development/major-domo/database/high_risk_player_matches_updated.csv'
INSTRUCTIONS_OUTPUT = '/mnt/NV2/Development/major-domo/database/CSV_REVIEW_INSTRUCTIONS.txt'

RESOLUTION_COLUMN = 'resolution'
PARTIAL_PREFIX = 'PARTIAL:'

_INSTRUCTIONS = """
# CSV REVIEW INSTRUCTIONS

## File 1: unmatched_players_for_review_updated.csv

**Resolution Column Values:**
- `ACCEPT` - Use the suggested_sbaplayer_id (pre-filled for exact matches)
- `USE_SBA_123` - Use specific SbaPlayer ID 123 instead of suggestion
- `CREATE_NEW` - Create new SbaPlayer record for this player
- `SKIP` - Skip this player for now (won't be processed)
- `REVIEW` - Needs manual review (pre-filled for partial matches)

**Pre-filled Values:**
- Exact name matches are pre-filled as `ACCEPT`
- Partial matches are marked as `REVIEW`
- No suggestions are marked as `CREATE_NEW`

## File 2: high_risk_player_matches_updated.csv

**Resolution Column Values:**
- `MERGE_123_INTO_456` - Merge SbaPlayer 123 into SbaPlayer 456
- `DIFFERENT_PEOPLE` - These are actually different people, keep separate
- `USE_SBA_123` - For player matches, use this specific SbaPlayer ID
- `CREATE_NEW` - Create new SbaPlayer record
- `SKIP` - Skip this for now

**Pre-filled Logic for SbaPlayer Conflicts:**
- Different bbref_ids = `DIFFERENT_PEOPLE` (bbref_ids are globally unique)
- Same bbref_id = `MERGE` (definitely duplicates)
- One has bbref_id, one doesn't = `MERGE` suggestion (review needed)
- Neither has bbref_id + identical names = `MERGE` suggestion
- Player ambiguous matches pre-select the first suggested SbaPlayer
- Middle initial conflicts are marked as `DIFFERENT_PEOPLE`

## Important Notes:
- **bbref_ids are globally unique** - trust them completely
- If two SbaPlayers have different bbref_ids, they are different people
- If one has bbref_id and one doesn't, they might be the same person

## Next Steps:
1. Review and edit the resolution columns in both files
2. Save the files when done
3. Let Claude know you're ready to process the changes

## Common Patterns:
- bbref_id mismatches (like "HALP") should usually be `CREATE_NEW`
- Different bbref_ids = always different people
- Common names like "Carlos Martinez" need careful review
- Middle initials usually indicate different people
"""


def _cell(row: Dict[str, Optional[str]], column: str) -> str:
    """Return the stripped text of *column*, treating a missing cell as ''.

    ``csv.DictReader`` fills cells absent from a short row with ``None``;
    normalising here keeps the resolvers free of ``None`` checks.  A column
    that is absent from the header still raises ``KeyError``.
    """
    value = row[column]
    return value.strip() if value else ''


def _is_perfect_score(text: str) -> bool:
    """Return True when *text* parses as a similarity score of at least 1.0."""
    # Compare numerically so '1', '1.0' and '1.000' are all recognised.
    try:
        return float(text) >= 1.0
    except ValueError:
        return False


def _resolve_unmatched(row: Dict[str, Optional[str]]) -> str:
    """Choose the pre-filled resolution for one unmatched-player row."""
    suggestion = _cell(row, 'suggested_sbaplayer_id')
    if not suggestion:
        return 'CREATE_NEW'  # Default for no suggestions
    if suggestion.startswith(PARTIAL_PREFIX):
        return 'REVIEW'  # Mark partial matches for review
    return 'ACCEPT'  # Pre-fill exact matches as ACCEPT


def _resolve_sbaplayer_conflict(row: Dict[str, Optional[str]]) -> str:
    """Choose the resolution for a row whose risk type is 'sbaplayer_conflict'."""
    first_id = _cell(row, 'sba1_id')
    second_id = _cell(row, 'sba2_id')
    first_bbref = _cell(row, 'sba1_bbref')
    second_bbref = _cell(row, 'sba2_bbref')

    merge_second_into_first = f"MERGE_{second_id}_INTO_{first_id}"
    merge_first_into_second = f"MERGE_{first_id}_INTO_{second_id}"

    if first_bbref and second_bbref:
        # Different bbref_ids = definitely different people;
        # same bbref_id = definitely duplicate.
        if first_bbref != second_bbref:
            return 'DIFFERENT_PEOPLE'
        return merge_second_into_first
    if first_bbref:
        # Only the first record has a bbref_id - keep it as the merge target.
        return merge_second_into_first
    if second_bbref:
        # Only the second record has a bbref_id - keep it as the merge target.
        return merge_first_into_second
    # Neither has a bbref_id - only a perfect similarity suggests a duplicate.
    if _is_perfect_score(_cell(row, 'similarity_score')):
        return merge_second_into_first
    return 'DIFFERENT_PEOPLE'


def _resolve_high_risk(row: Dict[str, Optional[str]]) -> str:
    """Choose the pre-filled resolution for one high-risk-match row."""
    risk_type = _cell(row, 'risk_type')
    if risk_type == 'sbaplayer_conflict':
        return _resolve_sbaplayer_conflict(row)
    if risk_type == 'player_ambiguous_match':
        return f"USE_SBA_{_cell(row, 'sba1_id')}"  # Pre-select first option
    if risk_type == 'middle_initial_conflict':
        return 'DIFFERENT_PEOPLE'
    return ''  # Unknown risk types are left empty for manual filling


def _add_resolution_column(
    input_file: str,
    output_file: str,
    resolve: Callable[[Dict[str, Optional[str]]], str],
) -> None:
    """Copy *input_file* to *output_file* with a resolution column filled in.

    Raises:
        ValueError: if *input_file* has no header row.
    """
    # newline='' lets the csv module handle embedded newlines itself.
    with open(input_file, 'r', newline='', encoding='utf-8') as infile:
        reader = csv.DictReader(infile)
        rows = list(reader)
        fieldnames = reader.fieldnames

    if fieldnames is None:
        raise ValueError(f"{input_file} has no header row; nothing to update")

    for row in rows:
        row[RESOLUTION_COLUMN] = resolve(row)

    # Avoid emitting the column twice when the input already carries one.
    output_fields = list(fieldnames)
    if RESOLUTION_COLUMN not in output_fields:
        output_fields.append(RESOLUTION_COLUMN)

    with open(output_file, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=output_fields)
        writer.writeheader()
        writer.writerows(rows)


def add_resolution_column_to_unmatched(
    input_file: str = UNMATCHED_INPUT,
    output_file: str = UNMATCHED_OUTPUT,
) -> str:
    """Add resolution column to unmatched players CSV.

    Rows with a plain suggestion are pre-filled ``ACCEPT``, rows whose
    suggestion starts with ``PARTIAL:`` are pre-filled ``REVIEW``, and rows
    without a suggestion are pre-filled ``CREATE_NEW``.

    Args:
        input_file: CSV to read; must contain 'suggested_sbaplayer_id'.
        output_file: CSV to write.

    Returns:
        The path that was written (*output_file*).

    Raises:
        ValueError: if the input has no header row.
        KeyError: if the 'suggested_sbaplayer_id' column is missing.
    """
    _add_resolution_column(input_file, output_file, _resolve_unmatched)
    logger.info("Updated unmatched players CSV: %s", output_file)
    return output_file


def add_resolution_column_to_high_risk(
    input_file: str = HIGH_RISK_INPUT,
    output_file: str = HIGH_RISK_OUTPUT,
) -> str:
    """Add resolution column to high risk matches CSV.

    The pre-fill depends on the row's 'risk_type' and, for SbaPlayer
    conflicts, on which of the two records carries a bbref_id.

    Args:
        input_file: CSV to read; rows are expected to carry 'risk_type',
            'sba1_id', 'sba2_id', 'sba1_bbref', 'sba2_bbref' and
            'similarity_score'.
        output_file: CSV to write.

    Returns:
        The path that was written (*output_file*).

    Raises:
        ValueError: if the input has no header row.
        KeyError: if a column needed for a row's risk type is missing.
    """
    _add_resolution_column(input_file, output_file, _resolve_high_risk)
    logger.info("Updated high risk matches CSV: %s", output_file)
    return output_file


def create_instructions_file(output_file: str = INSTRUCTIONS_OUTPUT) -> None:
    """Create instructions file for manual review.

    Args:
        output_file: Where to write the instruction text.
    """
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(_INSTRUCTIONS)

    logger.info("Created instructions file: CSV_REVIEW_INSTRUCTIONS.txt")


def main() -> None:
    """Update both CSV files with resolution columns"""
    logger.info("Adding resolution columns to CSV files...")

    unmatched_file = add_resolution_column_to_unmatched()
    high_risk_file = add_resolution_column_to_high_risk()
    create_instructions_file()

    logger.info("\n=== CSV FILES UPDATED ===")
    logger.info("Unmatched players: %s", unmatched_file)
    logger.info("High risk matches: %s", high_risk_file)
    logger.info("Instructions: CSV_REVIEW_INSTRUCTIONS.txt")
    logger.info("\nPlease review and edit the 'resolution' column in both files.")


if __name__ == "__main__":
    main()