174 lines
7.3 KiB
Python
174 lines
7.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Add resolution columns to the CSV files for manual review
|
|
"""
|
|
import csv
|
|
import logging
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
logger = logging.getLogger('UpdateCSV')
|
|
|
|
def add_resolution_column_to_unmatched():
|
|
"""Add resolution column to unmatched players CSV"""
|
|
input_file = '/mnt/NV2/Development/major-domo/database/unmatched_players_for_review.csv'
|
|
output_file = '/mnt/NV2/Development/major-domo/database/unmatched_players_for_review_updated.csv'
|
|
|
|
with open(input_file, 'r') as infile:
|
|
reader = csv.DictReader(infile)
|
|
|
|
# Read all rows
|
|
rows = list(reader)
|
|
|
|
# Add resolution column to each row
|
|
for row in rows:
|
|
row['resolution'] = '' # Empty for manual filling
|
|
|
|
# Add some helpful comments based on the suggested match
|
|
if row['suggested_sbaplayer_id'] and not row['suggested_sbaplayer_id'].startswith('PARTIAL:'):
|
|
row['resolution'] = 'ACCEPT' # Pre-fill exact matches as ACCEPT
|
|
elif row['suggested_sbaplayer_id'].startswith('PARTIAL:'):
|
|
row['resolution'] = 'REVIEW' # Mark partial matches for review
|
|
else:
|
|
row['resolution'] = 'CREATE_NEW' # Default for no suggestions
|
|
|
|
# Write updated CSV
|
|
with open(output_file, 'w', newline='') as outfile:
|
|
fieldnames = list(reader.fieldnames) + ['resolution']
|
|
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
|
|
|
|
writer.writeheader()
|
|
writer.writerows(rows)
|
|
|
|
logger.info(f"Updated unmatched players CSV: {output_file}")
|
|
return output_file
|
|
|
|
def add_resolution_column_to_high_risk():
|
|
"""Add resolution column to high risk matches CSV"""
|
|
input_file = '/mnt/NV2/Development/major-domo/database/high_risk_player_matches.csv'
|
|
output_file = '/mnt/NV2/Development/major-domo/database/high_risk_player_matches_updated.csv'
|
|
|
|
with open(input_file, 'r') as infile:
|
|
reader = csv.DictReader(infile)
|
|
|
|
# Read all rows
|
|
rows = list(reader)
|
|
|
|
# Add resolution column to each row
|
|
for row in rows:
|
|
row['resolution'] = '' # Empty for manual filling
|
|
|
|
# Add helpful pre-fills based on risk type and bbref_id presence
|
|
if row['risk_type'] == 'sbaplayer_conflict':
|
|
# Check if both have bbref_ids - if so, they are definitely different people
|
|
sba1_bbref = row['sba1_bbref'].strip()
|
|
sba2_bbref = row['sba2_bbref'].strip()
|
|
|
|
if sba1_bbref and sba2_bbref and sba1_bbref != sba2_bbref:
|
|
# Different bbref_ids = definitely different people
|
|
row['resolution'] = 'DIFFERENT_PEOPLE'
|
|
elif not sba1_bbref and not sba2_bbref:
|
|
# Neither has bbref_id - could be duplicate
|
|
if row['similarity_score'] == '1.000':
|
|
row['resolution'] = f"MERGE_{row['sba2_id']}_INTO_{row['sba1_id']}"
|
|
else:
|
|
row['resolution'] = 'DIFFERENT_PEOPLE'
|
|
elif sba1_bbref and not sba2_bbref:
|
|
# One has bbref_id, one doesn't - could be duplicate needing bbref_id
|
|
row['resolution'] = f"MERGE_{row['sba2_id']}_INTO_{row['sba1_id']}"
|
|
elif sba2_bbref and not sba1_bbref:
|
|
# One has bbref_id, one doesn't - could be duplicate needing bbref_id
|
|
row['resolution'] = f"MERGE_{row['sba1_id']}_INTO_{row['sba2_id']}"
|
|
else:
|
|
# Same bbref_id - definitely duplicate
|
|
row['resolution'] = f"MERGE_{row['sba2_id']}_INTO_{row['sba1_id']}"
|
|
|
|
elif row['risk_type'] == 'player_ambiguous_match':
|
|
row['resolution'] = f"USE_SBA_{row['sba1_id']}" # Pre-select first option
|
|
elif row['risk_type'] == 'middle_initial_conflict':
|
|
row['resolution'] = 'DIFFERENT_PEOPLE'
|
|
|
|
# Write updated CSV
|
|
with open(output_file, 'w', newline='') as outfile:
|
|
fieldnames = list(reader.fieldnames) + ['resolution']
|
|
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
|
|
|
|
writer.writeheader()
|
|
writer.writerows(rows)
|
|
|
|
logger.info(f"Updated high risk matches CSV: {output_file}")
|
|
return output_file
|
|
|
|
def create_instructions_file():
|
|
"""Create instructions file for manual review"""
|
|
instructions = """
|
|
# CSV REVIEW INSTRUCTIONS
|
|
|
|
## File 1: unmatched_players_for_review_updated.csv
|
|
|
|
**Resolution Column Values:**
|
|
- `ACCEPT` - Use the suggested_sbaplayer_id (pre-filled for exact matches)
|
|
- `USE_SBA_123` - Use specific SbaPlayer ID 123 instead of suggestion
|
|
- `CREATE_NEW` - Create new SbaPlayer record for this player
|
|
- `SKIP` - Skip this player for now (won't be processed)
|
|
- `REVIEW` - Needs manual review (pre-filled for partial matches)
|
|
|
|
**Pre-filled Values:**
|
|
- Exact name matches are pre-filled as `ACCEPT`
|
|
- Partial matches are marked as `REVIEW`
|
|
- No suggestions are marked as `CREATE_NEW`
|
|
|
|
## File 2: high_risk_player_matches_updated.csv
|
|
|
|
**Resolution Column Values:**
|
|
- `MERGE_123_INTO_456` - Merge SbaPlayer 123 into SbaPlayer 456
|
|
- `DIFFERENT_PEOPLE` - These are actually different people, keep separate
|
|
- `USE_SBA_123` - For player matches, use this specific SbaPlayer ID
|
|
- `CREATE_NEW` - Create new SbaPlayer record
|
|
- `SKIP` - Skip this for now
|
|
|
|
**Pre-filled Logic for SbaPlayer Conflicts:**
|
|
- Different bbref_ids = `DIFFERENT_PEOPLE` (bbref_ids are globally unique)
|
|
- Same bbref_id = `MERGE` (definitely duplicates)
|
|
- One has bbref_id, one doesn't = `MERGE` suggestion (review needed)
|
|
- Neither has bbref_id + identical names = `MERGE` suggestion
|
|
- Player ambiguous matches pre-select the first suggested SbaPlayer
|
|
- Middle initial conflicts are marked as `DIFFERENT_PEOPLE`
|
|
|
|
## Important Notes:
|
|
- **bbref_ids are globally unique** - trust them completely
|
|
- If two SbaPlayers have different bbref_ids, they are different people
|
|
- If one has bbref_id and one doesn't, they might be the same person
|
|
|
|
## Next Steps:
|
|
1. Review and edit the resolution columns in both files
|
|
2. Save the files when done
|
|
3. Let Claude know you're ready to process the changes
|
|
|
|
## Common Patterns:
|
|
- bbref_id mismatches (like "HALP") should usually be `CREATE_NEW`
|
|
- Different bbref_ids = always different people
|
|
- Common names like "Carlos Martinez" need careful review
|
|
- Middle initials usually indicate different people
|
|
"""
|
|
|
|
with open('/mnt/NV2/Development/major-domo/database/CSV_REVIEW_INSTRUCTIONS.txt', 'w') as f:
|
|
f.write(instructions)
|
|
|
|
logger.info("Created instructions file: CSV_REVIEW_INSTRUCTIONS.txt")
|
|
|
|
def main():
|
|
"""Update both CSV files with resolution columns"""
|
|
logger.info("Adding resolution columns to CSV files...")
|
|
|
|
unmatched_file = add_resolution_column_to_unmatched()
|
|
high_risk_file = add_resolution_column_to_high_risk()
|
|
create_instructions_file()
|
|
|
|
logger.info(f"\n=== CSV FILES UPDATED ===")
|
|
logger.info(f"Unmatched players: {unmatched_file}")
|
|
logger.info(f"High risk matches: {high_risk_file}")
|
|
logger.info(f"Instructions: CSV_REVIEW_INSTRUCTIONS.txt")
|
|
logger.info(f"\nPlease review and edit the 'resolution' column in both files.")
|
|
|
|
if __name__ == "__main__":
|
|
main() |