major-domo-database/.claude/sqlite-to-postgres/api_data_integrity_tests.py

#!/usr/bin/env python3
"""
API Data Integrity Test Suite
Compares data between localhost PostgreSQL API and production SQLite API
to identify and validate data migration issues.
Usage:
python api_data_integrity_tests.py
python api_data_integrity_tests.py --verbose
python api_data_integrity_tests.py --test players
"""
import requests
import json
import sys
import os
import argparse
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
from datetime import datetime
import logging

# API Configuration
LOCALHOST_API = "http://localhost:801/api/v3"
PRODUCTION_API = "https://sba.manticorum.com/api/v3"
# Test Configuration
TEST_SEASON = 10
SAMPLE_PLAYER_IDS = [9916, 9958, 9525, 9349, 9892] # Known problematic + some others
SAMPLE_TEAM_IDS = [404, 428, 443, 422, 425]
SAMPLE_GAME_IDS = [1571, 1458, 1710]
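
# NOTE: SAMPLE_TEAM_IDS and SAMPLE_GAME_IDS are defined for reference but are not
# exercised by any test below; team- and game-level comparisons would need their
# own test cases.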


@dataclass
class TestResult:
    """Container for test results"""

    test_name: str
    passed: bool
    localhost_data: Any
    production_data: Any
    error_message: str = ""
    details: Optional[Dict[str, Any]] = None


class APIDataIntegrityTester:
    """Test suite for comparing API data between localhost and production"""

    def __init__(self, verbose: bool = False):
        self.verbose = verbose
        self.results: List[TestResult] = []
        self.setup_logging()

    def setup_logging(self):
        """Configure logging"""
        level = logging.DEBUG if self.verbose else logging.INFO
        # Ensure the logs/ directory exists before attaching the file handler;
        # logging.FileHandler raises FileNotFoundError otherwise.
        os.makedirs('logs', exist_ok=True)
        log_filename = f'logs/api_integrity_test_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'
        logging.basicConfig(
            level=level,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.StreamHandler(),
                logging.FileHandler(log_filename)
            ]
        )
        self.logger = logging.getLogger(__name__)

    def make_request(self, base_url: str, endpoint: str, params: Optional[Dict] = None) -> Tuple[bool, Any]:
        """Make API request with error handling"""
        try:
            url = f"{base_url}{endpoint}"
            self.logger.debug(f"Making request to: {url} with params: {params}")
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            return True, response.json()
        except requests.exceptions.RequestException as e:
            self.logger.error(f"Request failed for {base_url}{endpoint}: {e}")
            return False, str(e)
        except json.JSONDecodeError as e:
            self.logger.error(f"JSON decode failed for {base_url}{endpoint}: {e}")
            return False, f"Invalid JSON response: {e}"

    def compare_player_data(self, player_id: int) -> TestResult:
        """Compare player data between APIs"""
        test_name = f"Player ID {player_id} Data Comparison"
        # Get data from both APIs
        localhost_success, localhost_data = self.make_request(LOCALHOST_API, f"/players/{player_id}")
        production_success, production_data = self.make_request(PRODUCTION_API, f"/players/{player_id}")
        if not localhost_success or not production_success:
            return TestResult(
                test_name=test_name,
                passed=False,
                localhost_data=localhost_data if localhost_success else None,
                production_data=production_data if production_success else None,
                error_message="API request failed"
            )
        # Compare key fields
        fields_to_compare = ['id', 'name', 'pos_1', 'season']
        differences = {}
        for field in fields_to_compare:
            localhost_val = localhost_data.get(field)
            production_val = production_data.get(field)
            if localhost_val != production_val:
                differences[field] = {
                    'localhost': localhost_val,
                    'production': production_val
                }
        passed = len(differences) == 0
        error_msg = f"Field differences: {differences}" if differences else ""
        return TestResult(
            test_name=test_name,
            passed=passed,
            localhost_data=localhost_data,
            production_data=production_data,
            error_message=error_msg,
            details={'differences': differences}
        )
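
    # compare_player_data only checks identity fields (id, name, pos_1, season);
    # stat-level discrepancies are covered by the batting and play comparisons below.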

    def compare_batting_stats(self, params: Dict) -> TestResult:
        """Compare batting statistics between APIs"""
        test_name = f"Batting Stats Comparison: {params}"
        # Ensure season is included
        if 'season' not in params:
            params['season'] = TEST_SEASON
        localhost_success, localhost_data = self.make_request(LOCALHOST_API, "/plays/batting", params)
        production_success, production_data = self.make_request(PRODUCTION_API, "/plays/batting", params)
        if not localhost_success or not production_success:
            return TestResult(
                test_name=test_name,
                passed=False,
                localhost_data=localhost_data if localhost_success else None,
                production_data=production_data if production_success else None,
                error_message="API request failed"
            )
        # Compare counts and top results
        localhost_count = localhost_data.get('count', 0)
        production_count = production_data.get('count', 0)
        localhost_stats = localhost_data.get('stats', [])
        production_stats = production_data.get('stats', [])
        differences = {}
        # Compare counts
        if localhost_count != production_count:
            differences['count'] = {
                'localhost': localhost_count,
                'production': production_count
            }
        # Compare top 3 results if available
        top_n = min(3, len(localhost_stats), len(production_stats))
        if top_n > 0:
            top_differences = []
            for i in range(top_n):
                local_player = localhost_stats[i].get('player', {})
                prod_player = production_stats[i].get('player', {})
                if local_player.get('name') != prod_player.get('name'):
                    top_differences.append({
                        'rank': i + 1,
                        'localhost_player': local_player.get('name'),
                        'production_player': prod_player.get('name'),
                        'localhost_id': local_player.get('id'),
                        'production_id': prod_player.get('id')
                    })
            if top_differences:
                differences['top_players'] = top_differences
        passed = len(differences) == 0
        error_msg = f"Differences found: {differences}" if differences else ""
        return TestResult(
            test_name=test_name,
            passed=passed,
            localhost_data={'count': localhost_count, 'top_3': localhost_stats[:3]},
            production_data={'count': production_count, 'top_3': production_stats[:3]},
            error_message=error_msg,
            details={'differences': differences}
        )
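
    # The batting comparison checks only the row count and the names/IDs of the
    # top three ranked players, so ordering regressions surface quickly but
    # value-level drift in individual stats would not.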

    def compare_play_data(self, params: Dict) -> TestResult:
        """Compare play data between APIs"""
        test_name = f"Play Data Comparison: {params}"
        if 'season' not in params:
            params['season'] = TEST_SEASON
        localhost_success, localhost_data = self.make_request(LOCALHOST_API, "/plays", params)
        production_success, production_data = self.make_request(PRODUCTION_API, "/plays", params)
        if not localhost_success or not production_success:
            return TestResult(
                test_name=test_name,
                passed=False,
                localhost_data=localhost_data if localhost_success else None,
                production_data=production_data if production_success else None,
                error_message="API request failed"
            )
        localhost_count = localhost_data.get('count', 0)
        production_count = production_data.get('count', 0)
        localhost_plays = localhost_data.get('plays', [])
        production_plays = production_data.get('plays', [])
        # Compare basic metrics
        differences = {}
        if localhost_count != production_count:
            differences['count'] = {
                'localhost': localhost_count,
                'production': production_count
            }
        # Compare first play if available
        if localhost_plays and production_plays:
            local_first = localhost_plays[0]
            prod_first = production_plays[0]
            key_fields = ['batter_id', 'pitcher_id', 'on_base_code', 'pa', 'hit']
            first_play_diffs = {}
            for field in key_fields:
                if local_first.get(field) != prod_first.get(field):
                    first_play_diffs[field] = {
                        'localhost': local_first.get(field),
                        'production': prod_first.get(field)
                    }
            if first_play_diffs:
                differences['first_play'] = first_play_diffs
        passed = len(differences) == 0
        error_msg = f"Differences found: {differences}" if differences else ""
        return TestResult(
            test_name=test_name,
            passed=passed,
            localhost_data={'count': localhost_count, 'sample_play': localhost_plays[0] if localhost_plays else None},
            production_data={'count': production_count, 'sample_play': production_plays[0] if production_plays else None},
            error_message=error_msg,
            details={'differences': differences}
        )

    def test_known_problematic_players(self) -> List[TestResult]:
        """Test the specific players we know are problematic"""
        self.logger.info("Testing known problematic players...")
        results = []
        for player_id in SAMPLE_PLAYER_IDS:
            result = self.compare_player_data(player_id)
            results.append(result)
            self.logger.info(f"Player {player_id}: {'PASS' if result.passed else 'FAIL'}")
            if not result.passed and self.verbose:
                self.logger.debug(f" Error: {result.error_message}")
        return results

    def test_batting_statistics(self) -> List[TestResult]:
        """Test various batting statistics endpoints"""
        self.logger.info("Testing batting statistics...")
        results = []
        test_cases = [
            # The original problematic query
            {'season': TEST_SEASON, 'group_by': 'playerteam', 'limit': 10, 'obc': '111', 'sort': 'repri-desc'},
            # Basic season stats
            {'season': TEST_SEASON, 'group_by': 'player', 'limit': 5, 'sort': 'repri-desc'},
            # Team level stats
            {'season': TEST_SEASON, 'group_by': 'team', 'limit': 5},
            # Specific on-base situations
            {'season': TEST_SEASON, 'group_by': 'player', 'limit': 5, 'obc': '000'},
            {'season': TEST_SEASON, 'group_by': 'player', 'limit': 5, 'obc': '100'},
        ]
        for params in test_cases:
            result = self.compare_batting_stats(params)
            results.append(result)
            self.logger.info(f"Batting stats {params}: {'PASS' if result.passed else 'FAIL'}")
            if not result.passed and self.verbose:
                self.logger.debug(f" Error: {result.error_message}")
        return results

    def test_play_data(self) -> List[TestResult]:
        """Test play-by-play data"""
        self.logger.info("Testing play data...")
        results = []
        test_cases = [
            # Basic plays
            {'season': TEST_SEASON, 'limit': 5},
            # Specific on-base codes
            {'season': TEST_SEASON, 'obc': '111', 'limit': 5},
            {'season': TEST_SEASON, 'obc': '000', 'limit': 5},
            # Player-specific plays
            {'season': TEST_SEASON, 'batter_id': '9916', 'limit': 5},
        ]
        for params in test_cases:
            result = self.compare_play_data(params)
            results.append(result)
            self.logger.info(f"Play data {params}: {'PASS' if result.passed else 'FAIL'}")
            if not result.passed and self.verbose:
                self.logger.debug(f" Error: {result.error_message}")
        return results

    def test_api_connectivity(self) -> List[TestResult]:
        """Test basic API connectivity and health"""
        self.logger.info("Testing API connectivity...")
        results = []
        # Test basic endpoints
        endpoints = [
            ("/players", {'season': TEST_SEASON, 'limit': 1}),
            ("/teams", {'season': TEST_SEASON, 'limit': 1}),
            ("/plays", {'season': TEST_SEASON, 'limit': 1}),
        ]
        for endpoint, params in endpoints:
            test_name = f"API Connectivity: {endpoint}"
            localhost_success, localhost_data = self.make_request(LOCALHOST_API, endpoint, params)
            production_success, production_data = self.make_request(PRODUCTION_API, endpoint, params)
            passed = localhost_success and production_success
            error_msg = ""
            if not localhost_success:
                error_msg += f"Localhost failed: {localhost_data}. "
            if not production_success:
                error_msg += f"Production failed: {production_data}. "
            result = TestResult(
                test_name=test_name,
                passed=passed,
                localhost_data=localhost_data if localhost_success else None,
                production_data=production_data if production_success else None,
                error_message=error_msg.strip()
            )
            results.append(result)
            self.logger.info(f"Connectivity {endpoint}: {'PASS' if result.passed else 'FAIL'}")
        return results

    def run_all_tests(self) -> None:
        """Run the complete test suite"""
        self.logger.info("Starting API Data Integrity Test Suite")
        self.logger.info(f"Localhost API: {LOCALHOST_API}")
        self.logger.info(f"Production API: {PRODUCTION_API}")
        self.logger.info(f"Test Season: {TEST_SEASON}")
        self.logger.info("=" * 60)
        # Run all test categories
        self.results.extend(self.test_api_connectivity())
        self.results.extend(self.test_known_problematic_players())
        self.results.extend(self.test_batting_statistics())
        self.results.extend(self.test_play_data())
        # Generate summary
        self.generate_summary()

    def run_specific_tests(self, test_category: str) -> None:
        """Run specific test category"""
        self.logger.info(f"Running {test_category} tests only")
        if test_category == "connectivity":
            self.results.extend(self.test_api_connectivity())
        elif test_category == "players":
            self.results.extend(self.test_known_problematic_players())
        elif test_category == "batting":
            self.results.extend(self.test_batting_statistics())
        elif test_category == "plays":
            self.results.extend(self.test_play_data())
        else:
            self.logger.error(f"Unknown test category: {test_category}")
            return
        self.generate_summary()

    def generate_summary(self) -> None:
        """Generate and display test summary"""
        total_tests = len(self.results)
        passed_tests = sum(1 for r in self.results if r.passed)
        failed_tests = total_tests - passed_tests
        self.logger.info("=" * 60)
        self.logger.info("TEST SUMMARY")
        self.logger.info("=" * 60)
        self.logger.info(f"Total Tests: {total_tests}")
        self.logger.info(f"Passed: {passed_tests}")
        self.logger.info(f"Failed: {failed_tests}")
        if total_tests > 0:
            self.logger.info(f"Success Rate: {(passed_tests / total_tests) * 100:.1f}%")
        else:
            self.logger.info("No tests run")
        if failed_tests > 0:
            self.logger.info("\nFAILED TESTS:")
            self.logger.info("-" * 40)
            for result in self.results:
                if not result.passed:
                    self.logger.info(f"{result.test_name}")
                    if result.error_message:
                        self.logger.info(f" Error: {result.error_message}")
                    if self.verbose and result.details:
                        self.logger.info(f" Details: {json.dumps(result.details, indent=2)}")
        # Save detailed results to file
        self.save_detailed_results()

    def save_detailed_results(self) -> None:
        """Save detailed test results to JSON file"""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"logs/api_integrity_results_{timestamp}.json"
        results_data = {
            'timestamp': timestamp,
            'localhost_api': LOCALHOST_API,
            'production_api': PRODUCTION_API,
            'test_season': TEST_SEASON,
            'summary': {
                'total_tests': len(self.results),
                'passed': sum(1 for r in self.results if r.passed),
                'failed': sum(1 for r in self.results if not r.passed)
            },
            'results': [
                {
                    'test_name': r.test_name,
                    'passed': r.passed,
                    'error_message': r.error_message,
                    'localhost_data': r.localhost_data,
                    'production_data': r.production_data,
                    'details': r.details
                }
                for r in self.results
            ]
        }
        try:
            with open(filename, 'w') as f:
                json.dump(results_data, f, indent=2, default=str)
            self.logger.info(f"\nDetailed results saved to: {filename}")
        except Exception as e:
            self.logger.error(f"Failed to save results: {e}")


def main():
    """Main entry point"""
    parser = argparse.ArgumentParser(description="API Data Integrity Test Suite")
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--test', '-t', choices=['connectivity', 'players', 'batting', 'plays'],
                        help='Run specific test category only')
    args = parser.parse_args()
    tester = APIDataIntegrityTester(verbose=args.verbose)
    try:
        if args.test:
            tester.run_specific_tests(args.test)
        else:
            tester.run_all_tests()
    except KeyboardInterrupt:
        print("\nTest suite interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"Test suite failed with error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()