#!/usr/bin/env python3
"""
API Data Integrity Test Suite

Compares data between localhost PostgreSQL API and production SQLite API
to identify and validate data migration issues.

Usage:
    python api_data_integrity_tests.py
    python api_data_integrity_tests.py --verbose
    python api_data_integrity_tests.py --test players
"""

import argparse
import json
import logging
import os
import sys
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import requests

# API Configuration
LOCALHOST_API = "http://localhost:801/api/v3"
PRODUCTION_API = "https://sba.manticorum.com/api/v3"

# Test Configuration
TEST_SEASON = 10
SAMPLE_PLAYER_IDS = [9916, 9958, 9525, 9349, 9892]  # Known problematic + some others
SAMPLE_TEAM_IDS = [404, 428, 443, 422, 425]
SAMPLE_GAME_IDS = [1571, 1458, 1710]


@dataclass
class TestResult:
    """Container for the outcome of a single comparison or connectivity test."""

    # Human-readable label used in log lines and in the JSON report.
    test_name: str
    # True when both APIs answered and no differences were recorded.
    passed: bool
    # Payload (or a trimmed summary of it) from each API; None when that
    # side's request failed.
    localhost_data: Any
    production_data: Any
    # Empty string when the test passed.
    error_message: str = ""
    # Optional structured data, e.g. {'differences': {...}}.
    details: Optional[Dict[str, Any]] = None


class APIDataIntegrityTester:
    """Test suite for comparing API data between localhost and production"""

    def __init__(self, verbose: bool = False):
        """Create a tester.

        Args:
            verbose: When True, log at DEBUG level and include per-test
                details in the summary.
        """
        self.verbose = verbose
        self.results: List[TestResult] = []
        self.setup_logging()

    def setup_logging(self):
        """Configure logging to the console and to a timestamped file under ``logs/``."""
        level = logging.DEBUG if self.verbose else logging.INFO
        log_filename = f'logs/api_integrity_test_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'

        # FileHandler opens the file immediately and fails if the directory
        # is missing, so make sure it exists first.
        os.makedirs(os.path.dirname(log_filename), exist_ok=True)

        logging.basicConfig(
            level=level,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.StreamHandler(),
                # The summary logs a non-ASCII marker; pin the encoding so the
                # file does not depend on the platform default.
                logging.FileHandler(log_filename, encoding='utf-8'),
            ],
        )
        self.logger = logging.getLogger(__name__)

    def make_request(self, base_url: str, endpoint: str,
                     params: Optional[Dict] = None) -> Tuple[bool, Any]:
        """Make API request with error handling.

        Args:
            base_url: API root, e.g. ``LOCALHOST_API``.
            endpoint: Path appended verbatim to ``base_url``.
            params: Optional query-string parameters.

        Returns:
            ``(True, decoded_json)`` on success, otherwise
            ``(False, error_description)``. Never raises for network, HTTP
            status or JSON decoding problems.
        """
        url = f"{base_url}{endpoint}"
        self.logger.debug(f"Making request to: {url} with params: {params}")

        try:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            self.logger.error(f"Request failed for {base_url}{endpoint}: {e}")
            return False, str(e)

        # Decoding is kept out of the request try-block: recent versions of
        # requests raise a JSON decode error that is itself a RequestException,
        # which would otherwise be reported as a failed request. Every JSON
        # decode error raised by Response.json() is a ValueError.
        try:
            return True, response.json()
        except ValueError as e:
            self.logger.error(f"JSON decode failed for {base_url}{endpoint}: {e}")
            return False, f"Invalid JSON response: {e}"

    def _fetch_both(self, test_name: str, endpoint: str,
                    params: Optional[Dict] = None
                    ) -> Tuple[Optional[TestResult], Any, Any]:
        """Fetch ``endpoint`` from both APIs for a comparison test.

        Returns:
            ``(failure, localhost_data, production_data)``. ``failure`` is a
            failed ``TestResult`` when either request failed or either payload
            is not a JSON object (the comparisons need ``.get``); in that case
            both data slots are None. Otherwise ``failure`` is None.
        """
        localhost_success, localhost_data = self.make_request(LOCALHOST_API, endpoint, params)
        production_success, production_data = self.make_request(PRODUCTION_API, endpoint, params)

        if not localhost_success or not production_success:
            failure = TestResult(
                test_name=test_name,
                passed=False,
                localhost_data=localhost_data if localhost_success else None,
                production_data=production_data if production_success else None,
                error_message="API request failed",
            )
            return failure, None, None

        if not isinstance(localhost_data, dict) or not isinstance(production_data, dict):
            failure = TestResult(
                test_name=test_name,
                passed=False,
                localhost_data=localhost_data,
                production_data=production_data,
                error_message="Unexpected response format (expected a JSON object)",
            )
            return failure, None, None

        return None, localhost_data, production_data

    def compare_player_data(self, player_id: int) -> TestResult:
        """Compare player data between APIs.

        Fetches ``/players/{player_id}`` from both APIs and compares the
        ``id``, ``name``, ``pos_1`` and ``season`` fields.
        """
        test_name = f"Player ID {player_id} Data Comparison"

        failure, localhost_data, production_data = self._fetch_both(
            test_name, f"/players/{player_id}"
        )
        if failure is not None:
            return failure

        # Compare key fields
        fields_to_compare = ['id', 'name', 'pos_1', 'season']
        differences = {}
        for field in fields_to_compare:
            localhost_val = localhost_data.get(field)
            production_val = production_data.get(field)
            if localhost_val != production_val:
                differences[field] = {
                    'localhost': localhost_val,
                    'production': production_val,
                }

        error_msg = f"Field differences: {differences}" if differences else ""

        return TestResult(
            test_name=test_name,
            passed=not differences,
            localhost_data=localhost_data,
            production_data=production_data,
            error_message=error_msg,
            details={'differences': differences},
        )

    def compare_batting_stats(self, params: Dict) -> TestResult:
        """Compare batting statistics between APIs.

        Queries ``/plays/batting`` on both APIs with ``params`` (adding
        ``season=TEST_SEASON`` when absent) and compares the ``count`` value
        and the player names of up to the first three ``stats`` entries.
        The caller's ``params`` dict is not modified.
        """
        test_name = f"Batting Stats Comparison: {params}"

        # Work on a copy so the season default does not leak to the caller.
        query = dict(params)
        query.setdefault('season', TEST_SEASON)

        failure, localhost_data, production_data = self._fetch_both(
            test_name, "/plays/batting", query
        )
        if failure is not None:
            return failure

        # Compare counts and top results
        localhost_count = localhost_data.get('count', 0)
        production_count = production_data.get('count', 0)
        # ``or []`` also covers a key that is present but null.
        localhost_stats = localhost_data.get('stats') or []
        production_stats = production_data.get('stats') or []

        differences = {}

        # Compare counts
        if localhost_count != production_count:
            differences['count'] = {
                'localhost': localhost_count,
                'production': production_count,
            }

        # Compare top 3 results if available (zip stops at the shorter list)
        top_differences = []
        paired = zip(localhost_stats[:3], production_stats[:3])
        for rank, (local_entry, prod_entry) in enumerate(paired, start=1):
            local_player = local_entry.get('player') or {}
            prod_player = prod_entry.get('player') or {}

            if local_player.get('name') != prod_player.get('name'):
                top_differences.append({
                    'rank': rank,
                    'localhost_player': local_player.get('name'),
                    'production_player': prod_player.get('name'),
                    'localhost_id': local_player.get('id'),
                    'production_id': prod_player.get('id'),
                })

        if top_differences:
            differences['top_players'] = top_differences

        error_msg = f"Differences found: {differences}" if differences else ""

        return TestResult(
            test_name=test_name,
            passed=not differences,
            localhost_data={'count': localhost_count, 'top_3': localhost_stats[:3]},
            production_data={'count': production_count, 'top_3': production_stats[:3]},
            error_message=error_msg,
            details={'differences': differences},
        )

    def compare_play_data(self, params: Dict) -> TestResult:
        """Compare play data between APIs.

        Queries ``/plays`` on both APIs with ``params`` (adding
        ``season=TEST_SEASON`` when absent) and compares the ``count`` value
        and a handful of fields of the first returned play. The caller's
        ``params`` dict is not modified.
        """
        test_name = f"Play Data Comparison: {params}"

        # Work on a copy so the season default does not leak to the caller.
        query = dict(params)
        query.setdefault('season', TEST_SEASON)

        failure, localhost_data, production_data = self._fetch_both(
            test_name, "/plays", query
        )
        if failure is not None:
            return failure

        localhost_count = localhost_data.get('count', 0)
        production_count = production_data.get('count', 0)
        # ``or []`` also covers a key that is present but null.
        localhost_plays = localhost_data.get('plays') or []
        production_plays = production_data.get('plays') or []

        # Compare basic metrics
        differences = {}

        if localhost_count != production_count:
            differences['count'] = {
                'localhost': localhost_count,
                'production': production_count,
            }

        # Compare first play if available
        if localhost_plays and production_plays:
            local_first = localhost_plays[0]
            prod_first = production_plays[0]

            key_fields = ['batter_id', 'pitcher_id', 'on_base_code', 'pa', 'hit']
            first_play_diffs = {
                field: {
                    'localhost': local_first.get(field),
                    'production': prod_first.get(field),
                }
                for field in key_fields
                if local_first.get(field) != prod_first.get(field)
            }

            if first_play_diffs:
                differences['first_play'] = first_play_diffs

        error_msg = f"Differences found: {differences}" if differences else ""

        return TestResult(
            test_name=test_name,
            passed=not differences,
            localhost_data={
                'count': localhost_count,
                'sample_play': localhost_plays[0] if localhost_plays else None,
            },
            production_data={
                'count': production_count,
                'sample_play': production_plays[0] if production_plays else None,
            },
            error_message=error_msg,
            details={'differences': differences},
        )

    def test_known_problematic_players(self) -> List[TestResult]:
        """Test the specific players we know are problematic"""
        self.logger.info("Testing known problematic players...")
        results = []

        for player_id in SAMPLE_PLAYER_IDS:
            result = self.compare_player_data(player_id)
            results.append(result)
            self.logger.info(f"Player {player_id}: {'PASS' if result.passed else 'FAIL'}")
            if not result.passed and self.verbose:
                self.logger.debug(f" Error: {result.error_message}")

        return results

    def test_batting_statistics(self) -> List[TestResult]:
        """Test various batting statistics endpoints"""
        self.logger.info("Testing batting statistics...")
        results = []

        test_cases = [
            # The original problematic query
            {'season': TEST_SEASON, 'group_by': 'playerteam', 'limit': 10,
             'obc': '111', 'sort': 'repri-desc'},
            # Basic season stats
            {'season': TEST_SEASON, 'group_by': 'player', 'limit': 5, 'sort': 'repri-desc'},
            # Team level stats
            {'season': TEST_SEASON, 'group_by': 'team', 'limit': 5},
            # Specific on-base situations
            {'season': TEST_SEASON, 'group_by': 'player', 'limit': 5, 'obc': '000'},
            {'season': TEST_SEASON, 'group_by': 'player', 'limit': 5, 'obc': '100'},
        ]

        for params in test_cases:
            result = self.compare_batting_stats(params)
            results.append(result)
            self.logger.info(f"Batting stats {params}: {'PASS' if result.passed else 'FAIL'}")
            if not result.passed and self.verbose:
                self.logger.debug(f" Error: {result.error_message}")

        return results

    def test_play_data(self) -> List[TestResult]:
        """Test play-by-play data"""
        self.logger.info("Testing play data...")
        results = []

        test_cases = [
            # Basic plays
            {'season': TEST_SEASON, 'limit': 5},
            # Specific on-base codes
            {'season': TEST_SEASON, 'obc': '111', 'limit': 5},
            {'season': TEST_SEASON, 'obc': '000', 'limit': 5},
            # Player-specific plays
            {'season': TEST_SEASON, 'batter_id': '9916', 'limit': 5},
        ]

        for params in test_cases:
            result = self.compare_play_data(params)
            results.append(result)
            self.logger.info(f"Play data {params}: {'PASS' if result.passed else 'FAIL'}")
            if not result.passed and self.verbose:
                self.logger.debug(f" Error: {result.error_message}")

        return results

    def test_api_connectivity(self) -> List[TestResult]:
        """Test basic API connectivity and health.

        A connectivity test passes when both APIs answer the request; the
        payloads themselves are not compared.
        """
        self.logger.info("Testing API connectivity...")
        results = []

        # Test basic endpoints
        endpoints = [
            ("/players", {'season': TEST_SEASON, 'limit': 1}),
            ("/teams", {'season': TEST_SEASON, 'limit': 1}),
            ("/plays", {'season': TEST_SEASON, 'limit': 1}),
        ]

        for endpoint, params in endpoints:
            test_name = f"API Connectivity: {endpoint}"

            localhost_success, localhost_data = self.make_request(LOCALHOST_API, endpoint, params)
            production_success, production_data = self.make_request(PRODUCTION_API, endpoint, params)

            passed = localhost_success and production_success
            error_msg = ""
            if not localhost_success:
                error_msg += f"Localhost failed: {localhost_data}. "
            if not production_success:
                error_msg += f"Production failed: {production_data}. "

            result = TestResult(
                test_name=test_name,
                passed=passed,
                localhost_data=localhost_data if localhost_success else None,
                production_data=production_data if production_success else None,
                error_message=error_msg.strip(),
            )
            results.append(result)
            self.logger.info(f"Connectivity {endpoint}: {'PASS' if result.passed else 'FAIL'}")

        return results

    def run_all_tests(self) -> None:
        """Run the complete test suite"""
        self.logger.info("Starting API Data Integrity Test Suite")
        self.logger.info(f"Localhost API: {LOCALHOST_API}")
        self.logger.info(f"Production API: {PRODUCTION_API}")
        self.logger.info(f"Test Season: {TEST_SEASON}")
        self.logger.info("=" * 60)

        # Run all test categories
        self.results.extend(self.test_api_connectivity())
        self.results.extend(self.test_known_problematic_players())
        self.results.extend(self.test_batting_statistics())
        self.results.extend(self.test_play_data())

        # Generate summary
        self.generate_summary()

    def run_specific_tests(self, test_category: str) -> None:
        """Run specific test category.

        Args:
            test_category: One of ``connectivity``, ``players``, ``batting``
                or ``plays``. Any other value is logged as an error and no
                summary is produced.
        """
        self.logger.info(f"Running {test_category} tests only")

        runners = {
            "connectivity": self.test_api_connectivity,
            "players": self.test_known_problematic_players,
            "batting": self.test_batting_statistics,
            "plays": self.test_play_data,
        }

        runner = runners.get(test_category)
        if runner is None:
            self.logger.error(f"Unknown test category: {test_category}")
            return

        self.results.extend(runner())
        self.generate_summary()

    def generate_summary(self) -> None:
        """Generate and display test summary, then save the detailed JSON report."""
        total_tests = len(self.results)
        passed_tests = sum(1 for r in self.results if r.passed)
        failed_tests = total_tests - passed_tests

        self.logger.info("=" * 60)
        self.logger.info("TEST SUMMARY")
        self.logger.info("=" * 60)
        self.logger.info(f"Total Tests: {total_tests}")
        self.logger.info(f"Passed: {passed_tests}")
        self.logger.info(f"Failed: {failed_tests}")
        # Guard the division: an empty run has no meaningful rate.
        if total_tests > 0:
            self.logger.info(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
        else:
            self.logger.info("No tests run")

        if failed_tests > 0:
            self.logger.info("\nFAILED TESTS:")
            self.logger.info("-" * 40)
            for result in self.results:
                if result.passed:
                    continue
                self.logger.info(f"❌ {result.test_name}")
                if result.error_message:
                    self.logger.info(f" Error: {result.error_message}")
                if self.verbose and result.details:
                    self.logger.info(f" Details: {json.dumps(result.details, indent=2)}")

        # Save detailed results to file
        self.save_detailed_results()

    def save_detailed_results(self) -> None:
        """Save detailed test results to a timestamped JSON file under ``logs/``.

        Best effort: any failure while writing is logged and swallowed so it
        cannot mask the test outcome.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"logs/api_integrity_results_{timestamp}.json"

        results_data = {
            'timestamp': timestamp,
            'localhost_api': LOCALHOST_API,
            'production_api': PRODUCTION_API,
            'test_season': TEST_SEASON,
            'summary': {
                'total_tests': len(self.results),
                'passed': sum(1 for r in self.results if r.passed),
                'failed': sum(1 for r in self.results if not r.passed),
            },
            'results': [
                {
                    'test_name': r.test_name,
                    'passed': r.passed,
                    'error_message': r.error_message,
                    'localhost_data': r.localhost_data,
                    'production_data': r.production_data,
                    'details': r.details,
                }
                for r in self.results
            ],
        }

        try:
            # Do not rely on setup_logging having created the directory.
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(results_data, f, indent=2, default=str)
            self.logger.info(f"\nDetailed results saved to: {filename}")
        except Exception as e:
            self.logger.error(f"Failed to save results: {e}")


def main():
    """Main entry point: parse CLI arguments and run the requested tests."""
    parser = argparse.ArgumentParser(description="API Data Integrity Test Suite")
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--test', '-t',
                        choices=['connectivity', 'players', 'batting', 'plays'],
                        help='Run specific test category only')

    args = parser.parse_args()

    tester = APIDataIntegrityTester(verbose=args.verbose)

    try:
        if args.test:
            tester.run_specific_tests(args.test)
        else:
            tester.run_all_tests()
    except KeyboardInterrupt:
        print("\nTest suite interrupted by user")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report anything unexpected and exit non-zero.
        print(f"Test suite failed with error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()