major-domo-database/.claude/sqlite-to-postgres/api_data_integrity_tests.py

#!/usr/bin/env python3
"""
API Data Integrity Test Suite
Compares data between localhost PostgreSQL API and production SQLite API
to identify and validate data migration issues.
Usage:
python api_data_integrity_tests.py
python api_data_integrity_tests.py --verbose
python api_data_integrity_tests.py --test players
"""
import requests
import json
import sys
import os
import argparse
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
from datetime import datetime
import logging

# API Configuration
LOCALHOST_API = "http://localhost:801/api/v3"
PRODUCTION_API = "https://sba.manticorum.com/api/v3"
# Test Configuration
TEST_SEASON = 10
SAMPLE_PLAYER_IDS = [9916, 9958, 9525, 9349, 9892] # Known problematic + some others
SAMPLE_TEAM_IDS = [404, 428, 443, 422, 425]
SAMPLE_GAME_IDS = [1571, 1458, 1710]
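
# NOTE: SAMPLE_TEAM_IDS and SAMPLE_GAME_IDS are defined for reference but are not
# exercised by any test below; team- and game-level comparisons would need their
# own test cases.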


@dataclass
class TestResult:
    """Container for test results"""

    test_name: str
    passed: bool
    localhost_data: Any
    production_data: Any
    error_message: str = ""
    details: Optional[Dict[str, Any]] = None


class APIDataIntegrityTester:
    """Test suite for comparing API data between localhost and production"""

    def __init__(self, verbose: bool = False):
        self.verbose = verbose
        self.results: List[TestResult] = []
        self.setup_logging()

    def setup_logging(self):
        """Configure logging"""
        level = logging.DEBUG if self.verbose else logging.INFO
        # Ensure the logs/ directory exists before attaching the file handler;
        # logging.FileHandler raises FileNotFoundError otherwise.
        os.makedirs('logs', exist_ok=True)
        log_filename = f'logs/api_integrity_test_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'
        logging.basicConfig(
            level=level,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.StreamHandler(),
                logging.FileHandler(log_filename)
            ]
        )
        self.logger = logging.getLogger(__name__)

    def make_request(self, base_url: str, endpoint: str, params: Optional[Dict] = None) -> Tuple[bool, Any]:
        """Make API request with error handling"""
        try:
            url = f"{base_url}{endpoint}"
            self.logger.debug(f"Making request to: {url} with params: {params}")
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            return True, response.json()
        except requests.exceptions.RequestException as e:
            self.logger.error(f"Request failed for {base_url}{endpoint}: {e}")
            return False, str(e)
        except json.JSONDecodeError as e:
            self.logger.error(f"JSON decode failed for {base_url}{endpoint}: {e}")
            return False, f"Invalid JSON response: {e}"

    def compare_player_data(self, player_id: int) -> TestResult:
        """Compare player data between APIs"""
        test_name = f"Player ID {player_id} Data Comparison"
        # Get data from both APIs
        localhost_success, localhost_data = self.make_request(LOCALHOST_API, f"/players/{player_id}")
        production_success, production_data = self.make_request(PRODUCTION_API, f"/players/{player_id}")
        if not localhost_success or not production_success:
            return TestResult(
                test_name=test_name,
                passed=False,
                localhost_data=localhost_data if localhost_success else None,
                production_data=production_data if production_success else None,
                error_message="API request failed"
            )
        # Compare key fields
        fields_to_compare = ['id', 'name', 'pos_1', 'season']
        differences = {}
        for field in fields_to_compare:
            localhost_val = localhost_data.get(field)
            production_val = production_data.get(field)
            if localhost_val != production_val:
                differences[field] = {
                    'localhost': localhost_val,
                    'production': production_val
                }
        passed = len(differences) == 0
        error_msg = f"Field differences: {differences}" if differences else ""
        return TestResult(
            test_name=test_name,
            passed=passed,
            localhost_data=localhost_data,
            production_data=production_data,
            error_message=error_msg,
            details={'differences': differences}
        )
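
    # compare_player_data only checks identity fields (id, name, pos_1, season);
    # stat-level discrepancies are covered by the batting and play comparisons below.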

    def compare_batting_stats(self, params: Dict) -> TestResult:
        """Compare batting statistics between APIs"""
        test_name = f"Batting Stats Comparison: {params}"
        # Ensure season is included
        if 'season' not in params:
            params['season'] = TEST_SEASON
        localhost_success, localhost_data = self.make_request(LOCALHOST_API, "/plays/batting", params)
        production_success, production_data = self.make_request(PRODUCTION_API, "/plays/batting", params)
        if not localhost_success or not production_success:
            return TestResult(
                test_name=test_name,
                passed=False,
                localhost_data=localhost_data if localhost_success else None,
                production_data=production_data if production_success else None,
                error_message="API request failed"
            )
        # Compare counts and top results
        localhost_count = localhost_data.get('count', 0)
        production_count = production_data.get('count', 0)
        localhost_stats = localhost_data.get('stats', [])
        production_stats = production_data.get('stats', [])
        differences = {}
        # Compare counts
        if localhost_count != production_count:
            differences['count'] = {
                'localhost': localhost_count,
                'production': production_count
            }
        # Compare top 3 results if available
        top_n = min(3, len(localhost_stats), len(production_stats))
        if top_n > 0:
            top_differences = []
            for i in range(top_n):
                local_player = localhost_stats[i].get('player', {})
                prod_player = production_stats[i].get('player', {})
                if local_player.get('name') != prod_player.get('name'):
                    top_differences.append({
                        'rank': i + 1,
                        'localhost_player': local_player.get('name'),
                        'production_player': prod_player.get('name'),
                        'localhost_id': local_player.get('id'),
                        'production_id': prod_player.get('id')
                    })
            if top_differences:
                differences['top_players'] = top_differences
        passed = len(differences) == 0
        error_msg = f"Differences found: {differences}" if differences else ""
        return TestResult(
            test_name=test_name,
            passed=passed,
            localhost_data={'count': localhost_count, 'top_3': localhost_stats[:3]},
            production_data={'count': production_count, 'top_3': production_stats[:3]},
            error_message=error_msg,
            details={'differences': differences}
        )
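
    # The batting comparison checks only the row count and the names/IDs of the
    # top three ranked players, so ordering regressions surface quickly but
    # value-level drift in individual stats would not.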

    def compare_play_data(self, params: Dict) -> TestResult:
        """Compare play data between APIs"""
        test_name = f"Play Data Comparison: {params}"
        if 'season' not in params:
            params['season'] = TEST_SEASON
        localhost_success, localhost_data = self.make_request(LOCALHOST_API, "/plays", params)
        production_success, production_data = self.make_request(PRODUCTION_API, "/plays", params)
        if not localhost_success or not production_success:
            return TestResult(
                test_name=test_name,
                passed=False,
                localhost_data=localhost_data if localhost_success else None,
                production_data=production_data if production_success else None,
                error_message="API request failed"
            )
        localhost_count = localhost_data.get('count', 0)
        production_count = production_data.get('count', 0)
        localhost_plays = localhost_data.get('plays', [])
        production_plays = production_data.get('plays', [])
        # Compare basic metrics
        differences = {}
        if localhost_count != production_count:
            differences['count'] = {
                'localhost': localhost_count,
                'production': production_count
            }
        # Compare first play if available
        if localhost_plays and production_plays:
            local_first = localhost_plays[0]
            prod_first = production_plays[0]
            key_fields = ['batter_id', 'pitcher_id', 'on_base_code', 'pa', 'hit']
            first_play_diffs = {}
            for field in key_fields:
                if local_first.get(field) != prod_first.get(field):
                    first_play_diffs[field] = {
                        'localhost': local_first.get(field),
                        'production': prod_first.get(field)
                    }
            if first_play_diffs:
                differences['first_play'] = first_play_diffs
        passed = len(differences) == 0
        error_msg = f"Differences found: {differences}" if differences else ""
        return TestResult(
            test_name=test_name,
            passed=passed,
            localhost_data={'count': localhost_count, 'sample_play': localhost_plays[0] if localhost_plays else None},
            production_data={'count': production_count, 'sample_play': production_plays[0] if production_plays else None},
            error_message=error_msg,
            details={'differences': differences}
        )

    def test_known_problematic_players(self) -> List[TestResult]:
        """Test the specific players we know are problematic"""
        self.logger.info("Testing known problematic players...")
        results = []
        for player_id in SAMPLE_PLAYER_IDS:
            result = self.compare_player_data(player_id)
            results.append(result)
            self.logger.info(f"Player {player_id}: {'PASS' if result.passed else 'FAIL'}")
            if not result.passed and self.verbose:
                self.logger.debug(f" Error: {result.error_message}")
        return results

    def test_batting_statistics(self) -> List[TestResult]:
        """Test various batting statistics endpoints"""
        self.logger.info("Testing batting statistics...")
        results = []
        test_cases = [
            # The original problematic query
            {'season': TEST_SEASON, 'group_by': 'playerteam', 'limit': 10, 'obc': '111', 'sort': 'repri-desc'},
            # Basic season stats
            {'season': TEST_SEASON, 'group_by': 'player', 'limit': 5, 'sort': 'repri-desc'},
            # Team level stats
            {'season': TEST_SEASON, 'group_by': 'team', 'limit': 5},
            # Specific on-base situations
            {'season': TEST_SEASON, 'group_by': 'player', 'limit': 5, 'obc': '000'},
            {'season': TEST_SEASON, 'group_by': 'player', 'limit': 5, 'obc': '100'},
        ]
        for params in test_cases:
            result = self.compare_batting_stats(params)
            results.append(result)
            self.logger.info(f"Batting stats {params}: {'PASS' if result.passed else 'FAIL'}")
            if not result.passed and self.verbose:
                self.logger.debug(f" Error: {result.error_message}")
        return results

    def test_play_data(self) -> List[TestResult]:
        """Test play-by-play data"""
        self.logger.info("Testing play data...")
        results = []
        test_cases = [
            # Basic plays
            {'season': TEST_SEASON, 'limit': 5},
            # Specific on-base codes
            {'season': TEST_SEASON, 'obc': '111', 'limit': 5},
            {'season': TEST_SEASON, 'obc': '000', 'limit': 5},
            # Player-specific plays
            {'season': TEST_SEASON, 'batter_id': '9916', 'limit': 5},
        ]
        for params in test_cases:
            result = self.compare_play_data(params)
            results.append(result)
            self.logger.info(f"Play data {params}: {'PASS' if result.passed else 'FAIL'}")
            if not result.passed and self.verbose:
                self.logger.debug(f" Error: {result.error_message}")
        return results

    def test_api_connectivity(self) -> List[TestResult]:
        """Test basic API connectivity and health"""
        self.logger.info("Testing API connectivity...")
        results = []
        # Test basic endpoints
        endpoints = [
            ("/players", {'season': TEST_SEASON, 'limit': 1}),
            ("/teams", {'season': TEST_SEASON, 'limit': 1}),
            ("/plays", {'season': TEST_SEASON, 'limit': 1}),
        ]
        for endpoint, params in endpoints:
            test_name = f"API Connectivity: {endpoint}"
            localhost_success, localhost_data = self.make_request(LOCALHOST_API, endpoint, params)
            production_success, production_data = self.make_request(PRODUCTION_API, endpoint, params)
            passed = localhost_success and production_success
            error_msg = ""
            if not localhost_success:
                error_msg += f"Localhost failed: {localhost_data}. "
            if not production_success:
                error_msg += f"Production failed: {production_data}. "
            result = TestResult(
                test_name=test_name,
                passed=passed,
                localhost_data=localhost_data if localhost_success else None,
                production_data=production_data if production_success else None,
                error_message=error_msg.strip()
            )
            results.append(result)
            self.logger.info(f"Connectivity {endpoint}: {'PASS' if result.passed else 'FAIL'}")
        return results

    def run_all_tests(self) -> None:
        """Run the complete test suite"""
        self.logger.info("Starting API Data Integrity Test Suite")
        self.logger.info(f"Localhost API: {LOCALHOST_API}")
        self.logger.info(f"Production API: {PRODUCTION_API}")
        self.logger.info(f"Test Season: {TEST_SEASON}")
        self.logger.info("=" * 60)
        # Run all test categories
        self.results.extend(self.test_api_connectivity())
        self.results.extend(self.test_known_problematic_players())
        self.results.extend(self.test_batting_statistics())
        self.results.extend(self.test_play_data())
        # Generate summary
        self.generate_summary()

    def run_specific_tests(self, test_category: str) -> None:
        """Run specific test category"""
        self.logger.info(f"Running {test_category} tests only")
        if test_category == "connectivity":
            self.results.extend(self.test_api_connectivity())
        elif test_category == "players":
            self.results.extend(self.test_known_problematic_players())
        elif test_category == "batting":
            self.results.extend(self.test_batting_statistics())
        elif test_category == "plays":
            self.results.extend(self.test_play_data())
        else:
            self.logger.error(f"Unknown test category: {test_category}")
            return
        self.generate_summary()

    def generate_summary(self) -> None:
        """Generate and display test summary"""
        total_tests = len(self.results)
        passed_tests = sum(1 for r in self.results if r.passed)
        failed_tests = total_tests - passed_tests
        self.logger.info("=" * 60)
        self.logger.info("TEST SUMMARY")
        self.logger.info("=" * 60)
        self.logger.info(f"Total Tests: {total_tests}")
        self.logger.info(f"Passed: {passed_tests}")
        self.logger.info(f"Failed: {failed_tests}")
        if total_tests > 0:
            self.logger.info(f"Success Rate: {(passed_tests / total_tests) * 100:.1f}%")
        else:
            self.logger.info("No tests run")
        if failed_tests > 0:
            self.logger.info("\nFAILED TESTS:")
            self.logger.info("-" * 40)
            for result in self.results:
                if not result.passed:
                    self.logger.info(f"{result.test_name}")
                    if result.error_message:
                        self.logger.info(f" Error: {result.error_message}")
                    if self.verbose and result.details:
                        self.logger.info(f" Details: {json.dumps(result.details, indent=2)}")
        # Save detailed results to file
        self.save_detailed_results()

    def save_detailed_results(self) -> None:
        """Save detailed test results to JSON file"""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"logs/api_integrity_results_{timestamp}.json"
        results_data = {
            'timestamp': timestamp,
            'localhost_api': LOCALHOST_API,
            'production_api': PRODUCTION_API,
            'test_season': TEST_SEASON,
            'summary': {
                'total_tests': len(self.results),
                'passed': sum(1 for r in self.results if r.passed),
                'failed': sum(1 for r in self.results if not r.passed)
            },
            'results': [
                {
                    'test_name': r.test_name,
                    'passed': r.passed,
                    'error_message': r.error_message,
                    'localhost_data': r.localhost_data,
                    'production_data': r.production_data,
                    'details': r.details
                }
                for r in self.results
            ]
        }
        try:
            with open(filename, 'w') as f:
                json.dump(results_data, f, indent=2, default=str)
            self.logger.info(f"\nDetailed results saved to: {filename}")
        except Exception as e:
            self.logger.error(f"Failed to save results: {e}")


def main():
    """Main entry point"""
    parser = argparse.ArgumentParser(description="API Data Integrity Test Suite")
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--test', '-t', choices=['connectivity', 'players', 'batting', 'plays'],
                        help='Run specific test category only')
    args = parser.parse_args()
    tester = APIDataIntegrityTester(verbose=args.verbose)
    try:
        if args.test:
            tester.run_specific_tests(args.test)
        else:
            tester.run_all_tests()
    except KeyboardInterrupt:
        print("\nTest suite interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"Test suite failed with error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()