#!/usr/bin/env python3
"""Validate generated perk JSONs against NoteDiscovery source."""

import json
import os
import re
import subprocess

def get_source_content():
    """Fetch the perk source text from NoteDiscovery.

    Runs ``python client.py read gaming/vagabond-rpg/perks-full-list.md`` in
    the ``~/.claude/skills/notediscovery`` directory and decodes the JSON
    document the client writes to stdout.

    Returns:
        The value stored under the ``content`` key of that JSON document.

    Raises:
        RuntimeError: If the client exits with a non-zero status; the
            message carries the exit status and the client's stderr.
        json.JSONDecodeError: If the client's stdout is not valid JSON.
        KeyError: If the JSON document has no ``content`` key.
    """
    result = subprocess.run(
        ['python', 'client.py', 'read', 'gaming/vagabond-rpg/perks-full-list.md'],
        cwd=os.path.expanduser('~/.claude/skills/notediscovery'),
        capture_output=True,
        text=True,
    )

    # Fail loudly with the client's own error text instead of letting an
    # empty stdout turn into an unrelated JSON decoding error below.
    if result.returncode != 0:
        raise RuntimeError(
            f"NoteDiscovery client exited with status {result.returncode}: "
            f"{result.stderr.strip()}"
        )

    data = json.loads(result.stdout)
    return data['content']

def parse_source_perks(text):
    """Parse the perk source markdown into a dictionary keyed by perk name.

    Only the part of ``text`` after the first ``## Full Perk Descriptions``
    heading is parsed; when that heading is absent the whole text is used.
    Every ``### `` heading at the start of a line opens a block whose first
    line is the perk name. Inside a block, a line starting with
    ``**Prerequisite:**`` supplies the prerequisite (the last such line
    wins), and every other non-blank line that does not start with ``---``
    is joined with single spaces to form the description.

    Args:
        text: Markdown source of the perk list.

    Returns:
        A dict mapping the lower-cased perk name to a dict with the keys
        ``name``, ``prerequisite`` and ``description``. The known category
        headings and blocks without any description text are left out.
    """
    section_header = '## Full Perk Descriptions'
    prereq_marker = '**Prerequisite:**'
    category_headers = (
        'Stat-Based Perks (No Training Required)',
        'Spell-Based Perks',
        'No Prerequisites',
    )

    # Keep everything after the first occurrence of the section heading.
    _, found, tail = text.partition(section_header)
    desc_section = tail if found else text

    # Anchor on the start of a line rather than on a preceding newline so a
    # heading on the very first line of the section is recognized too.
    perk_blocks = re.split(r'(?m)^### ', desc_section)

    perks = {}
    # Element 0 is whatever precedes the first heading, never a perk.
    for block in perk_blocks[1:]:
        lines = block.strip().split('\n')
        name = lines[0].strip()

        # Skip blank headings and the category headings that share the
        # same heading level as real perks.
        if not name or name in category_headers:
            continue

        prereq = ''
        description_lines = []
        for line in lines[1:]:
            if line.startswith(prereq_marker):
                prereq = line.replace(prereq_marker, '').strip()
            elif line.strip() and not line.startswith('---'):
                description_lines.append(line)

        description = ' '.join(description_lines).strip()
        if description:
            perks[name.lower()] = {
                'name': name,
                'prerequisite': prereq,
                'description': description,
            }

    return perks

def normalize_text(text):
    """Normalize text so source and generated descriptions compare fairly.

    Removes anything that looks like an HTML tag, collapses every run of
    whitespace to a single space, and maps curly single and double quotes
    to their ASCII equivalents.

    Args:
        text: The string to normalize; any falsy value (``None``, ``''``)
            is treated as empty.

    Returns:
        The normalized string, or ``''`` for falsy input.
    """
    if not text:
        return ''

    # Remove HTML tags
    text = re.sub(r'<[^>]+>', '', text)
    # Normalize whitespace (this also trims both ends)
    text = ' '.join(text.split())
    # Normalize quotes and apostrophes, covering the opening single quote
    # (U+2018) as well as its closing counterpart.
    curly_to_ascii = str.maketrans({
        '\u2018': "'",
        '\u2019': "'",
        '\u201c': '"',
        '\u201d': '"',
    })
    return text.translate(curly_to_ascii)

def compare_perks(source, perks_dir):
    """Compare generated perk JSON files against the parsed source perks.

    Every ``*.json`` file in ``perks_dir`` is loaded (in sorted filename
    order) and looked up in ``source`` by its lower-cased ``name``. For
    perks present in both, the normalized ``system.description`` is compared
    with the normalized source description; a difference is reported only
    when the word-set overlap (intersection over union) is below 90%.
    Source perks with no generated file are reported last.

    Args:
        source: Dict as returned by ``parse_source_perks``, keyed by the
            lower-cased perk name.
        perks_dir: Directory containing the generated perk JSON files.

    Returns:
        A list of dicts, each with ``perk`` and ``file`` keys plus either an
        ``issue`` string (perk missing on one side) or an ``issues`` list of
        strings (description mismatch details).

    Raises:
        KeyError: If a generated JSON file has no ``name`` key.
    """
    discrepancies = []
    # Names seen among the generated files, gathered in the same pass so
    # each file is read and parsed only once.
    generated_names = set()

    for filename in sorted(os.listdir(perks_dir)):
        if not filename.endswith('.json'):
            continue

        filepath = os.path.join(perks_dir, filename)
        # Read as UTF-8 explicitly instead of relying on the platform's
        # default encoding.
        with open(filepath, 'r', encoding='utf-8') as f:
            generated = json.load(f)

        perk_name = generated['name']
        perk_key = perk_name.lower()
        generated_names.add(perk_key)

        if perk_key not in source:
            discrepancies.append({
                'perk': perk_name,
                'file': filename,
                'issue': f'NOT IN SOURCE - perk "{perk_name}" not found in source data'
            })
            continue

        src = source[perk_key]
        gen_system = generated.get('system', {})

        issues = []

        # Check description text
        src_desc = normalize_text(src['description'])
        gen_desc = normalize_text(gen_system.get('description', ''))

        if src_desc != gen_desc:
            # Similarity is the share of distinct lower-cased words the two
            # descriptions have in common; an empty side counts as 0.
            src_words = set(src_desc.lower().split())
            gen_words = set(gen_desc.lower().split())
            if src_words and gen_words:
                similarity = len(src_words & gen_words) / len(src_words | gen_words)
            else:
                similarity = 0

            if similarity < 0.9:  # Only report if less than 90% similar
                issues.append(f"DESCRIPTION MISMATCH (similarity: {similarity:.1%})")
                issues.append(f"  SOURCE: {src_desc[:150]}...")
                issues.append(f"  GENERATED: {gen_desc[:150]}...")

        if issues:
            discrepancies.append({
                'perk': perk_name,
                'file': filename,
                'issues': issues
            })

    # Check for missing perks in generated
    for perk_key in source:
        if perk_key not in generated_names:
            discrepancies.append({
                'perk': source[perk_key]['name'],
                'file': 'MISSING',
                'issue': 'MISSING - no generated file for this perk'
            })

    return discrepancies

def main():
    """Run the full validation and print a report to stdout.

    Fetches and parses the source perks, counts the ``*.json`` files in
    ``packs/_source/perks`` (relative to the current working directory),
    compares them against the source, and prints either a success banner or
    one entry per discrepancy.
    """
    rule = "=" * 60
    perks_dir = 'packs/_source/perks'

    print("Fetching source content from NoteDiscovery...")
    source_content = get_source_content()

    print("Parsing source perks...")
    source = parse_source_perks(source_content)
    print(f"Found {len(source)} perks in source\n")

    perk_files = [f for f in os.listdir(perks_dir) if f.endswith('.json')]
    print(f"Found {len(perk_files)} generated perk files\n")

    print("Comparing perks...")
    discrepancies = compare_perks(source, perks_dir)

    # Both outcomes open with the same blank line and rule.
    print("\n" + rule)

    if not discrepancies:
        print("NO DISCREPANCIES FOUND")
        print(f"All {len(perk_files)} perks match source!")
        print(rule)
        return

    print(f"FOUND {len(discrepancies)} PERK(S) WITH DISCREPANCIES")
    print(rule + "\n")
    for d in discrepancies:
        print(f"### {d['perk']} ({d.get('file', 'N/A')})")
        # An entry carries either a single 'issue' or a list of 'issues'.
        if 'issue' in d:
            print(f"  - {d['issue']}")
        for issue in d.get('issues', ()):
            print(f"  - {issue}")
        print()

# Run the validation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()