#!/usr/bin/env python3
"""Cognitive Memory Migration Script.

Migrates all memories from the MemoryGraph SQLite database to the
markdown-based cognitive memory system.  Idempotent — files that already
exist are skipped, so the script can safely be re-run.

Usage:
    python migrate.py            # Run migration
    python migrate.py --dry-run  # Preview without writing
    python migrate.py --verify   # Verify post-migration integrity
"""

import argparse
import json
import random
import shutil
import sqlite3
import subprocess
import sys
from contextlib import closing
from datetime import datetime, timezone
from pathlib import Path

# Import from sibling module
sys.path.insert(0, str(Path(__file__).parent))
from client import (
    CognitiveMemoryClient,
    MEMORY_DIR,
    TYPE_DIRS,
    TYPE_WEIGHTS,
    calculate_decay_score,
    make_filename,
)

# MemoryGraph database location
MEMORYGRAPH_DB = Path.home() / ".memorygraph" / "memory.db"

# Memory type mapping: MemoryGraph types -> cognitive-memory types.
# MemoryGraph has more types; map extras to the closest cognitive-memory
# equivalent.
TYPE_MAP = {
    "solution": "solution",
    "problem": "problem",
    "error": "error",
    "fix": "fix",
    "code_pattern": "code_pattern",
    "decision": "decision",
    "configuration": "configuration",
    "workflow": "workflow",
    "general": "general",
    # MemoryGraph-only types mapped to closest equivalents
    "task": "general",
    "project": "general",
    "technology": "general",
    "command": "general",
    "file_context": "general",
}

# Relationship types accepted by the cognitive-memory frontmatter schema;
# anything outside this set is normalized to RELATED_TO.
VALID_REL_TYPES = frozenset({
    "SOLVES", "CAUSES", "BUILDS_ON", "ALTERNATIVE_TO",
    "REQUIRES", "FOLLOWS", "RELATED_TO",
})


def _normalize_rel_type(rel_type: str) -> str:
    """Map an arbitrary MemoryGraph relationship type onto the valid set."""
    return rel_type if rel_type in VALID_REL_TYPES else "RELATED_TO"


def _parse_rel_context(context_raw) -> str:
    """Best-effort extraction of a human-readable context string.

    The stored context may be a JSON string embedded inside the properties
    JSON, an already-decoded dict, or free text.  Returns "" when absent.
    """
    if not context_raw:
        return ""
    try:
        ctx = json.loads(context_raw) if isinstance(context_raw, str) else context_raw
    except (json.JSONDecodeError, TypeError):
        return str(context_raw)
    if isinstance(ctx, dict):
        return ctx.get("text", "")
    return str(ctx)


def load_sqlite_memories(db_path: Path) -> list:
    """Load all memories from the MemoryGraph SQLite database.

    Returns a list of plain dicts with normalized keys; node-level
    timestamps are used as fallbacks when the properties JSON lacks them.
    """
    # closing() guarantees the connection is released even if the query
    # raises; the fetched Row objects stay valid after close.
    with closing(sqlite3.connect(str(db_path))) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT id, properties, created_at, updated_at FROM nodes WHERE label = 'Memory'"
        ).fetchall()

    memories = []
    for row in rows:
        props = json.loads(row["properties"])
        memories.append({
            "id": props.get("id", row["id"]),
            "type": props.get("type", "general"),
            "title": props.get("title", "Untitled"),
            "content": props.get("content", ""),
            "summary": props.get("summary"),
            "tags": props.get("tags", []),
            "importance": props.get("importance", 0.5),
            "confidence": props.get("confidence", 0.8),
            "usage_count": props.get("usage_count", 0),
            "created_at": props.get("created_at", row["created_at"]),
            "updated_at": props.get("updated_at", row["updated_at"]),
        })
    return memories


def load_sqlite_relationships(db_path: Path) -> list:
    """Load all relationships from the MemoryGraph SQLite database."""
    with closing(sqlite3.connect(str(db_path))) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT id, from_id, to_id, rel_type, properties, created_at FROM relationships"
        ).fetchall()

    relationships = []
    for row in rows:
        props = json.loads(row["properties"])
        relationships.append({
            "id": row["id"],
            "from_id": row["from_id"],
            "to_id": row["to_id"],
            "rel_type": row["rel_type"],
            "strength": props.get("strength", 0.5),
            # Context may itself be JSON-within-JSON; decode best-effort.
            "context": _parse_rel_context(props.get("context", "")),
        })
    return relationships


def _create_markdown_files(client: CognitiveMemoryClient, memories: list):
    """Phase 1: write one markdown file per memory (idempotent).

    Returns (id_to_path, created_count, skipped_count) where id_to_path maps
    memory id -> (absolute Path, path relative to MEMORY_DIR).
    """
    id_to_path = {}
    created_count = 0
    skipped_count = 0

    for i, mem in enumerate(memories, 1):
        memory_id = mem["id"]
        mem_type = TYPE_MAP.get(mem["type"], "general")
        type_dir = TYPE_DIRS.get(mem_type, "general")

        filename = make_filename(mem["title"], memory_id)
        # BUG FIX: the path previously hard-coded the literal "(unknown)"
        # instead of the generated filename, collapsing every memory onto a
        # single path and defeating the idempotency check.
        rel_path = f"graph/{type_dir}/{filename}"
        full_path = MEMORY_DIR / rel_path

        # Idempotency: an existing file is assumed to be a prior migration.
        if full_path.exists():
            id_to_path[memory_id] = (full_path, rel_path)
            skipped_count += 1
            continue

        frontmatter = {
            "id": memory_id,
            "type": mem_type,
            "title": mem["title"],
            "tags": mem.get("tags", []),
            "importance": mem.get("importance", 0.5),
            "confidence": mem.get("confidence", 0.8),
            "created": mem.get("created_at", ""),
            "updated": mem.get("updated_at", ""),
        }

        content = mem.get("content", "")
        if mem.get("summary"):
            content = f"{content}\n\n**Summary:** {mem['summary']}"

        client._write_memory_file(full_path, frontmatter, content)
        id_to_path[memory_id] = (full_path, rel_path)
        created_count += 1

        if i % 50 == 0:
            print(f"  {i}/{len(memories)} files created...")

    return id_to_path, created_count, skipped_count


def _embed_relationships(client: CognitiveMemoryClient, relationships: list,
                         id_to_path: dict) -> int:
    """Phase 2: embed relationships into the frontmatter of both endpoints.

    Outgoing relations are written to the source memory; a mirrored
    "incoming" entry is written to each target.  Returns the number of
    outgoing relations added.
    """
    rel_count = 0

    # Group relationships by source memory id.
    from_rels = {}
    for rel in relationships:
        from_rels.setdefault(rel["from_id"], []).append(rel)

    for from_id, rels in from_rels.items():
        if from_id not in id_to_path:
            print(f"  Warning: Source memory {from_id[:8]} not found, skipping {len(rels)} relationships")
            continue

        full_path, _ = id_to_path[from_id]
        fm, body = client._read_memory_file(full_path)
        existing_rels = fm.get("relations", [])
        existing_targets = {(r.get("target"), r.get("type")) for r in existing_rels}

        added = 0
        for rel in rels:
            to_id = rel["to_id"]
            if to_id not in id_to_path:
                continue
            # BUG FIX: normalize BEFORE the duplicate check — stored
            # relations hold normalized types, so comparing the raw type
            # re-embedded non-canonical relations on every re-run.
            rel_type = _normalize_rel_type(rel["rel_type"])
            if (to_id, rel_type) in existing_targets:
                continue  # Already embedded (prior run or duplicate row)

            new_rel = {
                "target": to_id,
                "type": rel_type,
                "direction": "outgoing",
                "strength": rel.get("strength", 0.5),
            }
            if rel.get("context"):
                new_rel["context"] = rel["context"]
            existing_rels.append(new_rel)
            # BUG FIX: track the new entry so duplicates within this same
            # batch are not embedded twice.
            existing_targets.add((to_id, rel_type))
            added += 1

        if added > 0:
            fm["relations"] = existing_rels
            client._write_memory_file(full_path, fm, body)
            rel_count += added

        # Mirror incoming relations onto the target memories.
        for rel in rels:
            to_id = rel["to_id"]
            if to_id not in id_to_path:
                continue
            to_path, _ = id_to_path[to_id]
            to_fm, to_body = client._read_memory_file(to_path)
            to_rels = to_fm.get("relations", [])

            # NOTE: at most one incoming entry per source memory is kept,
            # regardless of relation type (original behavior preserved).
            has_incoming = any(
                r.get("target") == from_id and r.get("direction") == "incoming"
                for r in to_rels
            )
            if has_incoming:
                continue

            incoming = {
                "target": from_id,
                "type": _normalize_rel_type(rel["rel_type"]),
                "direction": "incoming",
                "strength": rel.get("strength", 0.5),
            }
            if rel.get("context"):
                incoming["context"] = rel["context"]
            to_rels.append(incoming)
            to_fm["relations"] = to_rels
            client._write_memory_file(to_path, to_fm, to_body)

    return rel_count


def _init_state(client: CognitiveMemoryClient, memories: list) -> int:
    """Phase 4: seed _state.json with usage counts and initial decay scores.

    Returns the total number of state entries after seeding.
    """
    state = client._load_state()
    now = datetime.now(timezone.utc)

    for mem in memories:
        mid = mem["id"]
        usage_count = mem.get("usage_count", 0)
        created_str = mem.get("created_at", "")

        # Age in days since creation, for the decay computation.
        try:
            created_dt = datetime.fromisoformat(created_str.replace("Z", "+00:00"))
            if created_dt.tzinfo is None:
                created_dt = created_dt.replace(tzinfo=timezone.utc)
            days = (now - created_dt).total_seconds() / 86400
        except (ValueError, AttributeError):
            days = 30  # Unparseable/missing timestamp: assume a month old.

        mem_type = TYPE_MAP.get(mem["type"], "general")
        type_weight = TYPE_WEIGHTS.get(mem_type, 1.0)
        importance = mem.get("importance", 0.5)
        decay_score = calculate_decay_score(importance, days, usage_count, type_weight)

        state.setdefault("entries", {})[mid] = {
            "access_count": usage_count,
            "last_accessed": mem.get("updated_at", mem.get("created_at", now.isoformat())),
            "decay_score": round(decay_score, 4),
        }

    client._save_state(state)
    return len(state.get("entries", {}))


def _git_commit_migration(message: str) -> None:
    """Phase 5: best-effort git commit of everything under MEMORY_DIR."""
    try:
        subprocess.run(
            ["git", "add", "-A"],
            cwd=str(MEMORY_DIR), capture_output=True, timeout=30,
        )
        subprocess.run(
            ["git", "commit", "-m", message],
            cwd=str(MEMORY_DIR), capture_output=True, timeout=30,
        )
        print("  Committed to git")
    except Exception as e:
        # Migration output is already on disk; a failed commit is non-fatal.
        print(f"  Warning: Git commit failed: {e}")


def migrate(dry_run: bool = False) -> None:
    """Run the full migration from MemoryGraph to cognitive-memory.

    With dry_run=True, only prints the post-mapping type distribution and
    relationship count without writing anything.
    """
    if not MEMORYGRAPH_DB.exists():
        print(f"Error: MemoryGraph database not found at {MEMORYGRAPH_DB}")
        sys.exit(1)

    print(f"Loading memories from {MEMORYGRAPH_DB}...")
    memories = load_sqlite_memories(MEMORYGRAPH_DB)
    relationships = load_sqlite_relationships(MEMORYGRAPH_DB)
    print(f"Found {len(memories)} memories and {len(relationships)} relationships")

    if dry_run:
        print("\n--- DRY RUN ---")
        by_type = {}
        for mem in memories:
            t = TYPE_MAP.get(mem["type"], "general")
            by_type[t] = by_type.get(t, 0) + 1
        print("Type distribution after mapping:")
        for t, count in sorted(by_type.items(), key=lambda x: -x[1]):
            dir_name = TYPE_DIRS.get(t, "general")
            print(f"  graph/{dir_name}/: {count}")
        print(f"\nRelationships to embed: {len(relationships)}")
        return

    # Initialize client (creates directories).
    client = CognitiveMemoryClient()

    print("\nPhase 1: Creating markdown files...")
    id_to_path, created_count, skipped_count = _create_markdown_files(client, memories)
    print(f"  Created: {created_count}, Skipped (existing): {skipped_count}")

    print("\nPhase 2: Embedding relationships into frontmatter...")
    rel_count = _embed_relationships(client, relationships, id_to_path)
    print(f"  Embedded {rel_count} outgoing relationships")

    print("\nPhase 3: Building index...")
    indexed = client.reindex()
    print(f"  Indexed {indexed} memories")

    print("\nPhase 4: Initializing state with usage data...")
    state_count = _init_state(client, memories)
    print(f"  Initialized state for {state_count} memories")

    print("\nPhase 5: Git commit...")
    _git_commit_migration(
        f"migrate: {len(memories)} memories from MemoryGraph\n\n"
        f"- {created_count} new markdown files created\n"
        f"- {rel_count} relationships embedded\n"
        f"- {indexed} entries indexed\n"
        f"- State initialized with usage data"
    )

    print("\nPhase 6: Archiving MemoryGraph database...")
    archive_path = MEMORYGRAPH_DB.with_suffix(".db.archive")
    if not archive_path.exists():
        shutil.copy2(str(MEMORYGRAPH_DB), str(archive_path))
        print(f"  Archived to {archive_path}")
    else:
        print(f"  Archive already exists at {archive_path}")

    print("\nPhase 7: Generating CORE.md...")
    client.core()
    print("  CORE.md generated")

    # Summary
    print("\n" + "=" * 60)
    print("Migration Complete!")
    print("=" * 60)
    print(f"  Memories migrated: {len(memories)}")
    print(f"  Files created: {created_count}")
    print(f"  Files skipped: {skipped_count}")
    print(f"  Relations embedded: {rel_count}")
    print(f"  Index entries: {indexed}")
    print(f"  Memory dir: {MEMORY_DIR}")
    print(f"  Archive: {archive_path}")


def verify() -> None:
    """Verify migration integrity against the SQLite source (or its archive).

    Prints [OK]/error/warning lines for each check and a final summary;
    exits with an error only when no source database can be found.
    """
    print("Verifying migration integrity...\n")

    if MEMORYGRAPH_DB.exists():
        db_path = MEMORYGRAPH_DB
    else:
        # Fall back to the archive created by migrate() Phase 6.
        archive = MEMORYGRAPH_DB.with_suffix(".db.archive")
        if not archive.exists():
            print("Error: No MemoryGraph database found for verification")
            sys.exit(1)
        db_path = archive

    # Load SQLite data
    memories = load_sqlite_memories(db_path)
    relationships = load_sqlite_relationships(db_path)

    client = CognitiveMemoryClient()
    index = client._load_index()
    state = client._load_state()

    errors = []
    warnings = []

    # Check 1: Count match
    sqlite_count = len(memories)
    md_count = len(index.get("entries", {}))
    if sqlite_count != md_count:
        errors.append(f"Count mismatch: SQLite={sqlite_count}, Index={md_count}")
    else:
        print(f"[OK] Memory count matches: {sqlite_count}")

    # Check 2: All memories have files
    missing_files = 0
    for mid, entry in index.get("entries", {}).items():
        path = MEMORY_DIR / entry.get("path", "")
        if not path.exists():
            missing_files += 1
            if missing_files <= 5:  # Cap detail lines; total reported below.
                errors.append(f"Missing file: {entry.get('path')} ({entry.get('title', '')[:40]})")
    if missing_files == 0:
        print(f"[OK] All {md_count} files exist on disk")
    else:
        errors.append(f"Total missing files: {missing_files}")

    # Check 3: State entries
    state_count = len(state.get("entries", {}))
    if state_count != sqlite_count:
        warnings.append(f"State entry count mismatch: expected={sqlite_count}, got={state_count}")
    else:
        print(f"[OK] State entries match: {state_count}")

    # Check 4: Spot check up to 5 random memories
    sample = random.sample(memories, min(5, len(memories)))
    spot_ok = 0
    for mem in sample:
        path = client._resolve_memory_path(mem["id"])
        if path:
            fm, body = client._read_memory_file(path)
            if fm.get("title") == mem["title"]:
                spot_ok += 1
            else:
                warnings.append(
                    f"Title mismatch for {mem['id'][:8]}: "
                    f"SQLite='{mem['title'][:40]}', MD='{fm.get('title', '')[:40]}'"
                )
        else:
            errors.append(f"Memory {mem['id'][:8]} not found in markdown: {mem['title'][:40]}")
    # BUG FIX: report the actual sample size, not a hard-coded 5.
    print(f"[OK] Spot check: {spot_ok}/{len(sample)} memories match")

    # Check 5: Relationships
    rel_in_index = sum(
        len(entry.get("relations", []))
        for entry in index.get("entries", {}).values()
    )
    # Each relationship creates up to 2 entries (outgoing + incoming).
    expected_rel_entries = len(relationships) * 2
    if rel_in_index < len(relationships):
        warnings.append(
            f"Relation count may be low: SQLite={len(relationships)}, "
            f"Index entries={rel_in_index} (expected ~{expected_rel_entries})"
        )
    else:
        print(f"[OK] Relationships: {len(relationships)} original, {rel_in_index} index entries")

    # Check 6: Git status
    try:
        result = subprocess.run(
            ["git", "status", "--porcelain"],
            cwd=str(MEMORY_DIR), capture_output=True, text=True, timeout=5,
        )
        if result.returncode == 0:
            # Count dirty *tracked* files; "??" (untracked) lines are ignored.
            dirty = [
                line for line in result.stdout.strip().split("\n")
                if line.strip() and not line.startswith("??")
            ]
            if dirty:
                warnings.append(f"Uncommitted changes in memory repo: {len(dirty)} files")
            else:
                print("[OK] Git repo clean")
        else:
            warnings.append("Not a git repo or git error")
    except Exception:
        warnings.append("Could not check git status")

    # Check 7: CORE.md exists
    core_path = MEMORY_DIR / "CORE.md"
    if core_path.exists():
        content = core_path.read_text()
        print(f"[OK] CORE.md exists ({len(content)} chars)")
    else:
        warnings.append("CORE.md not found")

    # Report
    print()
    if errors:
        print(f"ERRORS ({len(errors)}):")
        for e in errors:
            print(f"  [!] {e}")
    if warnings:
        print(f"WARNINGS ({len(warnings)}):")
        for w in warnings:
            print(f"  [?] {w}")
    if not errors and not warnings:
        print("All checks passed!")
    elif not errors:
        print(f"\nMigration OK with {len(warnings)} warning(s)")
    else:
        print(f"\nMigration has {len(errors)} error(s) that need attention")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Migrate MemoryGraph to Cognitive Memory")
    parser.add_argument("--dry-run", action="store_true", help="Preview without writing")
    parser.add_argument("--verify", action="store_true", help="Verify migration integrity")
    args = parser.parse_args()

    if args.verify:
        verify()
    else:
        migrate(dry_run=args.dry_run)