diff --git a/.mcp-full.json b/.mcp-full.json index 56cfc15..a3a562e 100644 --- a/.mcp-full.json +++ b/.mcp-full.json @@ -1,9 +1,11 @@ { "mcpServers": { "cognitive-memory": { - "command": "/home/cal/.claude/skills/cognitive-memory/mcp-server/cognitive-memory-mcp", + "command": "python3", "type": "stdio", - "args": [], + "args": [ + "/mnt/NV2/Development/cognitive-memory/mcp_server.py" + ], "env": {} }, "n8n-mcp": { diff --git a/.mcp.json b/.mcp.json index 8e62c85..04d1c12 100644 --- a/.mcp.json +++ b/.mcp.json @@ -1,9 +1,12 @@ { "mcpServers": { "cognitive-memory": { - "command": "/home/cal/.claude/skills/cognitive-memory/mcp-server/cognitive-memory-mcp", + "command": "python3", "type": "stdio", - "args": [], + "args": [ + "/mnt/NV2/Development/cognitive-memory/mcp_server.py" + ], "env": {} - } } + } + } } diff --git a/CLAUDE.md b/CLAUDE.md index 3b41f1b..14be083 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,6 +8,15 @@ Automatic loads are NOT enough — Read loads required CLAUDE.md context along t - When writing tests, include detailed docstrings explaining "what" and "why" - Launch sub-agents with Sonnet model unless another model is specified by the user +## Memory Protocol (Cognitive Memory) +- App: `/mnt/NV2/Development/cognitive-memory/` | Data: `~/.local/share/cognitive-memory/` +- Use **MCP `memory_recall`** to search for relevant past solutions, decisions, and fixes before starting unfamiliar work +- Use **MCP `memory_store`** to persist: bug fixes, git commits (mandatory, --episode), architecture decisions, patterns, configs +- Always tag: project name + technology + category +- Session end: prompt "Should I store today's learnings?" +- `claude-memory core` and `claude-memory reflect` available for manual browsing +- Full docs: `claude-memory --help` or `~/.claude/skills/cognitive-memory/SKILL.md` (skill layer) / `/mnt/NV2/Development/cognitive-memory/` (app code) + ## Git Commits - NEVER commit/add/push/tag without explicit user approval ("commit this", "go ahead") - Don't autopilot: find bug → fix → **ASK** → commit. Silence ≠ approval. @@ -32,12 +41,3 @@ Automatic loads are NOT enough — Read loads required CLAUDE.md context along t - Just `ssh ` (e.g. `ssh sba-db`, `ssh proxmox`, `ssh manticore`) - If unsure of an alias, read `~/.ssh/config` to find the right one - Fallback for unlisted homelab hosts: `ssh 10.10.0.x` (wildcard rule handles key/user) - -## Memory Protocol (Cognitive Memory) -- Skill: `~/.claude/skills/cognitive-memory/` | Data: `~/.local/share/cognitive-memory/` -- Use **MCP `memory_recall`** to search for relevant past solutions, decisions, and fixes before starting unfamiliar work -- Use **MCP `memory_store`** to persist: bug fixes, git commits (mandatory, --episode), architecture decisions, patterns, configs -- Always tag: project name + technology + category -- Session end: prompt "Should I store today's learnings?" -- `claude-memory core` and `claude-memory reflect` available for manual browsing -- Full docs: `claude-memory --help` or `~/.claude/skills/cognitive-memory/SKILL.md` diff --git a/settings.json b/settings.json index 0c80337..68e6912 100644 --- a/settings.json +++ b/settings.json @@ -159,7 +159,7 @@ "hooks": [ { "type": "command", - "command": "/usr/bin/python3 /home/cal/.claude/skills/cognitive-memory/scripts/session_memory.py", + "command": "/usr/bin/python3 /mnt/NV2/Development/cognitive-memory/scripts/session_memory.py", "timeout": 15 } ] diff --git a/skills/cognitive-memory/SKILL.md b/skills/cognitive-memory/SKILL.md index 0ff002a..f200736 100644 --- a/skills/cognitive-memory/SKILL.md +++ b/skills/cognitive-memory/SKILL.md @@ -75,7 +75,7 @@ claude-memory tags related "python" claude-memory tags suggest ``` -**Full command list:** `store`, `recall`, `get`, `search`, `update`, `delete`, `stats`, `recent`, `decay`, `core`, `episode`, `reindex`, `pin`, `embed`, `reflect`, `reflection`, `tags`, `procedure`, `merge`, `edge-get`, `edge-search`, `edge-update`, `edge-delete`, `config` +**Full command list:** `store`, `recall`, `get`, `search`, `update`, `delete`, `stats`, `recent`, `decay`, `core`, `episode`, `reindex`, `pin`, `embed`, `reflect`, `reflection`, `tags`, `procedure`, `merge`, `edge-get`, `edge-search`, `edge-update`, `edge-delete`, `config`, `graphs`, `graph-create` ### Memory Types @@ -101,32 +101,41 @@ claude-memory tags suggest | 0.5-0.7 | Standard - useful pattern or solution | | 0.3-0.4 | Minor - nice-to-know, edge cases | -## Skill Directory Structure +## Directory Structure +### Skill layer (Claude Code reads this) ``` ~/.claude/skills/cognitive-memory/ -├── client.py # Core API + CLI entrypoint -├── mcp_server.py # MCP server for Claude Code tools ├── SKILL.md # This file -├── SCHEMA.md # Format reference for all file types -├── feature.json # Skill manifest +└── SCHEMA.md # Memory file format reference +``` + +### Application code +``` +/mnt/NV2/Development/cognitive-memory/ +├── client.py # Core API + CLI entrypoint +├── cli.py # CLI interface +├── common.py # Shared utilities +├── analysis.py # Reflection/analysis +├── edges.py # Edge management +├── embeddings.py # Embedding support +├── mcp_server.py # MCP server for Claude Code tools +├── feature.json # Version metadata ├── scripts/ │ ├── session_memory.py # SessionEnd hook — auto-stores session learnings -│ └── ensure-symlinks.sh # Refreshes MEMORY.md symlinks to CORE.md -├── systemd/ -│ ├── README.md # Install instructions for timers -│ ├── cognitive-memory-daily.* # Daily: decay, core, symlinks -│ ├── cognitive-memory-embed.* # Hourly: refresh embeddings -│ └── cognitive-memory-weekly.* # Weekly: reflection cycle -└── dev/ - ├── PROJECT_PLAN.json # Development roadmap - └── migrate.py # One-time MemoryGraph migration +│ ├── edge-proposer.py # Edge proposer +│ └── memory-git-sync.sh # Git sync for data dir +└── systemd/ + ├── README.md # Install instructions for timers + ├── cognitive-memory-daily.* # Daily: decay, core, git sync + ├── cognitive-memory-embed.* # Hourly: refresh embeddings + └── cognitive-memory-weekly.* # Weekly: reflection cycle ``` ## Data Directory Structure Data lives at `$XDG_DATA_HOME/cognitive-memory/` (default: `~/.local/share/cognitive-memory/`). -Override with `COGNITIVE_MEMORY_DIR` env var. Named graphs live as siblings (see Multi-Graph below). +Override with `COGNITIVE_MEMORY_DIR` env var. Named graphs live as siblings (see [Multi-Graph](#multi-graph) below). ``` ~/.local/share/cognitive-memory/ @@ -354,6 +363,96 @@ View config: `claude-memory config --show` Provider changes trigger automatic re-embedding (dimension mismatch safety). Config stored in `_config.json` (gitignored, may contain API key). +## Multi-Graph + +Graphs are named, isolated memory namespaces. Each graph has its own index, state, embeddings, episodes, edges, CORE.md, and REFLECTION.md. Use them to separate unrelated domains (e.g., work vs. personal, per-project isolation). + +### Storage Layout + +``` +~/.local/share/ +├── cognitive-memory/ # "default" graph (always exists) +│ ├── graph/, episodes/, vault/ +│ ├── _index.json, _state.json, _embeddings.json, _config.json +│ ├── CORE.md, REFLECTION.md +│ └── ... +└── cognitive-memory-graphs/ # Named graphs live here as siblings + ├── work/ # Graph "work" + │ ├── graph/, episodes/, vault/ + │ ├── _index.json, _state.json, ... + │ └── ... + └── research/ # Graph "research" + └── ... +``` + +### Creating a Graph + +Use `graph-create` to set up a new graph with the full directory structure: + +```bash +# Convention path (~/.local/share/cognitive-memory-graphs/work/) +claude-memory graph-create work + +# Custom path (registered in default graph's _config.json automatically) +claude-memory graph-create work --path /mnt/data/work-memories +``` + +Graphs are also auto-created on first use — storing a memory with `graph=` creates the directory structure automatically: + +```bash +# CLI: use --graph before the subcommand +claude-memory --graph work store --type decision \ + --title "Chose Postgres" --content "..." --tags "db,arch" + +# MCP: pass graph parameter on any tool +# memory_store(type="decision", title="...", content="...", graph="work") +``` + +### Using Graphs + +Every CLI command and MCP tool accepts a graph parameter: + +```bash +# CLI +claude-memory --graph work recall "authentication" +claude-memory --graph work stats +claude-memory --graph work embed + +# List all graphs (default + configured + discovered on disk) +claude-memory graphs +# MCP: memory_graphs() +``` + +### Per-Project Graph Routing + +Set `COGNITIVE_MEMORY_GRAPH` to automatically route all memory operations to a specific graph without passing `graph=` on every call. Configure it per-project in Claude Code's settings: + +```json +// .claude/settings.json (in your project root) +{ + "env": { + "COGNITIVE_MEMORY_GRAPH": "paper-dynasty" + } +} +``` + +Resolution order: explicit `graph` parameter > `COGNITIVE_MEMORY_GRAPH` env var > `"default"`. + +This means a project can set its default graph, but individual calls can still override it with an explicit `graph` parameter when needed. + +### Graph Isolation + +- Each graph has **independent** index, state, embeddings, decay scores, episodes, and edges +- Edges can only connect memories **within the same graph** — cross-graph edges are not supported +- Embedding configuration (`_config.json`) is per-graph — each graph can use a different provider +- The graph registry (custom path mappings) lives in the **default** graph's `_config.json` + +### Automated Maintenance + +- **Systemd timers** (decay, core, embed, reflect) run against all graphs via `maintain-all-graphs.sh` +- **Git sync** (`memory-git-sync.sh`) syncs the default graph and any named graphs that are git repos +- **`edge-proposer.py`** and **`session_memory.py`** accept `--graph` to target a specific graph + ## Episode Logging Daily markdown files appended during sessions, providing chronological context: @@ -390,7 +489,8 @@ This skill should be used proactively when: --- -**Location**: `~/.claude/skills/cognitive-memory/` +**Skill**: `~/.claude/skills/cognitive-memory/` +**App**: `/mnt/NV2/Development/cognitive-memory/` **Data**: `$XDG_DATA_HOME/cognitive-memory/` (default: `~/.local/share/cognitive-memory/`) **Version**: 3.1.0 **Created**: 2026-02-13 diff --git a/skills/cognitive-memory/analysis.py b/skills/cognitive-memory/analysis.py deleted file mode 100644 index 3bf1c4e..0000000 --- a/skills/cognitive-memory/analysis.py +++ /dev/null @@ -1,861 +0,0 @@ -""" -Cognitive Memory - Analysis Mixin - -Decay scoring, CORE.md generation, tag analysis, reflection clustering, -memory merging, and REFLECTION.md generation. -""" - -import json -import subprocess -import sys -from datetime import datetime, timedelta, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -from common import ( - CORE_MAX_CHARS, - MEMORY_DIR, - THRESHOLD_ACTIVE, - THRESHOLD_DORMANT, - THRESHOLD_FADING, - TYPE_WEIGHTS, - calculate_decay_score, -) - - -class AnalysisMixin: - """Mixin providing analysis operations for CognitiveMemoryClient.""" - - def decay(self) -> Dict[str, Any]: - """Recalculate all decay scores. Updates _state.json.""" - index = self._load_index() - state = self._load_state() - now = datetime.now(timezone.utc) - updated_count = 0 - - for mid, entry in index.get("entries", {}).items(): - s = state.setdefault("entries", {}).setdefault( - mid, - { - "access_count": 0, - "last_accessed": entry.get("created", now.isoformat()), - "decay_score": 0.5, - }, - ) - - # Calculate days since last access - last_str = s.get("last_accessed", entry.get("created", "")) - try: - last_dt = datetime.fromisoformat(last_str.replace("Z", "+00:00")) - if last_dt.tzinfo is None: - last_dt = last_dt.replace(tzinfo=timezone.utc) - days = (now - last_dt).total_seconds() / 86400 - except (ValueError, AttributeError): - days = 30 # Default to 30 days if unparseable - - importance = entry.get("importance", 0.5) - mem_type = entry.get("type", "general") - type_weight = TYPE_WEIGHTS.get(mem_type, 1.0) - access_count = s.get("access_count", 0) - - # Check if pinned (vault) - path = entry.get("path", "") - if path.startswith("vault/"): - s["decay_score"] = 999.0 # Pinned memories never decay - else: - s["decay_score"] = round( - calculate_decay_score(importance, days, access_count, type_weight), - 4, - ) - - updated_count += 1 - - self._save_state(state) - - # Summary - state_entries = state.get("entries", {}) - return { - "updated": updated_count, - "active": sum( - 1 - for s in state_entries.values() - if s.get("decay_score", 0) >= THRESHOLD_ACTIVE - ), - "fading": sum( - 1 - for s in state_entries.values() - if THRESHOLD_FADING <= s.get("decay_score", 0) < THRESHOLD_ACTIVE - ), - "dormant": sum( - 1 - for s in state_entries.values() - if THRESHOLD_DORMANT <= s.get("decay_score", 0) < THRESHOLD_FADING - ), - "archived": sum( - 1 - for s in state_entries.values() - if 0 < s.get("decay_score", 0) < THRESHOLD_DORMANT - ), - } - - def core(self) -> str: - """Generate CORE.md from top memories by decay score.""" - index = self._load_index() - state = self._load_state() - - def _extract_summary(body: str, max_len: int = 80) -> str: - """Extract first meaningful sentence from memory body.""" - if not body or not body.strip(): - return "" - # Skip leading code blocks, headings, and list markers - for ln in body.strip().split("\n"): - stripped = ln.strip() - if not stripped: - continue - if stripped.startswith("```") or stripped.startswith("#"): - continue - first_line = stripped.lstrip("- *>").strip() - break - else: - return "" - if not first_line: - return "" - # Extract first sentence (split on '. ') - dot_pos = first_line.find(". ") - if 0 < dot_pos < max_len: - sentence = first_line[: dot_pos + 1] - else: - sentence = first_line - if len(sentence) > max_len: - sentence = sentence[: max_len - 3].rstrip() + "..." - return sentence - - # Collect all memories with decay scores - memories = [] - for mid, entry in index.get("entries", {}).items(): - s = state.get("entries", {}).get(mid, {}) - decay = s.get("decay_score", 0.5) - if decay < THRESHOLD_FADING: - continue # Skip low-relevance - memories.append( - { - "id": mid, - "title": entry.get("title", ""), - "type": entry.get("type", "general"), - "path": entry.get("path", ""), - "importance": entry.get("importance", 0.5), - "decay_score": decay, - "tags": entry.get("tags", []), - } - ) - - # Sort by decay score descending - memories.sort(key=lambda x: x["decay_score"], reverse=True) - - # Group by type category - categories = { - "Critical Solutions": ["solution"], - "Active Decisions": ["decision"], - "Key Fixes": ["fix"], - "Configurations": ["configuration"], - "Key Procedures": ["procedure"], - "Patterns & Workflows": ["code_pattern", "workflow"], - "Known Issues": ["problem", "error"], - "Insights": ["insight"], - "General": ["general"], - } - - now_str = datetime.now(timezone.utc).strftime("%Y-%m-%d") - active_count = sum(1 for m in memories if m["decay_score"] >= THRESHOLD_ACTIVE) - total_count = len(index.get("entries", {})) - - lines = [ - "# Memory Core (auto-generated)", - f"> Last updated: {now_str} | Active memories: {active_count}/{total_count} | Next refresh: daily (systemd timer)", - "", - ] - - char_count = sum(len(l) for l in lines) - - for cat_name, cat_types in categories.items(): - cat_memories = [m for m in memories if m["type"] in cat_types] - if not cat_memories: - continue - - section_lines = [f"## {cat_name}", ""] - for mem in cat_memories[:10]: # Cap per section (reduced for summaries) - path = mem["path"] - title = mem["title"] - tags = ", ".join(mem["tags"][:3]) if mem["tags"] else "" - - # Read body to extract summary - summary = "" - mem_path = self.memory_dir / path - if mem_path.exists(): - try: - _, body = self._read_memory_file(mem_path) - summary = _extract_summary(body) - except Exception: - pass - - line = f"- [{title}]({path})" - if summary: - line += f" - {summary}" - if tags: - line += f" ({tags})" - section_lines.append(line) - - # Check budget - added = sum(len(l) for l in section_lines) + 2 - if char_count + added > CORE_MAX_CHARS: - break - - section_lines.append("") - section_text = "\n".join(section_lines) - if char_count + len(section_text) > CORE_MAX_CHARS: - break - lines.extend(section_lines) - char_count += len(section_text) - - core_content = "\n".join(lines) - core_path = self.memory_dir / "CORE.md" - core_path.write_text(core_content, encoding="utf-8") - self._git_commit("core: regenerate CORE.md", [core_path]) - - return core_content - - def tags_list(self, limit: int = 0) -> List[Dict[str, Any]]: - """List all tags with occurrence counts across all memories. - - Returns list of {"tag": str, "count": int} sorted by count descending. - """ - index = self._load_index() - tag_counts: Dict[str, int] = {} - - for entry in index.get("entries", {}).values(): - for tag in entry.get("tags", []): - normalized = tag.lower().strip() - if normalized: - tag_counts[normalized] = tag_counts.get(normalized, 0) + 1 - - results = [{"tag": tag, "count": count} for tag, count in tag_counts.items()] - results.sort(key=lambda x: x["count"], reverse=True) - - if limit > 0: - results = results[:limit] - return results - - def tags_related(self, tag: str, limit: int = 0) -> List[Dict[str, Any]]: - """Find tags that co-occur with the given tag. - - Returns list of {"tag": str, "co_occurrences": int, "memories_with_both": int} - sorted by co_occurrences descending. - """ - tag = tag.lower().strip() - index = self._load_index() - co_counts: Dict[str, int] = {} - - for entry in index.get("entries", {}).values(): - entry_tags = [t.lower().strip() for t in entry.get("tags", [])] - if tag not in entry_tags: - continue - for other_tag in entry_tags: - if other_tag != tag and other_tag: - co_counts[other_tag] = co_counts.get(other_tag, 0) + 1 - - results = [ - {"tag": t, "co_occurrences": count, "memories_with_both": count} - for t, count in co_counts.items() - ] - results.sort(key=lambda x: x["co_occurrences"], reverse=True) - - if limit > 0: - results = results[:limit] - return results - - def tags_suggest(self, memory_id: str) -> List[Dict[str, Any]]: - """Suggest tags for a memory based on co-occurrence patterns. - - Returns top 10 suggestions as {"tag": str, "score": int, "reason": str}. - """ - index = self._load_index() - entry = index.get("entries", {}).get(memory_id) - if not entry: - return [] - - existing_tags = set(t.lower().strip() for t in entry.get("tags", [])) - if not existing_tags: - return [] - - # For each existing tag, find co-occurring tags and accumulate scores - candidate_scores: Dict[str, int] = {} - candidate_sources: Dict[str, set] = {} - - for existing_tag in existing_tags: - co_tags = self.tags_related(existing_tag) - for co_entry in co_tags: - candidate = co_entry["tag"] - if candidate in existing_tags: - continue - candidate_scores[candidate] = ( - candidate_scores.get(candidate, 0) + co_entry["co_occurrences"] - ) - if candidate not in candidate_sources: - candidate_sources[candidate] = set() - candidate_sources[candidate].add(existing_tag) - - results = [] - for candidate, score in candidate_scores.items(): - sources = sorted(candidate_sources[candidate]) - reason = "co-occurs with: " + ", ".join(sources) - results.append({"tag": candidate, "score": score, "reason": reason}) - - results.sort(key=lambda x: x["score"], reverse=True) - return results[:10] - - def reflect( - self, - since: Optional[str] = None, - dry_run: bool = False, - ) -> Dict[str, Any]: - """Review recent memories, identify tag-based clusters, and output consolidation recommendations. - - Clusters memories that share 2+ tags using iterative union-find merging. - Does NOT auto-create insights; the agent reads output and decides what to store. - - Args: - since: ISO date string (YYYY-MM-DD) to filter memories from. Falls back to - _state.json's last_reflection, then 30 days ago. - dry_run: If True, return analysis without updating _state.json or logging episode. - - Returns: - Dict with clusters, total_memories_reviewed, clusters_found, and since date. - """ - now = datetime.now(timezone.utc) - state = self._load_state() - - # Determine time window - if since: - since_date = since - elif state.get("last_reflection"): - # last_reflection may be a full ISO timestamp; extract date portion - since_date = state["last_reflection"][:10] - else: - since_dt = now - timedelta(days=30) - since_date = since_dt.strftime("%Y-%m-%d") - - # Load recent memories from index - index = self._load_index() - recent_memories = [] - for mid, entry in index.get("entries", {}).items(): - created = entry.get("created", "") - # Compare date portion only (YYYY-MM-DD) - created_date = created[:10] if created else "" - if created_date >= since_date: - recent_memories.append( - { - "id": mid, - "title": entry.get("title", ""), - "type": entry.get("type", "general"), - "tags": [t.lower().strip() for t in entry.get("tags", [])], - } - ) - - total_reviewed = len(recent_memories) - - # Union-find clustering by tag overlap >= 2 - # parent[i] = index of parent in recent_memories list - n = len(recent_memories) - parent = list(range(n)) - - def find(x: int) -> int: - while parent[x] != x: - parent[x] = parent[parent[x]] # path compression - x = parent[x] - return x - - def union(a: int, b: int): - ra, rb = find(a), find(b) - if ra != rb: - parent[rb] = ra - - # Compare all pairs, check tag overlap - for i in range(n): - tags_i = set(recent_memories[i]["tags"]) - for j in range(i + 1, n): - tags_j = set(recent_memories[j]["tags"]) - if len(tags_i & tags_j) >= 2: - union(i, j) - - # Group by root - clusters_map: Dict[int, List[int]] = {} - for i in range(n): - root = find(i) - clusters_map.setdefault(root, []).append(i) - - # Build output clusters (only clusters with 2+ members) - clusters = [] - cluster_id = 0 - for indices in clusters_map.values(): - if len(indices) < 2: - continue - cluster_id += 1 - members = [] - all_tag_sets = [] - types_seen = set() - for idx in indices: - mem = recent_memories[idx] - members.append( - { - "id": mem["id"], - "title": mem["title"], - "type": mem["type"], - "tags": mem["tags"], - } - ) - all_tag_sets.append(set(mem["tags"])) - types_seen.add(mem["type"]) - - # Common tags = intersection of ALL members - common_tags = ( - sorted(set.intersection(*all_tag_sets)) if all_tag_sets else [] - ) - - # Shared tags = tags appearing in 2+ members - tag_counts: Dict[str, int] = {} - for ts in all_tag_sets: - for t in ts: - tag_counts[t] = tag_counts.get(t, 0) + 1 - shared_tags = sorted(t for t, c in tag_counts.items() if c >= 2) - - # Suggested topic - tag_label = ( - ", ".join(common_tags[:4]) - if common_tags - else ", ".join(shared_tags[:4]) - ) - type_label = ", ".join(sorted(types_seen)) - suggested_topic = f"Pattern: {tag_label} across {type_label}" - - clusters.append( - { - "cluster_id": cluster_id, - "members": members, - "common_tags": common_tags, - "shared_tags": shared_tags, - "suggested_topic": suggested_topic, - "member_count": len(members), - } - ) - - # Sort clusters by member count descending - clusters.sort(key=lambda c: c["member_count"], reverse=True) - # Re-number after sorting - for i, c in enumerate(clusters): - c["cluster_id"] = i + 1 - - result = { - "clusters": clusters, - "total_memories_reviewed": total_reviewed, - "clusters_found": len(clusters), - "since": since_date, - } - - if not dry_run: - # Update state with last_reflection timestamp - state["last_reflection"] = now.isoformat() - self._save_state(state) - - # Log an episode entry - self.episode( - type="reflection", - title=f"Reflection: {len(clusters)} clusters from {total_reviewed} memories", - tags=["reflection", "cognitive-memory"], - summary=f"Reviewed {total_reviewed} memories since {since_date}, found {len(clusters)} clusters", - ) - - # Regenerate REFLECTION.md - self.reflection_summary() - - return result - - def merge( - self, - keep_id: str, - absorb_id: str, - dry_run: bool = False, - ) -> Dict[str, Any]: - """Merge two memories: absorb one into another. - - The 'keep' memory absorbs the content, tags, and relations from - the 'absorb' memory. The absorb memory is then deleted. - All other memories referencing absorb_id are updated to point to keep_id. - - If dry_run=True, returns what would change without writing anything. - """ - # Resolve both memory file paths - keep_path = self._resolve_memory_path(keep_id) - if not keep_path: - raise ValueError(f"Keep memory not found: {keep_id}") - absorb_path = self._resolve_memory_path(absorb_id) - if not absorb_path: - raise ValueError(f"Absorb memory not found: {absorb_id}") - - # Read both memory files - keep_fm, keep_body = self._read_memory_file(keep_path) - absorb_fm, absorb_body = self._read_memory_file(absorb_path) - - keep_title = keep_fm.get("title", keep_id[:8]) - absorb_title = absorb_fm.get("title", absorb_id[:8]) - - # Combine content - merged_body = keep_body - if absorb_body.strip(): - merged_body = ( - keep_body.rstrip() - + f"\n\n---\n*Merged from: {absorb_title}*\n\n" - + absorb_body.strip() - ) - - # Merge tags (sorted, deduplicated) - keep_tags = keep_fm.get("tags", []) - absorb_tags = absorb_fm.get("tags", []) - merged_tags = sorted(list(set(keep_tags + absorb_tags))) - - # importance = max of both - merged_importance = max( - keep_fm.get("importance", 0.5), - absorb_fm.get("importance", 0.5), - ) - - # Merge relations: combine both relation lists - keep_rels = list(keep_fm.get("relations", [])) - absorb_rels = list(absorb_fm.get("relations", [])) - combined_rels = keep_rels + absorb_rels - - # Replace any relation targeting absorb_id with keep_id - for rel in combined_rels: - if rel.get("target") == absorb_id: - rel["target"] = keep_id - - # Deduplicate by (target, type) tuple - seen = set() - deduped_rels = [] - for rel in combined_rels: - key = (rel.get("target"), rel.get("type")) - if key not in seen: - seen.add(key) - deduped_rels.append(rel) - - # Remove self-referential relations (target == keep_id) - merged_rels = [r for r in deduped_rels if r.get("target") != keep_id] - - # Scan all other memories for relations targeting absorb_id - index = self._load_index() - updated_others = [] - for mid, entry in index.get("entries", {}).items(): - if mid in (keep_id, absorb_id): - continue - rels = entry.get("relations", []) - needs_update = any(r.get("target") == absorb_id for r in rels) - if needs_update: - updated_others.append(mid) - - if dry_run: - return { - "dry_run": True, - "keep_id": keep_id, - "keep_title": keep_title, - "absorb_id": absorb_id, - "absorb_title": absorb_title, - "merged_tags": merged_tags, - "merged_importance": merged_importance, - "merged_relations_count": len(merged_rels), - "other_memories_updated": len(updated_others), - "other_memory_ids": updated_others, - } - - # Write updated keep memory file - now = datetime.now(timezone.utc).isoformat() - keep_fm["tags"] = merged_tags - keep_fm["importance"] = merged_importance - keep_fm["relations"] = merged_rels - keep_fm["updated"] = now - self._write_memory_file(keep_path, keep_fm, merged_body) - - # Update index for keep memory (refresh content_preview with merged body) - keep_rel_path = str(keep_path.relative_to(self.memory_dir)) - preview = merged_body.strip()[:200] - if len(merged_body.strip()) > 200: - last_space = preview.rfind(" ") - if last_space > 0: - preview = preview[:last_space] - self._update_index_entry( - keep_id, keep_fm, keep_rel_path, content_preview=preview - ) - - # Update all other memories that reference absorb_id - for mid in updated_others: - other_path = self._resolve_memory_path(mid) - if not other_path: - continue - try: - other_fm, other_body = self._read_memory_file(other_path) - other_rels = other_fm.get("relations", []) - for rel in other_rels: - if rel.get("target") == absorb_id: - rel["target"] = keep_id - # Deduplicate after replacement - seen_other = set() - deduped_other = [] - for rel in other_rels: - key = (rel.get("target"), rel.get("type")) - if key not in seen_other: - seen_other.add(key) - deduped_other.append(rel) - # Remove self-referential - other_fm["relations"] = [ - r for r in deduped_other if r.get("target") != mid - ] - other_fm["updated"] = now - self._write_memory_file(other_path, other_fm, other_body) - other_rel_path = str(other_path.relative_to(self.memory_dir)) - self._update_index_entry(mid, other_fm, other_rel_path) - except Exception: - pass - - # Delete absorb memory file and remove from index/state - absorb_path.unlink() - self._remove_index_entry(absorb_id) - state = self._load_state() - state.get("entries", {}).pop(absorb_id, None) - self._save_state(state) - - # Git: stage absorb deletion - try: - absorb_rel = absorb_path.relative_to(self.memory_dir) - subprocess.run( - ["git", "rm", "--cached", str(absorb_rel)], - cwd=str(self.memory_dir), - capture_output=True, - timeout=5, - ) - except Exception: - pass - - self._git_commit(f"merge: {keep_title} absorbed {absorb_title}") - - return { - "success": True, - "keep_id": keep_id, - "keep_title": keep_title, - "absorb_id": absorb_id, - "absorb_title": absorb_title, - "merged_tags": merged_tags, - "merged_importance": merged_importance, - "merged_relations_count": len(merged_rels), - "other_memories_updated": len(updated_others), - } - - def reflection_summary(self) -> str: - """Generate REFLECTION.md with patterns and insights from the memory system. - - Writes to REFLECTION.md in the memory data directory and git-commits the file. - Returns the generated content. - """ - index = self._load_index() - state = self._load_state() - entries = index.get("entries", {}) - state_entries = state.get("entries", {}) - now_str = datetime.now(timezone.utc).strftime("%Y-%m-%d") - - # Last reflection date - last_reflection = state.get("last_reflection", "") - last_reflection_date = last_reflection[:10] if last_reflection else "never" - - # Count total reflections from episode files - total_reflections = 0 - episodes_dir = self.memory_dir / "episodes" - if episodes_dir.exists(): - for ep_file in sorted(episodes_dir.glob("*.md")): - try: - text = ep_file.read_text(encoding="utf-8") - # Count lines that look like reflection episode entries - for line in text.split("\n"): - if "Reflection:" in line and line.strip().startswith("## "): - total_reflections += 1 - except OSError: - pass - - lines = [ - "# Reflection Summary (auto-generated)", - f"> Last updated: {now_str} | Last reflection: {last_reflection_date} | Total reflections: {total_reflections}", - "", - ] - - # === Themes section === - # Get top tags, then find co-occurrence pairs - lines.append("## Themes") - lines.append("Top tag co-occurrences revealing recurring themes:") - lines.append("") - - top_tags = self.tags_list(limit=10) - # Build co-occurrence pairs across top tags - pair_data: Dict[Tuple[str, str], List[str]] = {} # (tag1, tag2) -> [titles] - for tag_info in top_tags[:5]: - tag = tag_info["tag"] - co_tags = self.tags_related(tag, limit=5) - for co in co_tags: - other = co["tag"] - # Canonical pair ordering - pair = tuple(sorted([tag, other])) - if pair in pair_data: - continue - # Find memories with both tags and collect titles - titles = [] - for mid, entry in entries.items(): - mem_tags = [t.lower().strip() for t in entry.get("tags", [])] - if pair[0] in mem_tags and pair[1] in mem_tags: - titles.append(entry.get("title", "untitled")) - if titles: - pair_data[pair] = titles - - # Sort pairs by memory count descending, show top 8 - sorted_pairs = sorted(pair_data.items(), key=lambda x: len(x[1]), reverse=True) - for (tag_a, tag_b), titles in sorted_pairs[:8]: - example_titles = ", ".join(f'"{t}"' for t in titles[:3]) - lines.append( - f"- **{tag_a} + {tag_b}**: {len(titles)} memories ({example_titles})" - ) - - if not sorted_pairs: - lines.append("- No co-occurrence data available yet.") - lines.append("") - - # === Cross-Project Patterns section === - lines.append("## Cross-Project Patterns") - lines.append("Tags that span multiple projects:") - lines.append("") - - known_projects = [ - "major-domo", - "paper-dynasty", - "homelab", - "vagabond-rpg", - "foundryvtt", - "strat-gameplay-webapp", - ] - # Build tag -> {project -> count} mapping - tag_project_map: Dict[str, Dict[str, int]] = {} - for mid, entry in entries.items(): - mem_tags = [t.lower().strip() for t in entry.get("tags", [])] - # Identify which projects this memory belongs to - mem_projects = [t for t in mem_tags if t in known_projects] - # For non-project tags, record which projects they appear in - non_project_tags = [t for t in mem_tags if t not in known_projects] - for tag in non_project_tags: - if tag not in tag_project_map: - tag_project_map[tag] = {} - for proj in mem_projects: - tag_project_map[tag][proj] = tag_project_map[tag].get(proj, 0) + 1 - - # Filter to tags spanning 2+ projects, sort by project count then total - cross_project = [] - for tag, projects in tag_project_map.items(): - if len(projects) >= 2: - total = sum(projects.values()) - cross_project.append((tag, projects, total)) - cross_project.sort(key=lambda x: (len(x[1]), x[2]), reverse=True) - - for tag, projects, total in cross_project[:10]: - proj_parts = ", ".join( - f"{p} ({c})" - for p, c in sorted(projects.items(), key=lambda x: x[1], reverse=True) - ) - lines.append(f"- **{tag}**: appears in {proj_parts}") - - if not cross_project: - lines.append("- No cross-project patterns detected yet.") - lines.append("") - - # === Most Accessed section === - lines.append("## Most Accessed") - lines.append("Top 10 memories by access count:") - lines.append("") - - # Sort state entries by access_count descending - accessed = [] - for mid, s in state_entries.items(): - count = s.get("access_count", 0) - if count > 0: - entry = entries.get(mid) - if entry: - accessed.append( - ( - mid, - entry.get("title", "untitled"), - entry.get("path", ""), - count, - ) - ) - accessed.sort(key=lambda x: x[3], reverse=True) - - for mid, title, path, count in accessed[:10]: - lines.append(f"1. [{title}]({path}) - {count} accesses") - - if not accessed: - lines.append("- No access data recorded yet.") - lines.append("") - - # === Recent Insights section === - lines.append("## Recent Insights") - lines.append("Insight-type memories:") - lines.append("") - - insights = [] - for mid, entry in entries.items(): - if entry.get("type") == "insight": - insights.append((mid, entry)) - # Sort by created date descending - insights.sort(key=lambda x: x[1].get("created", ""), reverse=True) - - for mid, entry in insights[:10]: - title = entry.get("title", "untitled") - path = entry.get("path", "") - preview = entry.get("content_preview", "") - if preview: - lines.append(f"- [{title}]({path}) - {preview[:80]}") - else: - lines.append(f"- [{title}]({path})") - - if not insights: - lines.append("- No insight memories stored yet.") - lines.append("") - - # === Consolidation History section === - lines.append("## Consolidation History") - lines.append("") - - merge_count = 0 - if episodes_dir.exists(): - for ep_file in sorted(episodes_dir.glob("*.md")): - try: - text = ep_file.read_text(encoding="utf-8") - for line_text in text.split("\n"): - stripped = line_text.strip() - if stripped.startswith("## ") and "merge" in stripped.lower(): - merge_count += 1 - except OSError: - pass - - lines.append(f"- Total merges performed: {merge_count}") - lines.append("") - - content = "\n".join(lines) - - # Write REFLECTION.md - reflection_path = self.memory_dir / "REFLECTION.md" - reflection_path.write_text(content, encoding="utf-8") - self._git_commit("reflection: regenerate REFLECTION.md", [reflection_path]) - - return content diff --git a/skills/cognitive-memory/cli.py b/skills/cognitive-memory/cli.py deleted file mode 100644 index 01575cb..0000000 --- a/skills/cognitive-memory/cli.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -""" -Cognitive Memory - CLI Interface - -Command-line interface for the cognitive memory system. -""" - -import argparse -import json -import sys - -from client import CognitiveMemoryClient -from common import ( - MEMORY_DIR, - VALID_RELATION_TYPES, - VALID_TYPES, - _load_memory_config, - resolve_graph_path, - list_graphs, -) - - -def main(): - parser = argparse.ArgumentParser( - description="Cognitive Memory - Markdown-based memory system with decay scoring", - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument( - "--graph", - default=None, - help="Named memory graph to use (default: 'default')", - ) - subparsers = parser.add_subparsers(dest="command", help="Commands") - - # store - sp = subparsers.add_parser("store", help="Store a new memory") - sp.add_argument( - "--type", "-t", required=True, choices=VALID_TYPES, help="Memory type" - ) - sp.add_argument("--title", required=True, help="Memory title") - sp.add_argument("--content", "-c", required=True, help="Memory content") - sp.add_argument("--tags", help="Comma-separated tags") - sp.add_argument( - "--importance", "-i", type=float, default=0.5, help="Importance 0.0-1.0" - ) - sp.add_argument("--confidence", type=float, default=0.8, help="Confidence 0.0-1.0") - sp.add_argument( - "--episode", - action="store_true", - default=False, - help="Also log an episode entry", - ) - - # recall - sp = subparsers.add_parser("recall", help="Search memories by query") - sp.add_argument("query", help="Search query") - sp.add_argument("--types", help="Comma-separated memory types") - sp.add_argument("--limit", "-n", type=int, default=10, help="Max results") - sp.add_argument( - "--no-semantic", - action="store_true", - default=False, - help="Disable semantic search (keyword-only, faster)", - ) - - # get - sp = subparsers.add_parser("get", help="Get memory by ID") - sp.add_argument("memory_id", help="Memory UUID") - - # relate - sp = subparsers.add_parser("relate", help="Create relationship") - sp.add_argument("from_id", help="Source memory UUID") - sp.add_argument("to_id", help="Target memory UUID") - sp.add_argument("rel_type", choices=VALID_RELATION_TYPES, help="Relationship type") - sp.add_argument("--strength", type=float, default=0.8, help="Strength 0.0-1.0") - sp.add_argument("--context", help="Context description") - sp.add_argument("--description", help="Rich edge description body") - - # edge-get - sp = subparsers.add_parser("edge-get", help="Get edge by ID") - sp.add_argument("edge_id", help="Edge UUID") - - # edge-search - sp = subparsers.add_parser("edge-search", help="Search edges") - sp.add_argument("--query", "-q", help="Text query") - sp.add_argument("--types", help="Comma-separated relation types") - sp.add_argument("--from-id", help="Filter by source memory ID") - sp.add_argument("--to-id", help="Filter by target memory ID") - sp.add_argument("--limit", "-n", type=int, default=20, help="Max results") - - # edge-update - sp = subparsers.add_parser("edge-update", help="Update an edge") - sp.add_argument("edge_id", help="Edge UUID") - sp.add_argument("--description", help="New description body") - sp.add_argument("--strength", type=float, help="New strength 0.0-1.0") - - # edge-delete - sp = subparsers.add_parser("edge-delete", help="Delete an edge") - sp.add_argument("edge_id", help="Edge UUID") - - # search - sp = subparsers.add_parser("search", help="Filter memories") - sp.add_argument("--query", "-q", help="Text query") - sp.add_argument("--types", help="Comma-separated memory types") - sp.add_argument("--tags", help="Comma-separated tags") - sp.add_argument("--min-importance", type=float, help="Minimum importance") - sp.add_argument("--limit", "-n", type=int, default=20, help="Max results") - - # update - sp = subparsers.add_parser("update", help="Update a memory") - sp.add_argument("memory_id", help="Memory UUID") - sp.add_argument("--title", help="New title") - sp.add_argument("--content", help="New content") - sp.add_argument("--tags", help="New tags (comma-separated)") - sp.add_argument("--importance", type=float, help="New importance") - - # delete - sp = subparsers.add_parser("delete", help="Delete a memory") - sp.add_argument("memory_id", help="Memory UUID") - sp.add_argument("--force", "-f", action="store_true", help="Skip confirmation") - - # related - sp = subparsers.add_parser("related", help="Get related memories") - sp.add_argument("memory_id", help="Memory UUID") - sp.add_argument("--types", help="Comma-separated relationship types") - sp.add_argument("--depth", type=int, default=1, help="Traversal depth 1-5") - - # stats - subparsers.add_parser("stats", help="Show statistics") - - # recent - sp = subparsers.add_parser("recent", help="Recently created memories") - sp.add_argument("--limit", "-n", type=int, default=20, help="Max results") - - # decay - subparsers.add_parser("decay", help="Recalculate all decay scores") - - # core - subparsers.add_parser("core", help="Generate CORE.md") - - # episode - sp = subparsers.add_parser("episode", help="Log episode entry") - sp.add_argument("--type", "-t", required=True, help="Entry type") - sp.add_argument("--title", required=True, help="Entry title") - sp.add_argument("--tags", help="Comma-separated tags") - sp.add_argument("--summary", "-s", help="Summary text") - sp.add_argument("--memory-link", help="Path to related memory file") - - # reindex - subparsers.add_parser("reindex", help="Rebuild index from files") - - # embed - embed_parser = subparsers.add_parser( - "embed", help="Generate embeddings for all memories via Ollama" - ) - embed_parser.add_argument( - "--if-changed", - action="store_true", - help="Skip if no memories were added or deleted since last embed", - ) - - # pin - sp = subparsers.add_parser("pin", help="Move memory to vault (never decays)") - sp.add_argument("memory_id", help="Memory UUID") - - # reflect - sp = subparsers.add_parser( - "reflect", help="Review recent memories and identify clusters" - ) - sp.add_argument("--since", help="ISO date (YYYY-MM-DD) to review from") - sp.add_argument( - "--dry-run", action="store_true", help="Preview without updating state" - ) - - # merge - sp = subparsers.add_parser( - "merge", help="Merge two memories (absorb one into another)" - ) - sp.add_argument("keep_id", help="Memory UUID to keep") - sp.add_argument("absorb_id", help="Memory UUID to absorb and delete") - sp.add_argument( - "--dry-run", action="store_true", help="Preview merge without writing" - ) - - # reflection - subparsers.add_parser("reflection", help="Generate REFLECTION.md summary") - - # tags - sp = subparsers.add_parser("tags", help="Tag analysis commands") - tags_sub = sp.add_subparsers(dest="tags_command") - sp2 = tags_sub.add_parser("list", help="List all tags with counts") - sp2.add_argument("--limit", "-n", type=int, default=0, help="Max results (0=all)") - sp3 = tags_sub.add_parser("related", help="Find co-occurring tags") - sp3.add_argument("tag", help="Tag to analyze") - sp3.add_argument("--limit", "-n", type=int, default=0, help="Max results (0=all)") - sp4 = tags_sub.add_parser("suggest", help="Suggest tags for a memory") - sp4.add_argument("memory_id", help="Memory UUID") - - # procedure - sp = subparsers.add_parser( - "procedure", help="Store a procedure memory (convenience wrapper)" - ) - sp.add_argument("--title", required=True, help="Procedure title") - sp.add_argument("--content", "-c", required=True, help="Procedure description") - sp.add_argument("--steps", help="Comma-separated ordered steps") - sp.add_argument("--preconditions", help="Comma-separated preconditions") - sp.add_argument("--postconditions", help="Comma-separated postconditions") - sp.add_argument("--tags", help="Comma-separated tags") - sp.add_argument( - "--importance", "-i", type=float, default=0.5, help="Importance 0.0-1.0" - ) - - # config - sp = subparsers.add_parser("config", help="Manage embedding config") - sp.add_argument("--show", action="store_true", help="Display current config") - sp.add_argument( - "--provider", choices=["ollama", "openai"], help="Set embedding provider" - ) - sp.add_argument("--openai-key", help="Set OpenAI API key") - sp.add_argument( - "--ollama-model", help="Set Ollama model name (e.g. qwen3-embedding:8b)" - ) - - # graphs - subparsers.add_parser("graphs", help="List available memory graphs") - - args = parser.parse_args() - - if not args.command: - parser.print_help() - sys.exit(1) - - graph_path = resolve_graph_path(args.graph) - client = CognitiveMemoryClient(memory_dir=graph_path) - result = None - - if args.command == "store": - tags = [t.strip() for t in args.tags.split(",")] if args.tags else None - memory_id = client.store( - type=args.type, - title=args.title, - content=args.content, - tags=tags, - importance=args.importance, - confidence=args.confidence, - ) - result = {"success": True, "memory_id": memory_id} - - if args.episode: - # Get the relative path from the index for memory_link - index = client._load_index() - entry = index.get("entries", {}).get(memory_id, {}) - rel_path = entry.get("path", "") - - # Truncate content at word boundary for summary (max 100 chars) - summary = args.content.strip()[:100] - if len(args.content.strip()) > 100: - last_space = summary.rfind(" ") - if last_space > 0: - summary = summary[:last_space] - - client.episode( - type=args.type, - title=args.title, - tags=tags or [], - summary=summary, - memory_link=rel_path, - ) - result["episode_logged"] = True - - elif args.command == "recall": - types = [t.strip() for t in args.types.split(",")] if args.types else None - result = client.recall( - args.query, - memory_types=types, - limit=args.limit, - semantic=not args.no_semantic, - ) - - elif args.command == "get": - result = client.get(args.memory_id) - if not result: - result = {"error": "Memory not found"} - - elif args.command == "relate": - edge_id = client.relate( - args.from_id, - args.to_id, - args.rel_type, - strength=args.strength, - context=args.context, - description=args.description, - ) - result = {"success": bool(edge_id), "edge_id": edge_id} - - elif args.command == "edge-get": - result = client.edge_get(args.edge_id) - if not result: - result = {"error": "Edge not found"} - - elif args.command == "edge-search": - types = [t.strip() for t in args.types.split(",")] if args.types else None - result = client.edge_search( - query=args.query, - types=types, - from_id=getattr(args, "from_id", None), - to_id=getattr(args, "to_id", None), - limit=args.limit, - ) - - elif args.command == "edge-update": - success = client.edge_update( - args.edge_id, - description=args.description, - strength=args.strength, - ) - result = {"success": success} - - elif args.command == "edge-delete": - success = client.edge_delete(args.edge_id) - result = {"success": success} - - elif args.command == "search": - types = [t.strip() for t in args.types.split(",")] if args.types else None - tags = [t.strip() for t in args.tags.split(",")] if args.tags else None - result = client.search( - query=args.query, - memory_types=types, - tags=tags, - min_importance=args.min_importance, - limit=args.limit, - ) - - elif args.command == "update": - tags = [t.strip() for t in args.tags.split(",")] if args.tags else None - success = client.update( - args.memory_id, - title=args.title, - content=args.content, - tags=tags, - importance=args.importance, - ) - result = {"success": success} - - elif args.command == "delete": - if not args.force: - mem = client.get(args.memory_id) - if mem: - print(f"Deleting: {mem.get('title')}", file=sys.stderr) - success = client.delete(args.memory_id) - result = {"success": success} - - elif args.command == "related": - types = [t.strip() for t in args.types.split(",")] if args.types else None - result = client.related(args.memory_id, rel_types=types, max_depth=args.depth) - - elif args.command == "stats": - result = client.stats() - - elif args.command == "recent": - result = client.recent(limit=args.limit) - - elif args.command == "decay": - result = client.decay() - - elif args.command == "core": - content = client.core() - # Print path, not content (content is written to file) - result = { - "success": True, - "path": str(client.memory_dir / "CORE.md"), - "chars": len(content), - } - - elif args.command == "episode": - tags = [t.strip() for t in args.tags.split(",")] if args.tags else None - client.episode( - type=args.type, - title=args.title, - tags=tags, - summary=args.summary, - memory_link=args.memory_link, - ) - result = {"success": True} - - elif args.command == "reindex": - count = client.reindex() - result = {"success": True, "indexed": count} - - elif args.command == "embed": - if_changed = getattr(args, "if_changed", False) - if not if_changed: - print( - "Generating embeddings (this may take a while if model needs to be pulled)...", - file=sys.stderr, - ) - result = client.embed(if_changed=if_changed) - - elif args.command == "pin": - success = client.pin(args.memory_id) - result = {"success": success} - - elif args.command == "reflect": - result = client.reflect( - since=args.since, - dry_run=args.dry_run, - ) - - elif args.command == "merge": - result = client.merge( - keep_id=args.keep_id, - absorb_id=args.absorb_id, - dry_run=args.dry_run, - ) - - elif args.command == "reflection": - content = client.reflection_summary() - result = { - "success": True, - "path": str(client.memory_dir / "REFLECTION.md"), - "chars": len(content), - } - - elif args.command == "tags": - if args.tags_command == "list": - result = client.tags_list(limit=args.limit) - elif args.tags_command == "related": - result = client.tags_related(args.tag, limit=args.limit) - elif args.tags_command == "suggest": - result = client.tags_suggest(args.memory_id) - else: - # No subcommand given, print tags help - # Re-parse to get the tags subparser for help output - for action in parser._subparsers._actions: - if isinstance(action, argparse._SubParsersAction): - tags_parser = action.choices.get("tags") - if tags_parser: - tags_parser.print_help() - break - sys.exit(1) - - elif args.command == "procedure": - tags = [t.strip() for t in args.tags.split(",")] if args.tags else None - steps = [s.strip() for s in args.steps.split(",")] if args.steps else None - preconditions = ( - [p.strip() for p in args.preconditions.split(",")] - if args.preconditions - else None - ) - postconditions = ( - [p.strip() for p in args.postconditions.split(",")] - if args.postconditions - else None - ) - memory_id = client.store( - type="procedure", - title=args.title, - content=args.content, - tags=tags, - importance=args.importance, - steps=steps, - preconditions=preconditions, - postconditions=postconditions, - ) - result = {"success": True, "memory_id": memory_id} - - elif args.command == "graphs": - result = list_graphs() - - elif args.command == "config": - config_path = client.memory_dir / "_config.json" - config = _load_memory_config(config_path) - changed = False - - if args.provider: - config["embedding_provider"] = args.provider - changed = True - if args.openai_key: - config["openai_api_key"] = args.openai_key - changed = True - if args.ollama_model: - config["ollama_model"] = args.ollama_model - changed = True - - if changed: - config_path.write_text(json.dumps(config, indent=2)) - result = {"success": True, "updated": True} - elif args.show or not changed: - # Mask API key for display - display = dict(config) - key = display.get("openai_api_key") - if key and isinstance(key, str) and len(key) > 8: - display["openai_api_key"] = key[:4] + "..." + key[-4:] - result = display - - print(json.dumps(result, indent=2, default=str)) - - -if __name__ == "__main__": - main() diff --git a/skills/cognitive-memory/client.py b/skills/cognitive-memory/client.py deleted file mode 100644 index c26885a..0000000 --- a/skills/cognitive-memory/client.py +++ /dev/null @@ -1,1091 +0,0 @@ -#!/usr/bin/env python3 -""" -Cognitive Memory Client - -Markdown-based memory system with decay scoring, episodic logging, and auto-curated CORE.md. -Stores memories as human-readable markdown files with YAML frontmatter. - -Usage: - # CLI - python client.py store --type solution --title "Fixed X" --content "Details..." --tags "python,fix" - python client.py recall "timeout error" - python client.py get - python client.py relate SOLVES - python client.py core - python client.py episode --type fix --title "Fixed reconnection" --tags "discord,python" - - # Python - from client import CognitiveMemoryClient - client = CognitiveMemoryClient() - memory_id = client.store(type="solution", title="...", content="...") -""" - -import json -import os -import subprocess -import sys -import tempfile -import uuid -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -# Import everything from common for backward compatibility -from common import ( - CORE_MAX_CHARS, - CONFIG_PATH, - DECAY_LAMBDA, - EDGE_FIELD_ORDER, - EDGES_DIR_NAME, - EMBEDDING_MODEL, - EMBEDDING_TIMEOUT, - EMBEDDINGS_PATH, - FIELD_ORDER, - INDEX_PATH, - MEMORY_DIR, - OLLAMA_URL, - OPENAI_EMBED_URL, - OPENAI_MODEL_DEFAULT, - STATE_PATH, - THRESHOLD_ACTIVE, - THRESHOLD_DORMANT, - THRESHOLD_FADING, - TYPE_DIRS, - TYPE_WEIGHTS, - VALID_RELATION_TYPES, - VALID_TYPES, - _cosine_similarity, - _format_yaml_value, - _load_memory_config, - _make_edge_filename, - _needs_quoting, - _ollama_embed, - _openai_embed, - _parse_relations_block, - _parse_scalar, - _quote_yaml, - calculate_decay_score, - make_filename, - parse_frontmatter, - serialize_edge_frontmatter, - serialize_frontmatter, - slugify, -) - -# Import mixins -from edges import EdgesMixin -from embeddings import EmbeddingsMixin -from analysis import AnalysisMixin - -# ============================================================================= -# CLIENT -# ============================================================================= - - -class CognitiveMemoryClient(EdgesMixin, EmbeddingsMixin, AnalysisMixin): - """Client for markdown-based cognitive memory system.""" - - def __init__(self, memory_dir: Optional[Path] = None): - self.memory_dir = memory_dir or MEMORY_DIR - self.index_path = self.memory_dir / "_index.json" - self.state_path = self.memory_dir / "_state.json" - self._embeddings_cache: Optional[Dict] = None - self._embeddings_mtime: float = 0.0 - self._ensure_dirs() - - def _ensure_dirs(self): - """Create directory structure if needed.""" - for type_dir in TYPE_DIRS.values(): - (self.memory_dir / "graph" / type_dir).mkdir(parents=True, exist_ok=True) - (self.memory_dir / "graph" / EDGES_DIR_NAME).mkdir(parents=True, exist_ok=True) - (self.memory_dir / "episodes").mkdir(parents=True, exist_ok=True) - (self.memory_dir / "vault").mkdir(parents=True, exist_ok=True) - - def _load_embeddings_cached(self) -> Optional[Dict]: - """Load _embeddings.json with mtime-based caching. - - Returns the parsed dict, or None if the file doesn't exist or fails to parse. - Only re-reads from disk when the file's mtime has changed. - """ - embeddings_path = self.memory_dir / "_embeddings.json" - if not embeddings_path.exists(): - return None - try: - current_mtime = embeddings_path.stat().st_mtime - except OSError: - return None - if ( - self._embeddings_cache is not None - and current_mtime == self._embeddings_mtime - ): - return self._embeddings_cache - try: - data = json.loads(embeddings_path.read_text()) - self._embeddings_cache = data - self._embeddings_mtime = current_mtime - return data - except (json.JSONDecodeError, OSError): - return None - - # ------------------------------------------------------------------------- - # Index and State management - # ------------------------------------------------------------------------- - - def _load_index(self) -> Dict: - """Load _index.json, return empty structure if missing.""" - if self.index_path.exists(): - try: - return json.loads(self.index_path.read_text()) - except (json.JSONDecodeError, OSError): - pass - return {"version": 2, "updated": "", "count": 0, "entries": {}, "edges": {}} - - def _save_index(self, index: Dict): - """Write _index.json.""" - index["version"] = 2 - index.setdefault("edges", {}) - index["updated"] = datetime.now(timezone.utc).isoformat() - index["count"] = len(index.get("entries", {})) - self.index_path.write_text(json.dumps(index, indent=2, default=str)) - - def _load_state(self) -> Dict: - """Load _state.json, return empty structure if missing.""" - if self.state_path.exists(): - try: - return json.loads(self.state_path.read_text()) - except (json.JSONDecodeError, OSError): - pass - return {"version": 1, "updated": "", "entries": {}} - - def _save_state(self, state: Dict): - """Write _state.json atomically, merging top-level keys to prevent race conditions.""" - # Merge with existing state to preserve keys written by concurrent processes - if self.state_path.exists(): - try: - existing = json.loads(self.state_path.read_text()) - existing.update(state) - state = existing - except (json.JSONDecodeError, OSError): - pass - state["updated"] = datetime.now(timezone.utc).isoformat() - # Atomic write: write to temp file then rename - fd, tmp_path = tempfile.mkstemp(dir=self.memory_dir, suffix=".tmp") - try: - with os.fdopen(fd, "w") as f: - json.dump(state, f, indent=2, default=str) - os.replace(tmp_path, self.state_path) - except Exception: - try: - os.unlink(tmp_path) - except OSError: - pass - raise - - # ------------------------------------------------------------------------- - # File I/O - # ------------------------------------------------------------------------- - - def _read_memory_file(self, path: Path) -> Tuple[Dict[str, Any], str]: - """Read a memory markdown file, return (frontmatter, body).""" - text = path.read_text(encoding="utf-8") - return parse_frontmatter(text) - - def _write_memory_file(self, path: Path, frontmatter: Dict[str, Any], body: str): - """Write a memory markdown file with frontmatter and body.""" - fm_str = serialize_frontmatter(frontmatter) - content = f"{fm_str}\n\n{body.strip()}\n" if body.strip() else f"{fm_str}\n" - path.write_text(content, encoding="utf-8") - - @staticmethod - def _resolve_prefix(partial_id: str, keys) -> Optional[str]: - """Resolve a partial UUID prefix to a full ID (git-style). - - Returns the full key if exactly one match is found, None if zero - matches, or raises ValueError on ambiguous (multiple) matches. - """ - if partial_id in keys: - return partial_id - matches = [k for k in keys if k.startswith(partial_id)] - if len(matches) == 1: - return matches[0] - if len(matches) > 1: - raise ValueError( - f"Ambiguous ID prefix '{partial_id}' matches {len(matches)} " - f"entries: {', '.join(sorted(matches)[:5])}" - ) - return None - - def _resolve_memory_path(self, memory_id: str) -> Optional[Path]: - """Find the file path for a memory by ID using the index.""" - index = self._load_index() - entries = index.get("entries", {}) - full_id = self._resolve_prefix(memory_id, entries) - if full_id: - entry = entries[full_id] - path = self.memory_dir / entry["path"] - if path.exists(): - return path - # Fallback: scan files (slow but reliable) - return self._scan_for_memory(memory_id) - - def _scan_for_memory(self, memory_id: str) -> Optional[Path]: - """Scan graph/ and vault/ directories for a memory file by ID.""" - search_dirs = [self.memory_dir / "graph", self.memory_dir / "vault"] - for search_dir in search_dirs: - if not search_dir.exists(): - continue - for md_file in search_dir.rglob("*.md"): - try: - fm, _ = self._read_memory_file(md_file) - if fm.get("id") == memory_id: - return md_file - except Exception: - continue - return None - - # ------------------------------------------------------------------------- - # Git operations - # ------------------------------------------------------------------------- - - def _git_commit(self, message: str, files: Optional[List[Path]] = None): - """Stage and commit files in the memory repo.""" - try: - # Check if it's a git repo - result = subprocess.run( - ["git", "rev-parse", "--git-dir"], - cwd=str(self.memory_dir), - capture_output=True, - timeout=5, - ) - if result.returncode != 0: - return # Not a git repo, skip - - if files: - for f in files: - rel = f.relative_to(self.memory_dir) - subprocess.run( - ["git", "add", str(rel)], - cwd=str(self.memory_dir), - capture_output=True, - timeout=5, - ) - else: - subprocess.run( - ["git", "add", "-A"], - cwd=str(self.memory_dir), - capture_output=True, - timeout=5, - ) - - subprocess.run( - ["git", "commit", "-m", message, "--allow-empty"], - cwd=str(self.memory_dir), - capture_output=True, - timeout=10, - ) - except (subprocess.TimeoutExpired, FileNotFoundError, OSError): - pass # Git operations are best-effort - - # ------------------------------------------------------------------------- - # Index helpers - # ------------------------------------------------------------------------- - - def _update_index_entry( - self, - memory_id: str, - frontmatter: Dict[str, Any], - rel_path: str, - content_preview: str = "", - ): - """Add or update an entry in the index.""" - index = self._load_index() - entry = { - "title": frontmatter.get("title", ""), - "type": frontmatter.get("type", "general"), - "tags": frontmatter.get("tags", []), - "importance": frontmatter.get("importance", 0.5), - "confidence": frontmatter.get("confidence", 0.8), - "created": frontmatter.get("created", ""), - "updated": frontmatter.get("updated", ""), - "path": rel_path, - "relations": frontmatter.get("relations", []), - "content_preview": content_preview, - } - # Preserve existing content_preview if not provided - if not content_preview: - existing = index.get("entries", {}).get(memory_id, {}) - entry["content_preview"] = existing.get("content_preview", "") - index.setdefault("entries", {})[memory_id] = entry - self._save_index(index) - - def _remove_index_entry(self, memory_id: str): - """Remove an entry from the index.""" - index = self._load_index() - index.get("entries", {}).pop(memory_id, None) - self._save_index(index) - - def _maybe_refresh_decay(self): - """Auto-refresh all decay scores if state is older than 24 hours.""" - if not self.state_path.exists(): - self.decay() - return - try: - state = json.loads(self.state_path.read_text()) - updated_str = state.get("updated", "") - if not updated_str: - self.decay() - return - updated_dt = datetime.fromisoformat(updated_str.replace("Z", "+00:00")) - if updated_dt.tzinfo is None: - updated_dt = updated_dt.replace(tzinfo=timezone.utc) - age_hours = (datetime.now(timezone.utc) - updated_dt).total_seconds() / 3600 - if age_hours > 24: - self.decay() - except (json.JSONDecodeError, ValueError, OSError): - self.decay() - - # ------------------------------------------------------------------------- - # Edge index helpers - # ------------------------------------------------------------------------- - - def _update_edge_index( - self, edge_id: str, edge_data: Dict[str, Any], rel_path: str - ): - """Add or update an edge entry in the index.""" - index = self._load_index() - index.setdefault("edges", {})[edge_id] = { - "type": edge_data.get("type", ""), - "from_id": edge_data.get("from_id", ""), - "from_title": edge_data.get("from_title", ""), - "to_id": edge_data.get("to_id", ""), - "to_title": edge_data.get("to_title", ""), - "strength": edge_data.get("strength", 0.8), - "created": edge_data.get("created", ""), - "updated": edge_data.get("updated", ""), - "path": rel_path, - } - self._save_index(index) - - def _remove_edge_index(self, edge_id: str): - """Remove an edge entry from the index.""" - index = self._load_index() - index.get("edges", {}).pop(edge_id, None) - self._save_index(index) - - def _scan_for_edge(self, edge_id: str) -> Optional[Path]: - """Fallback file scan for an edge by ID if index is stale.""" - edges_dir = self.memory_dir / "graph" / EDGES_DIR_NAME - if not edges_dir.exists(): - return None - for md_file in edges_dir.glob("*.md"): - try: - fm, _ = self._read_memory_file(md_file) - if fm.get("id") == edge_id: - return md_file - except Exception: - continue - return None - - def _resolve_edge_path(self, edge_id: str) -> Optional[Path]: - """Find the file path for an edge by ID using the index.""" - index = self._load_index() - edges = index.get("edges", {}) - full_id = self._resolve_prefix(edge_id, edges) - if full_id: - entry = edges[full_id] - path = self.memory_dir / entry["path"] - if path.exists(): - return path - return self._scan_for_edge(edge_id) - - # ========================================================================= - # PUBLIC API - # ========================================================================= - - def store( - self, - type: str, - title: str, - content: str, - tags: Optional[List[str]] = None, - importance: float = 0.5, - confidence: float = 0.8, - steps: Optional[List[str]] = None, - preconditions: Optional[List[str]] = None, - postconditions: Optional[List[str]] = None, - ) -> str: - """Store a new memory as a markdown file. - - Returns the memory UUID. - """ - if type not in VALID_TYPES: - raise ValueError(f"Invalid type: {type}. Valid: {VALID_TYPES}") - - memory_id = str(uuid.uuid4()) - now = datetime.now(timezone.utc).isoformat() - tags = [t.lower().strip() for t in (tags or [])] - - frontmatter = { - "id": memory_id, - "type": type, - "title": title, - "tags": tags, - "importance": max(0.0, min(1.0, importance)), - "confidence": max(0.0, min(1.0, confidence)), - "created": now, - "updated": now, - } - - # Add procedure-specific fields when type is "procedure" - if type == "procedure": - if steps: - frontmatter["steps"] = steps - if preconditions: - frontmatter["preconditions"] = preconditions - if postconditions: - frontmatter["postconditions"] = postconditions - - # Determine file path - type_dir = TYPE_DIRS[type] - filename = make_filename(title, memory_id) - rel_path = f"graph/{type_dir}/{filename}" - full_path = self.memory_dir / rel_path - - # Write file - self._write_memory_file(full_path, frontmatter, content) - - # Update index with content preview (truncate at word boundary) - preview = content.strip()[:200] - if len(content.strip()) > 200: - last_space = preview.rfind(" ") - if last_space > 0: - preview = preview[:last_space] - self._update_index_entry( - memory_id, frontmatter, rel_path, content_preview=preview - ) - - # Init state entry - state = self._load_state() - state.setdefault("entries", {})[memory_id] = { - "access_count": 0, - "last_accessed": now, - "decay_score": importance * TYPE_WEIGHTS.get(type, 1.0) * 0.5, - } - self._save_state(state) - - # Git commit - self._git_commit(f"store: {title}", [full_path]) - - return memory_id - - def recall( - self, - query: str, - memory_types: Optional[List[str]] = None, - limit: int = 10, - semantic: bool = True, - ) -> List[Dict[str, Any]]: - """Search memories by query, ranked by relevance and decay score. - - When semantic=True and embeddings exist, merges keyword and semantic results. - """ - self._maybe_refresh_decay() - query_lower = query.lower().strip() - terms = query_lower.split() - if not terms: - return [] - - index = self._load_index() - state = self._load_state() - results = [] - - for mid, entry in index.get("entries", {}).items(): - # Filter by type - if memory_types and entry.get("type") not in memory_types: - continue - - # Check decay threshold - skip archived - s = state.get("entries", {}).get(mid, {}) - decay = s.get("decay_score", 0.5) - if decay < THRESHOLD_DORMANT: - continue - - # Score based on term matches - title = (entry.get("title") or "").lower() - tags_str = " ".join(entry.get("tags") or []).lower() - - title_matches = sum(1 for t in terms if t in title) * 3 - tag_matches = sum(1 for t in terms if t in tags_str) * 2 - - score = title_matches + tag_matches - if score == 0: - # Check content_preview from index (no file I/O needed) - preview = (entry.get("content_preview") or "").lower() - preview_matches = sum(1 for t in terms if t in preview) - if preview_matches > 0: - score = preview_matches - else: - continue - - # Weight by decay score - weighted_score = score * (1 + decay) - - results.append( - { - "id": mid, - "type": entry.get("type"), - "title": entry.get("title"), - "tags": entry.get("tags", []), - "importance": entry.get("importance"), - "decay_score": round(decay, 3), - "path": entry.get("path"), - "created": entry.get("created"), - "_score": weighted_score, - } - ) - - results.sort(key=lambda x: x.pop("_score", 0), reverse=True) - keyword_results = results[:limit] - - # Merge with semantic results (on by default) - # Weights: semantic 60%, keyword 40% - # Conceptual matching dominates; keyword acts as precision boost for exact terms - if semantic: - sem_results = self.semantic_recall(query, limit=limit) - if sem_results: - score_map: Dict[str, float] = {} - result_map: Dict[str, Dict] = {} - - # Keyword: normalize rank to 0-1 (rank 1 = 1.0, last = ~0.1) - kw_weight = 0.4 - for i, r in enumerate(keyword_results): - mid = r["id"] - normalized = (limit - i) / limit - score_map[mid] = normalized * kw_weight - result_map[mid] = r - - # Semantic: similarity is already 0-1 - sem_weight = 0.6 - for r in sem_results: - mid = r["id"] - sim = r.get("similarity", 0.0) - sem_score = sim * sem_weight - if mid in score_map: - score_map[mid] += sem_score - result_map[mid]["similarity"] = sim - else: - score_map[mid] = sem_score - idx_entry = index.get("entries", {}).get(mid, {}) - s = state.get("entries", {}).get(mid, {}) - result_map[mid] = { - "id": mid, - "type": r.get("type"), - "title": r.get("title"), - "tags": r.get("tags", []), - "importance": idx_entry.get("importance"), - "decay_score": round(s.get("decay_score", 0.5), 3), - "similarity": sim, - "path": r.get("path"), - "created": idx_entry.get("created"), - } - - # Sort by merged score - merged = sorted( - result_map.values(), - key=lambda x: score_map.get(x["id"], 0), - reverse=True, - ) - return merged[:limit] - - return keyword_results - - def get(self, memory_id: str) -> Optional[Dict[str, Any]]: - """Get a memory by ID, update access count.""" - path = self._resolve_memory_path(memory_id) - if not path: - return None - - fm, body = self._read_memory_file(path) - - # Update access count in state - state = self._load_state() - now = datetime.now(timezone.utc).isoformat() - entry = state.setdefault("entries", {}).setdefault( - memory_id, - { - "access_count": 0, - "last_accessed": now, - "decay_score": 0.5, - }, - ) - entry["access_count"] = entry.get("access_count", 0) + 1 - entry["last_accessed"] = now - - # Recalculate decay score for this single memory (just accessed, so days=0) - importance = fm.get("importance", 0.5) - mem_type = fm.get("type", "general") - type_weight = TYPE_WEIGHTS.get(mem_type, 1.0) - entry["decay_score"] = round( - calculate_decay_score(importance, 0, entry["access_count"], type_weight), 4 - ) - - self._save_state(state) - - return { - "id": fm.get("id", memory_id), - "type": fm.get("type"), - "title": fm.get("title"), - "content": body, - "tags": fm.get("tags", []), - "importance": fm.get("importance"), - "confidence": fm.get("confidence"), - "created": fm.get("created"), - "updated": fm.get("updated"), - "relations": fm.get("relations", []), - "steps": fm.get("steps", []), - "preconditions": fm.get("preconditions", []), - "postconditions": fm.get("postconditions", []), - "access_count": entry["access_count"], - "decay_score": entry.get("decay_score", 0.5), - "path": str(path.relative_to(self.memory_dir)), - } - - def search( - self, - query: Optional[str] = None, - memory_types: Optional[List[str]] = None, - tags: Optional[List[str]] = None, - min_importance: Optional[float] = None, - limit: int = 20, - ) -> List[Dict[str, Any]]: - """Filter memories by type, tags, importance. Optional text query.""" - self._maybe_refresh_decay() - index = self._load_index() - state = self._load_state() - query_terms = query.lower().strip().split() if query else [] - filter_tags = set(t.lower() for t in tags) if tags else None - - results = [] - for mid, entry in index.get("entries", {}).items(): - # Type filter - if memory_types and entry.get("type") not in memory_types: - continue - # Importance filter - if min_importance and entry.get("importance", 0) < min_importance: - continue - # Tag filter - if filter_tags: - mem_tags = set(t.lower() for t in entry.get("tags", [])) - if not mem_tags.intersection(filter_tags): - continue - # Query filter - if query_terms: - title = (entry.get("title") or "").lower() - tags_str = " ".join(entry.get("tags") or []).lower() - searchable = f"{title} {tags_str}" - if not any(t in searchable for t in query_terms): - # Check content_preview from index (no file I/O needed) - preview = (entry.get("content_preview") or "").lower() - if not any(t in preview for t in query_terms): - continue - - s = state.get("entries", {}).get(mid, {}) - results.append( - { - "id": mid, - "type": entry.get("type"), - "title": entry.get("title"), - "tags": entry.get("tags", []), - "importance": entry.get("importance"), - "decay_score": round(s.get("decay_score", 0.5), 3), - "path": entry.get("path"), - "created": entry.get("created"), - } - ) - - # Sort by importance descending - results.sort(key=lambda x: x.get("importance", 0), reverse=True) - return results[:limit] - - def update( - self, - memory_id: str, - title: Optional[str] = None, - content: Optional[str] = None, - tags: Optional[List[str]] = None, - importance: Optional[float] = None, - ) -> bool: - """Update fields of an existing memory.""" - path = self._resolve_memory_path(memory_id) - if not path: - return False - - fm, body = self._read_memory_file(path) - now = datetime.now(timezone.utc).isoformat() - - if title is not None: - fm["title"] = title - if tags is not None: - fm["tags"] = [t.lower().strip() for t in tags] - if importance is not None: - fm["importance"] = max(0.0, min(1.0, importance)) - if content is not None: - body = content - - fm["updated"] = now - self._write_memory_file(path, fm, body) - - # Update index (refresh content_preview if content changed) - rel_path = str(path.relative_to(self.memory_dir)) - preview = "" - if content is not None: - preview = body.strip()[:200] - if len(body.strip()) > 200: - last_space = preview.rfind(" ") - if last_space > 0: - preview = preview[:last_space] - self._update_index_entry(memory_id, fm, rel_path, content_preview=preview) - - self._git_commit(f"update: {fm.get('title', memory_id[:8])}", [path]) - return True - - def delete(self, memory_id: str) -> bool: - """Delete a memory file, cascade-delete edges, remove from index.""" - path = self._resolve_memory_path(memory_id) - if not path: - return False - - fm, _ = self._read_memory_file(path) - title = fm.get("title", memory_id[:8]) - - # Cascade-delete edges where from_id or to_id matches - index = self._load_index() - edges_to_delete = [] - for eid, edata in index.get("edges", {}).items(): - if edata.get("from_id") == memory_id or edata.get("to_id") == memory_id: - edges_to_delete.append(eid) - for eid in edges_to_delete: - self.edge_delete(eid) - - # Remove file - path.unlink() - - # Remove from index and state - self._remove_index_entry(memory_id) - state = self._load_state() - state.get("entries", {}).pop(memory_id, None) - self._save_state(state) - - # Remove incoming references from other memories - index = self._load_index() - for mid, entry in index.get("entries", {}).items(): - rels = entry.get("relations", []) - original_count = len(rels) - rels = [r for r in rels if r.get("target") != memory_id] - if len(rels) != original_count: - entry["relations"] = rels - other_path = self._resolve_memory_path(mid) - if other_path: - try: - other_fm, other_body = self._read_memory_file(other_path) - other_fm["relations"] = [ - r - for r in other_fm.get("relations", []) - if r.get("target") != memory_id - ] - self._write_memory_file(other_path, other_fm, other_body) - except Exception: - pass - self._save_index(index) - - # Git: stage deletion - try: - rel_path = path.relative_to(self.memory_dir) - subprocess.run( - ["git", "rm", "--cached", str(rel_path)], - cwd=str(self.memory_dir), - capture_output=True, - timeout=5, - ) - except Exception: - pass - self._git_commit(f"delete: {title}") - - return True - - def related( - self, - memory_id: str, - rel_types: Optional[List[str]] = None, - max_depth: int = 1, - ) -> List[Dict[str, Any]]: - """Traverse relations from a memory, depth-limited BFS.""" - max_depth = max(1, min(5, max_depth)) - index = self._load_index() - visited = set() - results = [] - - # Resolve partial ID prefix to full UUID - entries = index.get("entries", {}) - resolved = self._resolve_prefix(memory_id, entries) - if resolved: - memory_id = resolved - - def traverse(mid: str, depth: int): - if depth > max_depth or mid in visited: - return - visited.add(mid) - - entry = entries.get(mid) - if not entry: - return - - for rel in entry.get("relations", []): - target_id = rel.get("target") - if not target_id or target_id in visited: - continue - if rel_types and rel.get("type") not in rel_types: - continue - - target_entry = index.get("entries", {}).get(target_id) - if target_entry: - results.append( - { - "id": target_id, - "type": target_entry.get("type"), - "title": target_entry.get("title"), - "relationship": rel.get("type"), - "direction": rel.get("direction", "outgoing"), - "strength": rel.get("strength"), - "depth": depth, - } - ) - traverse(target_id, depth + 1) - - traverse(memory_id, 1) - return results - - def stats(self) -> Dict[str, Any]: - """Get statistics about the memory system.""" - index = self._load_index() - state = self._load_state() - entries = index.get("entries", {}) - - by_type = {} - total_relations = 0 - for entry in entries.values(): - t = entry.get("type", "unknown") - by_type[t] = by_type.get(t, 0) + 1 - total_relations += len(entry.get("relations", [])) - - # Count files per directory - dir_counts = {} - for type_dir in TYPE_DIRS.values(): - d = self.memory_dir / "graph" / type_dir - if d.exists(): - dir_counts[type_dir] = len(list(d.glob("*.md"))) - vault_count = ( - len(list((self.memory_dir / "vault").glob("*.md"))) - if (self.memory_dir / "vault").exists() - else 0 - ) - - # Decay stats - state_entries = state.get("entries", {}) - active = sum( - 1 - for s in state_entries.values() - if s.get("decay_score", 0) >= THRESHOLD_ACTIVE - ) - fading = sum( - 1 - for s in state_entries.values() - if THRESHOLD_FADING <= s.get("decay_score", 0) < THRESHOLD_ACTIVE - ) - dormant = sum( - 1 - for s in state_entries.values() - if THRESHOLD_DORMANT <= s.get("decay_score", 0) < THRESHOLD_FADING - ) - archived = sum( - 1 - for s in state_entries.values() - if s.get("decay_score", 0) < THRESHOLD_DORMANT - ) - - # Unique outgoing relations only (avoid double-counting) - unique_relations = total_relations // 2 if total_relations > 0 else 0 - - return { - "total_memories": len(entries), - "by_type": by_type, - "dir_counts": dir_counts, - "vault_count": vault_count, - "total_relations": unique_relations, - "decay_summary": { - "active": active, - "fading": fading, - "dormant": dormant, - "archived": archived, - }, - "memory_dir": str(self.memory_dir), - } - - def recent(self, limit: int = 20) -> List[Dict[str, Any]]: - """Get most recently created memories.""" - index = self._load_index() - entries = [ - {"id": mid, **entry} for mid, entry in index.get("entries", {}).items() - ] - entries.sort(key=lambda x: x.get("created", ""), reverse=True) - return entries[:limit] - - def episode( - self, - type: str, - title: str, - tags: Optional[List[str]] = None, - summary: Optional[str] = None, - memory_link: Optional[str] = None, - ): - """Append an entry to today's episode file.""" - now_local = datetime.now().astimezone() - today = now_local.strftime("%Y-%m-%d") - time_str = now_local.strftime("%H:%M") - ep_path = self.memory_dir / "episodes" / f"{today}.md" - - tags = tags or [] - tags_str = ", ".join(tags) - - entry_lines = [ - f"## {time_str} - {title}", - f"- **Type:** {type}", - ] - if tags_str: - entry_lines.append(f"- **Tags:** {tags_str}") - if memory_link: - # Extract name from link path - name = Path(memory_link).stem - entry_lines.append(f"- **Memory:** [{name}]({memory_link})") - if summary: - entry_lines.append(f"- **Summary:** {summary}") - - entry_text = "\n".join(entry_lines) - - if ep_path.exists(): - existing = ep_path.read_text(encoding="utf-8") - new_content = f"{existing.rstrip()}\n\n{entry_text}\n" - else: - new_content = f"# {today}\n\n{entry_text}\n" - - ep_path.write_text(new_content, encoding="utf-8") - self._git_commit(f"episode: {title}", [ep_path]) - - def reindex(self) -> int: - """Rebuild _index.json from all markdown files. Recovery command.""" - index = {"version": 2, "updated": "", "count": 0, "entries": {}, "edges": {}} - count = 0 - - search_dirs = [ - ("graph", self.memory_dir / "graph"), - ("vault", self.memory_dir / "vault"), - ] - - edges_dir = self.memory_dir / "graph" / EDGES_DIR_NAME - - for _prefix, search_dir in search_dirs: - if not search_dir.exists(): - continue - for md_file in search_dir.rglob("*.md"): - # Skip edge files — handled separately - if edges_dir.exists() and md_file.parent == edges_dir: - continue - try: - fm, body = self._read_memory_file(md_file) - mid = fm.get("id") - if not mid: - continue - rel_path = str(md_file.relative_to(self.memory_dir)) - preview = body.strip()[:200] - if len(body.strip()) > 200: - last_space = preview.rfind(" ") - if last_space > 0: - preview = preview[:last_space] - index["entries"][mid] = { - "title": fm.get("title", ""), - "type": fm.get("type", "general"), - "tags": fm.get("tags", []), - "importance": fm.get("importance", 0.5), - "confidence": fm.get("confidence", 0.8), - "created": fm.get("created", ""), - "updated": fm.get("updated", ""), - "path": rel_path, - "relations": fm.get("relations", []), - "content_preview": preview, - } - count += 1 - except Exception as e: - print(f"Warning: Failed to index {md_file}: {e}", file=sys.stderr) - - # Scan edge files - if edges_dir.exists(): - for md_file in edges_dir.glob("*.md"): - try: - fm, _ = self._read_memory_file(md_file) - eid = fm.get("id") - if not eid: - continue - rel_path = str(md_file.relative_to(self.memory_dir)) - index["edges"][eid] = { - "type": fm.get("type", ""), - "from_id": fm.get("from_id", ""), - "from_title": fm.get("from_title", ""), - "to_id": fm.get("to_id", ""), - "to_title": fm.get("to_title", ""), - "strength": fm.get("strength", 0.8), - "created": fm.get("created", ""), - "updated": fm.get("updated", ""), - "path": rel_path, - } - except Exception as e: - print( - f"Warning: Failed to index edge {md_file}: {e}", file=sys.stderr - ) - - self._save_index(index) - return count - - def pin(self, memory_id: str) -> bool: - """Move a memory to vault/ (never decays).""" - path = self._resolve_memory_path(memory_id) - if not path: - return False - - fm, body = self._read_memory_file(path) - vault_dir = self.memory_dir / "vault" - vault_dir.mkdir(parents=True, exist_ok=True) - - new_path = vault_dir / path.name - self._write_memory_file(new_path, fm, body) - - # Remove old file - old_rel = str(path.relative_to(self.memory_dir)) - path.unlink() - - # Update index with new path - new_rel = str(new_path.relative_to(self.memory_dir)) - self._update_index_entry(memory_id, fm, new_rel) - - # Set decay to infinity in state - state = self._load_state() - state.setdefault("entries", {}).setdefault(memory_id, {})["decay_score"] = 999.0 - self._save_state(state) - - self._git_commit(f"pin: {fm.get('title', memory_id[:8])}", [new_path]) - return True - - -if __name__ == "__main__": - from cli import main - - main() diff --git a/skills/cognitive-memory/common.py b/skills/cognitive-memory/common.py deleted file mode 100644 index 3d587fe..0000000 --- a/skills/cognitive-memory/common.py +++ /dev/null @@ -1,581 +0,0 @@ -""" -Cognitive Memory - Common Constants & Helpers - -Module-level constants, YAML parsing, slug generation, decay calculation, -embedding helpers, and cosine similarity. Shared by all other modules. -""" - -import json -import math -import os -import re -import urllib.request -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple -from urllib.error import URLError - -# ============================================================================= -# CONSTANTS -# ============================================================================= - -# Data directory resolution order: -# 1. COGNITIVE_MEMORY_DIR env var (explicit override) -# 2. XDG_DATA_HOME/cognitive-memory/ (Linux standard) -# 3. ~/.local/share/cognitive-memory/ (XDG default) -_env_dir = os.environ.get("COGNITIVE_MEMORY_DIR", "") -if _env_dir: - MEMORY_DIR = Path(_env_dir).expanduser() -else: - _xdg_data = os.environ.get("XDG_DATA_HOME", "") or str( - Path.home() / ".local" / "share" - ) - MEMORY_DIR = Path(_xdg_data) / "cognitive-memory" - -INDEX_PATH = MEMORY_DIR / "_index.json" -STATE_PATH = MEMORY_DIR / "_state.json" -EMBEDDINGS_PATH = MEMORY_DIR / "_embeddings.json" -OLLAMA_URL = "http://localhost:11434" -EMBEDDING_MODEL = "nomic-embed-text" -EMBEDDING_TIMEOUT = 5 # seconds -CONFIG_PATH = MEMORY_DIR / "_config.json" -OPENAI_EMBED_URL = "https://api.openai.com/v1/embeddings" -OPENAI_MODEL_DEFAULT = "text-embedding-3-small" - -# Memory type -> directory name mapping -TYPE_DIRS = { - "solution": "solutions", - "fix": "fixes", - "decision": "decisions", - "configuration": "configurations", - "problem": "problems", - "workflow": "workflows", - "code_pattern": "code-patterns", - "error": "errors", - "general": "general", - "procedure": "procedures", - "insight": "insights", -} - -VALID_TYPES = list(TYPE_DIRS.keys()) - -# Decay model type weights -TYPE_WEIGHTS = { - "decision": 1.3, - "solution": 1.2, - "insight": 1.25, - "code_pattern": 1.1, - "configuration": 1.1, - "fix": 1.0, - "workflow": 1.0, - "problem": 0.9, - "error": 0.8, - "general": 0.8, - "procedure": 1.4, -} - -DECAY_LAMBDA = 0.03 # Half-life ~23 days - -# Decay score thresholds -THRESHOLD_ACTIVE = 0.5 -THRESHOLD_FADING = 0.2 -THRESHOLD_DORMANT = 0.05 - -# Relationship types (subset from MemoryGraph, focused on most useful) -VALID_RELATION_TYPES = [ - "SOLVES", - "CAUSES", - "BUILDS_ON", - "ALTERNATIVE_TO", - "REQUIRES", - "FOLLOWS", - "RELATED_TO", -] - -# Edge file constants -EDGES_DIR_NAME = "edges" -EDGE_FIELD_ORDER = [ - "id", - "type", - "from_id", - "from_title", - "to_id", - "to_title", - "strength", - "created", - "updated", -] - -# Frontmatter field order for consistent output -FIELD_ORDER = [ - "id", - "type", - "title", - "tags", - "importance", - "confidence", - "steps", - "preconditions", - "postconditions", - "created", - "updated", - "relations", -] - -# CORE.md token budget (approximate, 1 token ~= 4 chars) -CORE_MAX_CHARS = 12000 # ~3K tokens - -GRAPHS_BASE_DIR = MEMORY_DIR.parent / "cognitive-memory-graphs" - - -# ============================================================================= -# YAML FRONTMATTER PARSING (stdlib only) -# ============================================================================= - - -def _needs_quoting(s: str) -> bool: - """Check if a YAML string value needs quoting.""" - if not s: - return True - if any(c in s for c in ":#{}[]&*?|>!%@`"): - return True - try: - float(s) - return True - except ValueError: - pass - if s.lower() in ("true", "false", "null", "yes", "no", "on", "off"): - return True - return False - - -def _quote_yaml(s: str) -> str: - """Quote a string for YAML, escaping internal quotes.""" - escaped = s.replace("\\", "\\\\").replace('"', '\\"') - return f'"{escaped}"' - - -def _format_yaml_value(value: Any, force_quote: bool = False) -> str: - """Format a Python value for YAML output.""" - if value is None: - return "null" - if isinstance(value, bool): - return "true" if value else "false" - if isinstance(value, (int, float)): - return str(value) - s = str(value) - if force_quote or _needs_quoting(s): - return _quote_yaml(s) - return s - - -def _parse_scalar(value: str) -> Any: - """Parse a YAML scalar value to Python type.""" - v = value.strip() - if not v or v == "null": - return None - if v == "true": - return True - if v == "false": - return False - # Try numeric - try: - if "." in v: - return float(v) - return int(v) - except ValueError: - pass - # Strip quotes - if (v.startswith('"') and v.endswith('"')) or ( - v.startswith("'") and v.endswith("'") - ): - return v[1:-1] - return v - - -def serialize_frontmatter(data: Dict[str, Any]) -> str: - """Serialize a dict to YAML frontmatter string (between --- markers).""" - lines = ["---"] - - for key in FIELD_ORDER: - if key not in data: - continue - value = data[key] - - if key == "tags" and isinstance(value, list): - if value: - items = ", ".join(_format_yaml_value(t) for t in value) - lines.append(f"tags: [{items}]") - else: - lines.append("tags: []") - - elif key in ("steps", "preconditions", "postconditions") and isinstance( - value, list - ): - if not value: - continue - lines.append(f"{key}:") - for item in value: - lines.append(f" - {_format_yaml_value(str(item), force_quote=True)}") - - elif key == "relations" and isinstance(value, list): - if not value: - continue - lines.append("relations:") - for rel in value: - first = True - for rk in [ - "target", - "type", - "direction", - "strength", - "context", - "edge_id", - ]: - if rk not in rel: - continue - rv = rel[rk] - prefix = " - " if first else " " - force_q = rk in ("context",) - lines.append( - f"{prefix}{rk}: {_format_yaml_value(rv, force_quote=force_q)}" - ) - first = False - - elif key == "title": - lines.append(f"title: {_format_yaml_value(value, force_quote=True)}") - - else: - lines.append(f"{key}: {_format_yaml_value(value)}") - - lines.append("---") - return "\n".join(lines) - - -def parse_frontmatter(text: str) -> Tuple[Dict[str, Any], str]: - """Parse YAML frontmatter and body from markdown text. - - Returns (frontmatter_dict, body_text). - """ - if not text.startswith("---\n"): - return {}, text - - # Find closing --- - end_match = re.search(r"\n---\s*\n", text[3:]) - if not end_match: - # Try end of string - if text.rstrip().endswith("---"): - end_pos = text.rstrip().rfind("\n---") - if end_pos <= 3: - return {}, text - fm_text = text[4:end_pos] - body = "" - else: - return {}, text - else: - end_pos = end_match.start() + 3 # Offset from text[3:] - fm_text = text[4:end_pos] - body = text[end_pos + end_match.end() - end_match.start() :] - - body = body.lstrip("\n") - data = {} - lines = fm_text.split("\n") - i = 0 - - while i < len(lines): - line = lines[i] - - # Skip empty lines - if not line.strip(): - i += 1 - continue - - # Must be a top-level key (no leading whitespace) - if line[0] == " ": - i += 1 - continue - - if ":" not in line: - i += 1 - continue - - key, _, rest = line.partition(":") - key = key.strip() - rest = rest.strip() - - if not rest: - # Block value - collect indented lines - block_lines = [] - j = i + 1 - while j < len(lines) and lines[j] and lines[j][0] == " ": - block_lines.append(lines[j]) - j += 1 - - if key == "relations": - data["relations"] = _parse_relations_block(block_lines) - elif block_lines and block_lines[0].strip().startswith("- "): - # Simple list - data[key] = [ - _parse_scalar(bl.strip().lstrip("- ")) - for bl in block_lines - if bl.strip().startswith("- ") - ] - else: - data[key] = None - i = j - continue - - # Inline list: [a, b, c] - if rest.startswith("[") and rest.endswith("]"): - inner = rest[1:-1] - if inner.strip(): - data[key] = [ - _parse_scalar(v.strip()) for v in inner.split(",") if v.strip() - ] - else: - data[key] = [] - else: - data[key] = _parse_scalar(rest) - - i += 1 - - return data, body - - -def _parse_relations_block(lines: List[str]) -> List[Dict[str, Any]]: - """Parse a YAML block list of relation dicts.""" - relations = [] - current = None - - for line in lines: - stripped = line.strip() - if not stripped: - continue - - if stripped.startswith("- "): - # New relation entry - current = {} - relations.append(current) - # Parse key:value on same line as - - rest = stripped[2:] - if ":" in rest: - k, _, v = rest.partition(":") - current[k.strip()] = _parse_scalar(v.strip()) - elif current is not None and ":" in stripped: - k, _, v = stripped.partition(":") - current[k.strip()] = _parse_scalar(v.strip()) - - return relations - - -# ============================================================================= -# HELPER FUNCTIONS -# ============================================================================= - - -def slugify(text: str, max_length: int = 60) -> str: - """Convert text to a URL-friendly slug.""" - text = text.lower().strip() - text = re.sub(r"[^\w\s-]", "", text) - text = re.sub(r"[\s_]+", "-", text) - text = re.sub(r"-+", "-", text) - text = text.strip("-") - if len(text) > max_length: - text = text[:max_length].rstrip("-") - return text or "untitled" - - -def make_filename(title: str, memory_id: str) -> str: - """Create a filename from title and UUID suffix.""" - slug = slugify(title) - suffix = memory_id[:6] - return f"{slug}-{suffix}.md" - - -def calculate_decay_score( - importance: float, days_since_access: float, access_count: int, type_weight: float -) -> float: - """Calculate decay score for a memory. - - decay_score = importance * e^(-lambda * days) * log2(access_count + 1) * type_weight - """ - time_factor = math.exp(-DECAY_LAMBDA * days_since_access) - usage_factor = math.log2(access_count + 1) if access_count > 0 else 1.0 - return importance * time_factor * usage_factor * type_weight - - -def _ollama_embed( - texts: List[str], - model: str = EMBEDDING_MODEL, - timeout: int = EMBEDDING_TIMEOUT, -) -> Optional[List[List[float]]]: - """Get embeddings from Ollama for a list of texts. - - Returns list of embedding vectors, or None if Ollama is unavailable. - """ - try: - payload = json.dumps({"model": model, "input": texts}).encode("utf-8") - req = urllib.request.Request( - f"{OLLAMA_URL}/api/embed", - data=payload, - headers={"Content-Type": "application/json"}, - method="POST", - ) - with urllib.request.urlopen(req, timeout=timeout) as resp: - if resp.status != 200: - return None - data = json.loads(resp.read().decode("utf-8")) - embeddings = data.get("embeddings") - if embeddings and isinstance(embeddings, list): - return embeddings - return None - except ( - ConnectionRefusedError, - URLError, - TimeoutError, - OSError, - json.JSONDecodeError, - ValueError, - KeyError, - ): - return None - - -def _load_memory_config(config_path: Optional[Path] = None) -> Dict[str, Any]: - """Read _config.json, return defaults if missing.""" - path = config_path or CONFIG_PATH - defaults = { - "embedding_provider": "ollama", - "openai_api_key": None, - "ollama_model": EMBEDDING_MODEL, - "openai_model": OPENAI_MODEL_DEFAULT, - } - if path.exists(): - try: - data = json.loads(path.read_text()) - for k, v in defaults.items(): - data.setdefault(k, v) - return data - except (json.JSONDecodeError, OSError): - pass - return defaults - - -def _openai_embed( - texts: List[str], - api_key: str, - model: str = OPENAI_MODEL_DEFAULT, - timeout: int = 30, -) -> Optional[List[List[float]]]: - """Get embeddings from OpenAI API (stdlib-only, same interface as _ollama_embed).""" - try: - payload = json.dumps({"input": texts, "model": model}).encode("utf-8") - req = urllib.request.Request( - OPENAI_EMBED_URL, - data=payload, - headers={ - "Content-Type": "application/json", - "Authorization": f"Bearer {api_key}", - }, - method="POST", - ) - with urllib.request.urlopen(req, timeout=timeout) as resp: - if resp.status != 200: - return None - data = json.loads(resp.read().decode("utf-8")) - items = data.get("data", []) - if items and isinstance(items, list): - # Sort by index to ensure order matches input - items.sort(key=lambda x: x.get("index", 0)) - return [item["embedding"] for item in items] - return None - except ( - ConnectionRefusedError, - URLError, - TimeoutError, - OSError, - json.JSONDecodeError, - ValueError, - KeyError, - ): - return None - - -def _cosine_similarity(a: List[float], b: List[float]) -> float: - """Compute cosine similarity between two vectors.""" - dot = sum(x * y for x, y in zip(a, b)) - norm_a = math.sqrt(sum(x * x for x in a)) - norm_b = math.sqrt(sum(x * x for x in b)) - if norm_a == 0.0 or norm_b == 0.0: - return 0.0 - return dot / (norm_a * norm_b) - - -def _make_edge_filename( - from_title: str, rel_type: str, to_title: str, edge_id: str -) -> str: - """Produce edge filename: {from-slug}--{TYPE}--{to-slug}-{6char}.md""" - from_slug = slugify(from_title, max_length=30) - to_slug = slugify(to_title, max_length=30) - suffix = edge_id[:6] - return f"{from_slug}--{rel_type}--{to_slug}-{suffix}.md" - - -def serialize_edge_frontmatter(data: Dict[str, Any]) -> str: - """Serialize an edge dict to YAML frontmatter string.""" - lines = ["---"] - for key in EDGE_FIELD_ORDER: - if key not in data: - continue - value = data[key] - if key in ("from_title", "to_title"): - lines.append(f"{key}: {_format_yaml_value(value, force_quote=True)}") - else: - lines.append(f"{key}: {_format_yaml_value(value)}") - lines.append("---") - return "\n".join(lines) - - -def load_graph_config(config_path: Optional[Path] = None) -> Dict[str, Dict[str, Any]]: - """Load named graphs config from _config.json 'graphs' key.""" - cfg = _load_memory_config(config_path) - return cfg.get("graphs", {}) - - -def resolve_graph_path( - graph_name: Optional[str], config_path: Optional[Path] = None -) -> Path: - """Resolve graph name to directory path. None/'default' → MEMORY_DIR.""" - if not graph_name or graph_name == "default": - return MEMORY_DIR - graphs = load_graph_config(config_path) - if graph_name in graphs: - p = graphs[graph_name].get("path", "") - if p: - return Path(p).expanduser() - # Convention: sibling of MEMORY_DIR - return GRAPHS_BASE_DIR / graph_name - - -def list_graphs(config_path: Optional[Path] = None) -> List[Dict[str, Any]]: - """List all known graphs: default + configured + discovered on disk.""" - result = [{"name": "default", "path": str(MEMORY_DIR)}] - seen = {"default"} - - # From config - graphs = load_graph_config(config_path) - for name, cfg in graphs.items(): - if name not in seen: - p = cfg.get("path", "") - path = str(Path(p).expanduser()) if p else str(GRAPHS_BASE_DIR / name) - result.append({"name": name, "path": path}) - seen.add(name) - - # Discover on disk - if GRAPHS_BASE_DIR.exists(): - for d in sorted(GRAPHS_BASE_DIR.iterdir()): - if d.is_dir() and d.name not in seen: - result.append({"name": d.name, "path": str(d)}) - seen.add(d.name) - - return result diff --git a/skills/cognitive-memory/dev/PROJECT_PLAN.json b/skills/cognitive-memory/dev/PROJECT_PLAN.json deleted file mode 100644 index e48d128..0000000 --- a/skills/cognitive-memory/dev/PROJECT_PLAN.json +++ /dev/null @@ -1,461 +0,0 @@ -{ - "meta": { - "version": "1.0.0", - "created": "2026-02-13", - "lastUpdated": "2026-02-13", - "planType": "feature", - "project": "cognitive-memory", - "description": "Remaining phases for the cognitive-memory skill. Phase 1 (core system + migration) is complete. This plan covers Phase 1 polish, Phase 2 (reflection + procedural + search), and Phase 3 (identity + economy + multi-agent).", - "totalEstimatedHours": 52, - "totalTasks": 18, - "completedTasks": 10, - "note": "SKILL.md, SCHEMA.md, CLAUDE.md, and feature.json updated to v2.0.0 with all Phase 2 features" - }, - "categories": { - "critical": "Must fix - blocking daily usage", - "high": "Phase 1 polish - improves existing system significantly", - "medium": "Phase 2 - reflection, procedural memory, semantic search", - "low": "Phase 3 - identity, token economy, multi-agent", - "feature": "Nice-to-have enhancements" - }, - "tasks": [ - { - "id": "HIGH-001", - "name": "Add one-line summaries to CORE.md entries", - "description": "CORE.md currently lists memory titles and tags but no summary. Each entry should include a brief one-line summary extracted from the memory content (first sentence or explicit summary field). This makes CORE.md useful at a glance without opening individual files.", - "category": "high", - "priority": 1, - "completed": true, - "tested": true, - "dependencies": [], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "lines": [ - 539, - 580 - ], - "issue": "core() method builds entries from index only, doesn't read file content for summaries" - } - ], - "suggestedFix": "In core(), read the first sentence of each memory body (or a 'summary' frontmatter field if present) and append it after the link. Format: '- [title](path) - one-line summary'", - "estimatedHours": 1, - "notes": "Budget is ~3K tokens. With summaries, may need to reduce entries per section from 15 to 10." - }, - { - "id": "HIGH-002", - "name": "Add scheduled decay recalculation", - "description": "Decay scores only update when 'decay' command is run manually. Should auto-recalculate during recall/search operations if scores are stale (>24h since last calculation). Currently 56 memories are archived and 187 dormant - accessing one should refresh its score automatically.", - "category": "high", - "priority": 2, - "completed": true, - "tested": true, - "dependencies": [], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "lines": [ - 283, - 350 - ], - "issue": "recall() and search() read decay scores from state but never recalculate them" - } - ], - "suggestedFix": "Add a _maybe_refresh_decay() check at the start of recall/search. If _state.json 'updated' is >24h old, run decay(). Also, when get() updates access_count, recalculate that memory's individual decay score immediately.", - "estimatedHours": 2, - "notes": "Be careful not to make recall() slow. Full decay recalc over 313 memories should be fast (<100ms) since it's just math, no file I/O." - }, - { - "id": "HIGH-003", - "name": "Add 'merge' command to consolidate duplicate/related memories", - "description": "With 313 memories, there are inevitably duplicates and near-duplicates (e.g., multiple PostgreSQL migration fixes). A 'merge' command should combine two memories into one, preserving the best content from each, merging tags, and keeping all relations.", - "category": "high", - "priority": 3, - "completed": true, - "tested": true, - "dependencies": [], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "issue": "No merge functionality exists" - } - ], - "suggestedFix": "Add merge(keep_id, absorb_id) method: read both memories, combine content (keep_id content + separator + absorb_id content), merge tags (union), take max importance, redirect all absorb_id relations to keep_id, delete absorb_id file. Add 'merge' CLI subcommand.", - "estimatedHours": 3, - "notes": "Should also update any other memories that reference absorb_id in their relations to point to keep_id instead." - }, - { - "id": "HIGH-004", - "name": "Improve recall content search performance", - "description": "Currently recall() falls back to reading individual markdown files for body content search when title/tag matching fails. With 313+ files this is slow. Should use _index.json with a content preview field, or build a simple inverted index.", - "category": "high", - "priority": 4, - "completed": true, - "tested": true, - "dependencies": [], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "lines": [ - 310, - 325 - ], - "issue": "recall() reads individual files for content matching - O(n) file I/O" - } - ], - "suggestedFix": "Option A: Add 'content_preview' (first 200 chars) to _index.json entries during reindex. Option B: Build a simple inverted word index in _index.json mapping common terms to memory IDs. Option A is simpler and sufficient for 300-500 memories.", - "estimatedHours": 2, - "notes": "Option B becomes necessary if memory count grows past ~1000. For now, Option A is the right call." - }, - { - "id": "MED-001", - "name": "Reflection cycle - automated consolidation sessions", - "description": "Structured process where the agent reviews recent memories, identifies patterns across them, consolidates duplicates, and generates insight memories. Triggered manually ('reflect') or automatically after N new memories since last reflection.", - "category": "medium", - "priority": 5, - "completed": true, - "tested": true, - "dependencies": [ - "HIGH-003" - ], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "issue": "No reflection functionality exists" - } - ], - "suggestedFix": "Add reflect() method and 'reflect' CLI command. Steps: (1) Load memories created since last reflection, (2) Group by tags/project, (3) Identify clusters of related memories, (4) For each cluster, generate a consolidated 'insight' memory (new type) summarizing the pattern, (5) Create BUILDS_ON relations from insight to source memories, (6) Log reflection as episode entry, (7) Store last_reflection timestamp in _state.json.", - "estimatedHours": 6, - "notes": "The actual insight generation requires Claude to analyze the cluster - this command should output the cluster data and prompt the agent to create the insight, not try to auto-generate it. Phase 2 core feature." - }, - { - "id": "MED-002", - "name": "Procedural memory store - learned workflow patterns", - "description": "New memory type 'procedure' that encodes multi-step workflows the agent has learned. Stored in graph/procedures/ with special frontmatter fields: steps (ordered list), preconditions, postconditions, success_rate. Procedures are higher-weight in decay (1.4) since they encode reusable operational knowledge.", - "category": "medium", - "priority": 6, - "completed": true, - "tested": true, - "dependencies": [], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "issue": "No procedure type or special handling exists" - } - ], - "suggestedFix": "Add 'procedure' to VALID_TYPES and TYPE_DIRS (graph/procedures/). Add TYPE_WEIGHTS['procedure'] = 1.4. Extend frontmatter to support 'steps' field (ordered list of strings). Add 'procedure' CLI subcommand that prompts for step-by-step input. Update CORE.md generation to include a 'Key Procedures' section.", - "estimatedHours": 4, - "notes": "Procedures should be extractable from episode logs - if the same sequence of actions appears in multiple episodes, suggest creating a procedure." - }, - { - "id": "MED-003", - "name": "Embedding-based semantic search via local model", - "description": "Use a local embedding model (e.g., all-MiniLM-L6-v2 via sentence-transformers, or Ollama embeddings) to enable semantic search that finds conceptually similar memories even without keyword overlap. Store embeddings in a separate _embeddings.json file.", - "category": "medium", - "priority": 7, - "completed": true, - "tested": true, - "dependencies": [ - "HIGH-004" - ], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "issue": "Search is keyword-only, no semantic understanding" - } - ], - "suggestedFix": "Create embeddings.py module. On 'embed' command: (1) Load all memories, (2) Generate embeddings for title + first 200 chars of content, (3) Store in _embeddings.json (gitignored). On recall: compute query embedding, cosine similarity against all stored embeddings, merge scores with keyword search. Use Ollama API if available (check localhost:11434), fall back to keyword-only.", - "estimatedHours": 6, - "notes": "Keep this optional - keyword search must remain the default. Ollama is already running on the homelab. Embedding model choice: nomic-embed-text or all-minilm via Ollama. _embeddings.json should be gitignored and regeneratable." - }, - { - "id": "MED-004", - "name": "Auto-episode logging from git commits", - "description": "Automatically create episode entries when memories are stored after git commits. Detect when the context is a post-commit storage trigger and auto-populate episode fields from commit metadata (branch, files changed, commit message).", - "category": "medium", - "priority": 8, - "completed": true, - "tested": true, - "dependencies": [], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "lines": [ - 590 - ], - "issue": "store() doesn't auto-log episodes" - } - ], - "suggestedFix": "Add optional --episode flag to store command. When set, automatically append an episode entry after storing the memory. Include the memory file path as the memory_link. This avoids requiring two separate CLI calls.", - "estimatedHours": 1, - "notes": "Simple quality-of-life improvement. Most stores should also log an episode." - }, - { - "id": "MED-005", - "name": "REFLECTION.md - periodic reflection summary", - "description": "Similar to CORE.md but focused on patterns and insights discovered during reflection cycles. Auto-generated file that tracks: recurring themes, cross-project patterns, most accessed memories, and memories that were consolidated.", - "category": "medium", - "priority": 9, - "completed": true, - "tested": true, - "dependencies": [ - "MED-001" - ], - "files": [ - { - "path": "~/.claude/memory/REFLECTION.md", - "issue": "File does not exist" - } - ], - "suggestedFix": "Generate during reflect() command. Structure: '## Themes' (tag co-occurrence analysis), '## Cross-Project Patterns' (memories that share tags across different projects), '## Most Accessed' (top 10 by access_count), '## Consolidated' (memories merged in this reflection). Store at ~/.claude/memory/REFLECTION.md alongside CORE.md.", - "estimatedHours": 3, - "notes": "Depends on reflection cycle being implemented first." - }, - { - "id": "MED-006", - "name": "Tag co-occurrence analysis and suggestions", - "description": "Analyze tag patterns across memories to suggest related memories during store and identify under-tagged memories. Build a tag co-occurrence matrix that reveals hidden connections (e.g., 'redis' frequently co-occurs with 'timeout' and 'production').", - "category": "medium", - "priority": 10, - "completed": true, - "tested": true, - "dependencies": [], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "issue": "No tag analysis exists" - } - ], - "suggestedFix": "Add 'tags' CLI command with subcommands: 'tags list' (all tags with counts), 'tags related ' (co-occurring tags), 'tags suggest ' (suggest additional tags based on content and co-occurrence). Build co-occurrence matrix from _index.json during reindex.", - "estimatedHours": 3, - "notes": "Useful for maintaining tag hygiene and discovering patterns." - }, - { - "id": "LOW-001", - "name": "IDENTITY.md - agent identity and preferences tracking", - "description": "Auto-maintained markdown file that tracks the agent's learned identity: user preferences, communication style, project familiarity, skill proficiencies, and interaction patterns. Updated during reflection cycles based on accumulated memories.", - "category": "low", - "priority": 11, - "completed": false, - "tested": false, - "dependencies": [ - "MED-001" - ], - "files": [ - { - "path": "~/.claude/memory/IDENTITY.md", - "issue": "File does not exist" - } - ], - "suggestedFix": "Generate during reflection cycle. Sections: '## User Profile' (Cal's preferences from memories), '## Project Familiarity' (projects ranked by memory count and recency), '## Interaction Patterns' (common workflows, preferred tools), '## Learned Preferences' (extracted from decision memories). Load alongside CORE.md at session start.", - "estimatedHours": 4, - "notes": "Phase 3. Should be lightweight and focused on actionable context, not personality. Maximum ~1K tokens to avoid bloating system prompt." - }, - { - "id": "LOW-002", - "name": "Token economy - budget system for reflections", - "description": "Assign token costs to reflection operations and maintain a budget that replenishes over time. Prevents excessive reflection runs and encourages efficient memory management. Budget tracked in _state.json.", - "category": "low", - "priority": 12, - "completed": false, - "tested": false, - "dependencies": [ - "MED-001" - ], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "issue": "No budget tracking exists" - } - ], - "suggestedFix": "Add 'budget' section to _state.json: {tokens_remaining, tokens_max, last_refill, refill_rate}. Reflection costs: full reflect = 100 tokens, merge = 10, core regen = 5. Budget refills at 50 tokens/day. When budget is exhausted, reflection operations are blocked with a warning. 'budget' CLI command shows current balance.", - "estimatedHours": 3, - "notes": "Phase 3. Mainly useful if autonomous reflection is implemented (cron-triggered). For manual reflection, this may be unnecessary." - }, - { - "id": "LOW-003", - "name": "Multi-agent pending queue - gated writes for team workflows", - "description": "When multiple Claude Code agents work in a team, memory writes should go through a pending queue that the lead agent reviews before committing. Prevents conflicting or low-quality memories from parallel agents.", - "category": "low", - "priority": 13, - "completed": false, - "tested": false, - "dependencies": [], - "files": [ - { - "path": "~/.claude/memory/", - "issue": "No pending queue mechanism exists" - } - ], - "suggestedFix": "Add ~/.claude/memory/_pending/ directory (gitignored). When store() detects team context (environment variable or flag), write to _pending/ instead of graph/. Add 'pending' CLI command: 'pending list', 'pending approve ', 'pending reject ', 'pending approve-all'. Approved memories move to graph/ with normal git commit. Rejected ones are deleted.", - "estimatedHours": 5, - "notes": "Phase 3. Only needed if multi-agent workflows become common. For now, single-agent usage doesn't need this." - }, - { - "id": "LOW-004", - "name": "Memory visualization - graph export for external tools", - "description": "Export the memory graph as JSON or DOT format for visualization in tools like Obsidian, Gephi, or a custom web viewer. Show memories as nodes, relations as edges, with size/color based on decay score and type.", - "category": "low", - "priority": 14, - "completed": false, - "tested": false, - "dependencies": [], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "issue": "No export functionality exists" - } - ], - "suggestedFix": "Add 'export' CLI command with --format flag (json, dot, obsidian). JSON: full graph with nodes and edges. DOT: Graphviz format. Obsidian: Generate [[wikilinks]] in memory files for Obsidian graph view. Output to stdout or --output file.", - "estimatedHours": 3, - "notes": "Phase 3 nice-to-have. The Obsidian export is most interesting since Cal could browse memories in Obsidian's graph view." - }, - { - "id": "FEAT-001", - "name": "Cron-based auto-maintenance", - "description": "Set up a cron job or systemd timer that runs periodic maintenance: decay recalculation, CORE.md regeneration, and stale memory detection. Runs daily or weekly without requiring an active Claude session.", - "category": "feature", - "priority": 15, - "completed": false, - "tested": false, - "dependencies": [ - "HIGH-002" - ], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "issue": "No maintenance command exists" - } - ], - "suggestedFix": "Add 'maintain' CLI command that runs: decay(), core(), and reports stale/archived memory counts. Create systemd user timer or cron entry: '0 4 * * * python3 ~/.claude/skills/cognitive-memory/client.py maintain'. Add --quiet flag for cron usage.", - "estimatedHours": 2, - "notes": "Simple automation. Should also push to git after maintenance." - }, - { - "id": "FEAT-002", - "name": "Memory import from external sources", - "description": "Import memories from other formats: NoteDiscovery notes, markdown files, JSON dumps. Useful for bootstrapping memories from existing knowledge bases or migrating from other systems.", - "category": "feature", - "priority": 16, - "completed": false, - "tested": false, - "dependencies": [], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "issue": "No import functionality beyond migrate.py" - } - ], - "suggestedFix": "Add 'import' CLI command with --format flag (markdown, json, notediscovery). Markdown: parse frontmatter or use filename as title. JSON: expect {title, content, type, tags} objects. NoteDiscovery: use NoteDiscovery client to fetch and convert notes.", - "estimatedHours": 3, - "notes": "Lower priority but useful for knowledge consolidation." - }, - { - "id": "FEAT-003", - "name": "Memory health report", - "description": "Generate a health report showing: memories with no tags, memories with no relations, duplicate titles, stale memories that should be archived or revived, tag distribution imbalance, and orphaned relations.", - "category": "feature", - "priority": 17, - "completed": false, - "tested": false, - "dependencies": [], - "files": [ - { - "path": "~/.claude/skills/cognitive-memory/client.py", - "issue": "stats() only shows counts, no qualitative analysis" - } - ], - "suggestedFix": "Add 'health' CLI command. Checks: (1) untagged memories, (2) memories with 0 relations, (3) duplicate/near-duplicate titles (Levenshtein or token overlap), (4) memories below archive threshold, (5) tag distribution (warn if >50% of memories share same tag), (6) broken relation targets. Output as markdown report.", - "estimatedHours": 3, - "notes": "Good complement to reflection cycle. Run before reflect() to identify consolidation targets." - }, - { - "id": "DOCS-001", - "name": "Update NoteDiscovery with cognitive-memory reference docs", - "description": "Create reference documentation in NoteDiscovery at reference/skills/cognitive-memory covering the full skill documentation, CLI reference, schema, and migration notes.", - "category": "feature", - "priority": 18, - "completed": false, - "tested": false, - "dependencies": [], - "files": [], - "suggestedFix": "Use NoteDiscovery client to create/update a note at reference/skills/cognitive-memory with SKILL.md content adapted for the wiki format. Include links to the git repo.", - "estimatedHours": 1, - "notes": "Keeps NoteDiscovery in sync with skill documentation, matching the pattern used for MemoryGraph docs." - } - ], - "quickWins": [ - { - "taskId": "MED-004", - "estimatedMinutes": 45, - "impact": "Eliminates the need for two separate CLI calls (store + episode) on every memory save" - }, - { - "taskId": "HIGH-001", - "estimatedMinutes": 60, - "impact": "Makes CORE.md immediately useful at a glance without opening individual files" - }, - { - "taskId": "DOCS-001", - "estimatedMinutes": 30, - "impact": "Keeps wiki docs in sync with new skill" - } - ], - "productionBlockers": [], - "weeklyRoadmap": { - "week1": { - "theme": "Phase 1 Polish", - "tasks": [ - "HIGH-001", - "HIGH-002", - "HIGH-004", - "MED-004" - ], - "estimatedHours": 6 - }, - "week2": { - "theme": "Merge & Health", - "tasks": [ - "HIGH-003", - "FEAT-003", - "MED-006" - ], - "estimatedHours": 9 - }, - "week3": { - "theme": "Phase 2 Core - Reflection", - "tasks": [ - "MED-001", - "MED-005" - ], - "estimatedHours": 9 - }, - "week4": { - "theme": "Phase 2 - Procedural & Maintenance", - "tasks": [ - "MED-002", - "FEAT-001", - "DOCS-001" - ], - "estimatedHours": 7 - }, - "week5": { - "theme": "Phase 2 - Semantic Search", - "tasks": [ - "MED-003" - ], - "estimatedHours": 6 - }, - "week6_plus": { - "theme": "Phase 3 - Identity, Economy, Multi-Agent", - "tasks": [ - "LOW-001", - "LOW-002", - "LOW-003", - "LOW-004", - "FEAT-002" - ], - "estimatedHours": 18 - } - } -} \ No newline at end of file diff --git a/skills/cognitive-memory/dev/migrate.py b/skills/cognitive-memory/dev/migrate.py deleted file mode 100644 index dfa4faa..0000000 --- a/skills/cognitive-memory/dev/migrate.py +++ /dev/null @@ -1,531 +0,0 @@ -#!/usr/bin/env python3 -""" -Cognitive Memory Migration Script - -Migrates all memories from MemoryGraph SQLite database to markdown-based -cognitive memory system. Idempotent - skips files that already exist. - -Usage: - python migrate.py # Run migration - python migrate.py --dry-run # Preview without writing - python migrate.py --verify # Verify post-migration integrity -""" - -import json -import os -import re -import sqlite3 -import sys -from datetime import datetime, timezone -from pathlib import Path - -# Import from sibling module -sys.path.insert(0, str(Path(__file__).parent)) -from client import ( - CognitiveMemoryClient, - MEMORY_DIR, - TYPE_DIRS, - TYPE_WEIGHTS, - VALID_TYPES, - calculate_decay_score, - make_filename, - parse_frontmatter, - serialize_frontmatter, - slugify, -) - -# MemoryGraph database location -MEMORYGRAPH_DB = Path.home() / ".memorygraph" / "memory.db" - -# Memory type mapping: MemoryGraph types -> cognitive-memory types -# MemoryGraph has more types; map extras to closest cognitive-memory equivalent -TYPE_MAP = { - "solution": "solution", - "problem": "problem", - "error": "error", - "fix": "fix", - "code_pattern": "code_pattern", - "decision": "decision", - "configuration": "configuration", - "workflow": "workflow", - "general": "general", - # MemoryGraph-only types mapped to closest equivalents - "task": "general", - "project": "general", - "technology": "general", - "command": "general", - "file_context": "general", -} - - -def load_sqlite_memories(db_path: Path) -> list: - """Load all memories from MemoryGraph SQLite database.""" - conn = sqlite3.connect(str(db_path)) - conn.row_factory = sqlite3.Row - - rows = conn.execute( - "SELECT id, properties, created_at, updated_at FROM nodes WHERE label = 'Memory'" - ).fetchall() - - memories = [] - for row in rows: - props = json.loads(row["properties"]) - memories.append({ - "id": props.get("id", row["id"]), - "type": props.get("type", "general"), - "title": props.get("title", "Untitled"), - "content": props.get("content", ""), - "summary": props.get("summary"), - "tags": props.get("tags", []), - "importance": props.get("importance", 0.5), - "confidence": props.get("confidence", 0.8), - "usage_count": props.get("usage_count", 0), - "created_at": props.get("created_at", row["created_at"]), - "updated_at": props.get("updated_at", row["updated_at"]), - }) - - conn.close() - return memories - - -def load_sqlite_relationships(db_path: Path) -> list: - """Load all relationships from MemoryGraph SQLite database.""" - conn = sqlite3.connect(str(db_path)) - conn.row_factory = sqlite3.Row - - rows = conn.execute( - "SELECT id, from_id, to_id, rel_type, properties, created_at FROM relationships" - ).fetchall() - - relationships = [] - for row in rows: - props = json.loads(row["properties"]) - - # Parse context - may be a JSON string within JSON - context_raw = props.get("context", "") - context_text = "" - if context_raw: - try: - ctx = json.loads(context_raw) if isinstance(context_raw, str) else context_raw - if isinstance(ctx, dict): - context_text = ctx.get("text", "") - else: - context_text = str(ctx) - except (json.JSONDecodeError, TypeError): - context_text = str(context_raw) - - relationships.append({ - "id": row["id"], - "from_id": row["from_id"], - "to_id": row["to_id"], - "rel_type": row["rel_type"], - "strength": props.get("strength", 0.5), - "context": context_text, - }) - - conn.close() - return relationships - - -def migrate(dry_run: bool = False): - """Run the full migration from MemoryGraph to cognitive-memory.""" - if not MEMORYGRAPH_DB.exists(): - print(f"Error: MemoryGraph database not found at {MEMORYGRAPH_DB}") - sys.exit(1) - - print(f"Loading memories from {MEMORYGRAPH_DB}...") - memories = load_sqlite_memories(MEMORYGRAPH_DB) - relationships = load_sqlite_relationships(MEMORYGRAPH_DB) - - print(f"Found {len(memories)} memories and {len(relationships)} relationships") - - if dry_run: - print("\n--- DRY RUN ---") - by_type = {} - for mem in memories: - t = TYPE_MAP.get(mem["type"], "general") - by_type[t] = by_type.get(t, 0) + 1 - print("Type distribution after mapping:") - for t, count in sorted(by_type.items(), key=lambda x: -x[1]): - dir_name = TYPE_DIRS.get(t, "general") - print(f" graph/{dir_name}/: {count}") - print(f"\nRelationships to embed: {len(relationships)}") - return - - # Initialize client (creates directories) - client = CognitiveMemoryClient() - - # Build memory ID -> file path mapping - id_to_path = {} - created_count = 0 - skipped_count = 0 - - print("\nPhase 1: Creating markdown files...") - for i, mem in enumerate(memories, 1): - memory_id = mem["id"] - mem_type = TYPE_MAP.get(mem["type"], "general") - type_dir = TYPE_DIRS.get(mem_type, "general") - - # Create filename - filename = make_filename(mem["title"], memory_id) - rel_path = f"graph/{type_dir}/{filename}" - full_path = MEMORY_DIR / rel_path - - # Check if already exists (idempotent) - if full_path.exists(): - id_to_path[memory_id] = (full_path, rel_path) - skipped_count += 1 - continue - - # Build frontmatter - frontmatter = { - "id": memory_id, - "type": mem_type, - "title": mem["title"], - "tags": mem.get("tags", []), - "importance": mem.get("importance", 0.5), - "confidence": mem.get("confidence", 0.8), - "created": mem.get("created_at", ""), - "updated": mem.get("updated_at", ""), - } - - # Build content body - content = mem.get("content", "") - if mem.get("summary"): - content = f"{content}\n\n**Summary:** {mem['summary']}" - - # Write file - client._write_memory_file(full_path, frontmatter, content) - id_to_path[memory_id] = (full_path, rel_path) - created_count += 1 - - if i % 50 == 0: - print(f" {i}/{len(memories)} files created...") - - print(f" Created: {created_count}, Skipped (existing): {skipped_count}") - - # Phase 2: Embed relationships into frontmatter - print("\nPhase 2: Embedding relationships into frontmatter...") - rel_count = 0 - - # Group relationships by source memory - from_rels = {} # from_id -> list of (to_id, type, strength, context) - for rel in relationships: - from_rels.setdefault(rel["from_id"], []).append(rel) - - for from_id, rels in from_rels.items(): - if from_id not in id_to_path: - print(f" Warning: Source memory {from_id[:8]} not found, skipping {len(rels)} relationships") - continue - - full_path, rel_path = id_to_path[from_id] - - # Read current frontmatter - fm, body = client._read_memory_file(full_path) - existing_rels = fm.get("relations", []) - existing_targets = {(r.get("target"), r.get("type")) for r in existing_rels} - - added = 0 - for rel in rels: - to_id = rel["to_id"] - if to_id not in id_to_path: - continue - if (to_id, rel["rel_type"]) in existing_targets: - continue # Already exists - - # Normalize relation type to valid set - rel_type = rel["rel_type"] - if rel_type not in ("SOLVES", "CAUSES", "BUILDS_ON", "ALTERNATIVE_TO", - "REQUIRES", "FOLLOWS", "RELATED_TO"): - rel_type = "RELATED_TO" # Map unknown types to RELATED_TO - - new_rel = { - "target": to_id, - "type": rel_type, - "direction": "outgoing", - "strength": rel.get("strength", 0.5), - } - if rel.get("context"): - new_rel["context"] = rel["context"] - - existing_rels.append(new_rel) - added += 1 - - if added > 0: - fm["relations"] = existing_rels - client._write_memory_file(full_path, fm, body) - rel_count += added - - # Also add incoming relations to target memories - for rel in rels: - to_id = rel["to_id"] - if to_id not in id_to_path: - continue - - to_path, to_rel = id_to_path[to_id] - to_fm, to_body = client._read_memory_file(to_path) - to_rels = to_fm.get("relations", []) - - # Check for existing incoming - has_incoming = any( - r.get("target") == from_id and r.get("direction") == "incoming" - for r in to_rels - ) - if has_incoming: - continue - - rel_type = rel["rel_type"] - if rel_type not in ("SOLVES", "CAUSES", "BUILDS_ON", "ALTERNATIVE_TO", - "REQUIRES", "FOLLOWS", "RELATED_TO"): - rel_type = "RELATED_TO" - - incoming = { - "target": from_id, - "type": rel_type, - "direction": "incoming", - "strength": rel.get("strength", 0.5), - } - if rel.get("context"): - incoming["context"] = rel["context"] - - to_rels.append(incoming) - to_fm["relations"] = to_rels - client._write_memory_file(to_path, to_fm, to_body) - - print(f" Embedded {rel_count} outgoing relationships") - - # Phase 3: Build _index.json - print("\nPhase 3: Building index...") - indexed = client.reindex() - print(f" Indexed {indexed} memories") - - # Phase 4: Initialize _state.json with usage data - print("\nPhase 4: Initializing state with usage data...") - state = client._load_state() - now = datetime.now(timezone.utc) - - for mem in memories: - mid = mem["id"] - usage_count = mem.get("usage_count", 0) - created_str = mem.get("created_at", "") - - # Calculate initial decay - try: - created_dt = datetime.fromisoformat(created_str.replace("Z", "+00:00")) - if created_dt.tzinfo is None: - created_dt = created_dt.replace(tzinfo=timezone.utc) - days = (now - created_dt).total_seconds() / 86400 - except (ValueError, AttributeError): - days = 30 - - mem_type = TYPE_MAP.get(mem["type"], "general") - type_weight = TYPE_WEIGHTS.get(mem_type, 1.0) - importance = mem.get("importance", 0.5) - - decay_score = calculate_decay_score(importance, days, usage_count, type_weight) - - state.setdefault("entries", {})[mid] = { - "access_count": usage_count, - "last_accessed": mem.get("updated_at", mem.get("created_at", now.isoformat())), - "decay_score": round(decay_score, 4), - } - - client._save_state(state) - print(f" Initialized state for {len(state.get('entries', {}))} memories") - - # Phase 5: Git commit all migrated files - print("\nPhase 5: Git commit...") - try: - import subprocess - subprocess.run( - ["git", "add", "-A"], - cwd=str(MEMORY_DIR), - capture_output=True, timeout=30 - ) - subprocess.run( - ["git", "commit", "-m", - f"migrate: {len(memories)} memories from MemoryGraph\n\n" - f"- {created_count} new markdown files created\n" - f"- {rel_count} relationships embedded\n" - f"- {indexed} entries indexed\n" - f"- State initialized with usage data"], - cwd=str(MEMORY_DIR), - capture_output=True, timeout=30 - ) - print(" Committed to git") - except Exception as e: - print(f" Warning: Git commit failed: {e}") - - # Phase 6: Archive MemoryGraph database - print("\nPhase 6: Archiving MemoryGraph database...") - archive_path = MEMORYGRAPH_DB.with_suffix(".db.archive") - if not archive_path.exists(): - import shutil - shutil.copy2(str(MEMORYGRAPH_DB), str(archive_path)) - print(f" Archived to {archive_path}") - else: - print(f" Archive already exists at {archive_path}") - - # Generate CORE.md - print("\nPhase 7: Generating CORE.md...") - client.core() - print(" CORE.md generated") - - # Summary - print("\n" + "=" * 60) - print("Migration Complete!") - print("=" * 60) - print(f" Memories migrated: {len(memories)}") - print(f" Files created: {created_count}") - print(f" Files skipped: {skipped_count}") - print(f" Relations embedded: {rel_count}") - print(f" Index entries: {indexed}") - print(f" Memory dir: {MEMORY_DIR}") - print(f" Archive: {archive_path}") - - -def verify(): - """Verify migration integrity.""" - print("Verifying migration integrity...\n") - - if not MEMORYGRAPH_DB.exists(): - # Try archive - archive = MEMORYGRAPH_DB.with_suffix(".db.archive") - if archive.exists(): - db_path = archive - else: - print("Error: No MemoryGraph database found for verification") - sys.exit(1) - else: - db_path = MEMORYGRAPH_DB - - # Load SQLite data - memories = load_sqlite_memories(db_path) - relationships = load_sqlite_relationships(db_path) - - client = CognitiveMemoryClient() - index = client._load_index() - state = client._load_state() - - errors = [] - warnings = [] - - # Check 1: Count match - sqlite_count = len(memories) - md_count = len(index.get("entries", {})) - if sqlite_count != md_count: - errors.append(f"Count mismatch: SQLite={sqlite_count}, Index={md_count}") - else: - print(f"[OK] Memory count matches: {sqlite_count}") - - # Check 2: All memories have files - missing_files = 0 - for mid, entry in index.get("entries", {}).items(): - path = MEMORY_DIR / entry.get("path", "") - if not path.exists(): - missing_files += 1 - if missing_files <= 5: - errors.append(f"Missing file: {entry.get('path')} ({entry.get('title', '')[:40]})") - if missing_files == 0: - print(f"[OK] All {md_count} files exist on disk") - else: - errors.append(f"Total missing files: {missing_files}") - - # Check 3: State entries - state_count = len(state.get("entries", {})) - if state_count != sqlite_count: - warnings.append(f"State entry count mismatch: expected={sqlite_count}, got={state_count}") - else: - print(f"[OK] State entries match: {state_count}") - - # Check 4: Spot check 5 random memories - import random - sample = random.sample(memories, min(5, len(memories))) - spot_ok = 0 - for mem in sample: - path = client._resolve_memory_path(mem["id"]) - if path: - fm, body = client._read_memory_file(path) - if fm.get("title") == mem["title"]: - spot_ok += 1 - else: - warnings.append( - f"Title mismatch for {mem['id'][:8]}: " - f"SQLite='{mem['title'][:40]}', MD='{fm.get('title', '')[:40]}'" - ) - else: - errors.append(f"Memory {mem['id'][:8]} not found in markdown: {mem['title'][:40]}") - print(f"[OK] Spot check: {spot_ok}/5 memories match") - - # Check 5: Relationships - rel_in_index = sum( - len(entry.get("relations", [])) - for entry in index.get("entries", {}).values() - ) - # Each relationship creates 2 entries (outgoing + incoming) - expected_rel_entries = len(relationships) * 2 - if rel_in_index < len(relationships): - warnings.append( - f"Relation count may be low: SQLite={len(relationships)}, " - f"Index entries={rel_in_index} (expected ~{expected_rel_entries})" - ) - else: - print(f"[OK] Relationships: {len(relationships)} original, {rel_in_index} index entries") - - # Check 6: Git status - try: - import subprocess - result = subprocess.run( - ["git", "status", "--porcelain"], - cwd=str(MEMORY_DIR), - capture_output=True, text=True, timeout=5 - ) - if result.returncode == 0: - untracked = [l for l in result.stdout.strip().split("\n") if l.strip() and not l.startswith("??")] - if untracked: - warnings.append(f"Uncommitted changes in memory repo: {len(untracked)} files") - else: - print("[OK] Git repo clean") - else: - warnings.append("Not a git repo or git error") - except Exception: - warnings.append("Could not check git status") - - # Check 7: CORE.md exists - core_path = MEMORY_DIR / "CORE.md" - if core_path.exists(): - content = core_path.read_text() - print(f"[OK] CORE.md exists ({len(content)} chars)") - else: - warnings.append("CORE.md not found") - - # Report - print() - if errors: - print(f"ERRORS ({len(errors)}):") - for e in errors: - print(f" [!] {e}") - if warnings: - print(f"WARNINGS ({len(warnings)}):") - for w in warnings: - print(f" [?] {w}") - if not errors and not warnings: - print("All checks passed!") - elif not errors: - print(f"\nMigration OK with {len(warnings)} warning(s)") - else: - print(f"\nMigration has {len(errors)} error(s) that need attention") - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser(description="Migrate MemoryGraph to Cognitive Memory") - parser.add_argument("--dry-run", action="store_true", help="Preview without writing") - parser.add_argument("--verify", action="store_true", help="Verify migration integrity") - args = parser.parse_args() - - if args.verify: - verify() - else: - migrate(dry_run=args.dry_run) diff --git a/skills/cognitive-memory/edges.py b/skills/cognitive-memory/edges.py deleted file mode 100644 index 033acdb..0000000 --- a/skills/cognitive-memory/edges.py +++ /dev/null @@ -1,297 +0,0 @@ -"""EdgesMixin: edge/relationship operations for CognitiveMemoryClient. - -This module is part of the client.py mixin refactor. EdgesMixin provides all -edge CRUD and search operations. It relies on helper methods resolved at runtime -via MRO from the composed CognitiveMemoryClient class. -""" - -import uuid -import subprocess -from datetime import datetime, timezone -from pathlib import Path -from typing import Optional, List, Dict, Any - -from common import ( - VALID_RELATION_TYPES, - EDGES_DIR_NAME, - _make_edge_filename, - serialize_edge_frontmatter, -) - - -class EdgesMixin: - """Mixin providing edge/relationship operations for CognitiveMemoryClient.""" - - def relate( - self, - from_id: str, - to_id: str, - rel_type: str, - strength: float = 0.8, - context: Optional[str] = None, - description: Optional[str] = None, - ) -> str: - """Create a relationship between two memories with an edge file. - - Returns edge_id string, or empty string if duplicate. - """ - if rel_type not in VALID_RELATION_TYPES: - raise ValueError( - f"Invalid relation type: {rel_type}. Valid: {VALID_RELATION_TYPES}" - ) - - from_path = self._resolve_memory_path(from_id) - to_path = self._resolve_memory_path(to_id) - if not from_path or not to_path: - raise ValueError(f"Memory not found: {from_id if not from_path else to_id}") - - # Read source memory - fm, body = self._read_memory_file(from_path) - relations = fm.get("relations", []) - - # Check for duplicate - for r in relations: - if r.get("target") == to_id and r.get("type") == rel_type: - return "" # Already exists - - # Read target memory for title - to_fm, to_body = self._read_memory_file(to_path) - - # Create edge file - edge_id = str(uuid.uuid4()) - now = datetime.now(timezone.utc).isoformat() - from_title = fm.get("title", from_id[:8]) - to_title = to_fm.get("title", to_id[:8]) - clamped_strength = max(0.0, min(1.0, strength)) - - edge_data = { - "id": edge_id, - "type": rel_type, - "from_id": from_id, - "from_title": from_title, - "to_id": to_id, - "to_title": to_title, - "strength": clamped_strength, - "created": now, - "updated": now, - } - edge_filename = _make_edge_filename(from_title, rel_type, to_title, edge_id) - edge_path = self.memory_dir / "graph" / EDGES_DIR_NAME / edge_filename - edge_fm_str = serialize_edge_frontmatter(edge_data) - edge_body = description.strip() if description else "" - edge_content = ( - f"{edge_fm_str}\n\n{edge_body}\n" if edge_body else f"{edge_fm_str}\n" - ) - edge_path.write_text(edge_content, encoding="utf-8") - - # Update source memory frontmatter with edge_id - new_rel = { - "target": to_id, - "type": rel_type, - "direction": "outgoing", - "strength": clamped_strength, - "edge_id": edge_id, - } - if context: - new_rel["context"] = context - relations.append(new_rel) - fm["relations"] = relations - fm["updated"] = now - self._write_memory_file(from_path, fm, body) - - # Add incoming relation to target with edge_id - to_relations = to_fm.get("relations", []) - has_incoming = any( - r.get("target") == from_id - and r.get("type") == rel_type - and r.get("direction") == "incoming" - for r in to_relations - ) - if not has_incoming: - incoming_rel = { - "target": from_id, - "type": rel_type, - "direction": "incoming", - "strength": clamped_strength, - "edge_id": edge_id, - } - if context: - incoming_rel["context"] = context - to_relations.append(incoming_rel) - to_fm["relations"] = to_relations - to_fm["updated"] = now - self._write_memory_file(to_path, to_fm, to_body) - - # Update memory index - rel_from = str(from_path.relative_to(self.memory_dir)) - rel_to = str(to_path.relative_to(self.memory_dir)) - self._update_index_entry(from_id, fm, rel_from) - self._update_index_entry(to_id, to_fm, rel_to) - - # Update edge index - self._update_edge_index( - edge_id, edge_data, f"graph/{EDGES_DIR_NAME}/{edge_filename}" - ) - - self._git_commit( - f"relate: {from_id[:8]} --{rel_type}--> {to_id[:8]}", - [from_path, to_path, edge_path], - ) - return edge_id - - def edge_get(self, edge_id: str) -> Optional[Dict[str, Any]]: - """Read full edge file (frontmatter + body).""" - path = self._resolve_edge_path(edge_id) - if not path: - return None - fm, body = self._read_memory_file(path) - return { - "id": fm.get("id", edge_id), - "type": fm.get("type", ""), - "from_id": fm.get("from_id", ""), - "from_title": fm.get("from_title", ""), - "to_id": fm.get("to_id", ""), - "to_title": fm.get("to_title", ""), - "strength": fm.get("strength", 0.8), - "created": fm.get("created", ""), - "updated": fm.get("updated", ""), - "description": body.strip(), - "path": str(path.relative_to(self.memory_dir)), - } - - def edge_search( - self, - query: Optional[str] = None, - types: Optional[List[str]] = None, - from_id: Optional[str] = None, - to_id: Optional[str] = None, - limit: int = 20, - ) -> List[Dict[str, Any]]: - """Search edges via index.""" - index = self._load_index() - results = [] - query_lower = query.lower().strip() if query else "" - - # Resolve partial IDs to full UUIDs via prefix match - if from_id: - entries = index.get("entries", {}) - resolved = self._resolve_prefix(from_id, entries) - from_id = resolved or from_id - if to_id: - entries = index.get("entries", {}) - resolved = self._resolve_prefix(to_id, entries) - to_id = resolved or to_id - - for eid, entry in index.get("edges", {}).items(): - if types and entry.get("type") not in types: - continue - if from_id and entry.get("from_id") != from_id: - continue - if to_id and entry.get("to_id") != to_id: - continue - if query_lower: - searchable = f"{entry.get('from_title', '')} {entry.get('to_title', '')} {entry.get('type', '')}".lower() - if query_lower not in searchable: - continue - results.append({"id": eid, **entry}) - - results.sort(key=lambda x: x.get("created", ""), reverse=True) - return results[:limit] - - def edge_update( - self, - edge_id: str, - description: Optional[str] = None, - strength: Optional[float] = None, - ) -> bool: - """Update edge body/metadata, sync strength to memory frontmatter.""" - path = self._resolve_edge_path(edge_id) - if not path: - return False - - fm, body = self._read_memory_file(path) - now = datetime.now(timezone.utc).isoformat() - - if description is not None: - body = description - if strength is not None: - fm["strength"] = max(0.0, min(1.0, strength)) - fm["updated"] = now - - # Write edge file - edge_fm_str = serialize_edge_frontmatter(fm) - edge_body = body.strip() if body else "" - edge_content = ( - f"{edge_fm_str}\n\n{edge_body}\n" if edge_body else f"{edge_fm_str}\n" - ) - path.write_text(edge_content, encoding="utf-8") - - # Sync strength to memory frontmatter if changed - if strength is not None: - for mid_key in ("from_id", "to_id"): - mid = fm.get(mid_key) - if not mid: - continue - mem_path = self._resolve_memory_path(mid) - if not mem_path: - continue - mem_fm, mem_body = self._read_memory_file(mem_path) - for rel in mem_fm.get("relations", []): - if rel.get("edge_id") == edge_id: - rel["strength"] = fm["strength"] - mem_fm["updated"] = now - self._write_memory_file(mem_path, mem_fm, mem_body) - - # Update edge index - rel_path = str(path.relative_to(self.memory_dir)) - self._update_edge_index(edge_id, fm, rel_path) - self._git_commit(f"edge-update: {edge_id[:8]}", [path]) - return True - - def edge_delete(self, edge_id: str) -> bool: - """Remove edge file and clean frontmatter refs from both memories.""" - path = self._resolve_edge_path(edge_id) - if not path: - return False - - fm, _ = self._read_memory_file(path) - now = datetime.now(timezone.utc).isoformat() - files_to_commit: List[Path] = [] - - # Clean edge_id references from both memories - for mid_key in ("from_id", "to_id"): - mid = fm.get(mid_key) - if not mid: - continue - mem_path = self._resolve_memory_path(mid) - if not mem_path: - continue - mem_fm, mem_body = self._read_memory_file(mem_path) - original_rels = mem_fm.get("relations", []) - mem_fm["relations"] = [ - r for r in original_rels if r.get("edge_id") != edge_id - ] - if len(mem_fm["relations"]) != len(original_rels): - mem_fm["updated"] = now - self._write_memory_file(mem_path, mem_fm, mem_body) - rel_p = str(mem_path.relative_to(self.memory_dir)) - self._update_index_entry(mid, mem_fm, rel_p) - files_to_commit.append(mem_path) - - # Remove edge file - path.unlink() - self._remove_edge_index(edge_id) - - # Git stage deletion - try: - rel_path = path.relative_to(self.memory_dir) - subprocess.run( - ["git", "rm", "--cached", str(rel_path)], - cwd=str(self.memory_dir), - capture_output=True, - timeout=5, - ) - except Exception: - pass - self._git_commit(f"edge-delete: {edge_id[:8]}") - return True diff --git a/skills/cognitive-memory/embeddings.py b/skills/cognitive-memory/embeddings.py deleted file mode 100644 index 460c4d3..0000000 --- a/skills/cognitive-memory/embeddings.py +++ /dev/null @@ -1,233 +0,0 @@ -"""EmbeddingsMixin for CognitiveMemoryClient. - -Provides embedding generation and semantic search capabilities. Extracted from -client.py as part of the mixin refactor. Methods rely on shared state (memory_dir, -_load_index, _load_embeddings_cached) provided by the base class via MRO. -""" - -import json -import sys -from datetime import datetime, timezone -from typing import Any, Dict, List, Optional, Tuple - -from common import ( - EMBEDDING_MODEL, - EMBEDDINGS_PATH, - OPENAI_MODEL_DEFAULT, - _cosine_similarity, - _load_memory_config, - _ollama_embed, - _openai_embed, -) - - -class EmbeddingsMixin: - """Mixin providing embedding generation and semantic recall for memory clients.""" - - def _get_embedding_provider(self) -> Dict[str, Any]: - """Load embedding config from _config.json.""" - return _load_memory_config(self.memory_dir / "_config.json") - - def _embed_texts_with_fallback( - self, - texts: List[str], - timeout: int = 300, - ) -> Tuple[Optional[List[List[float]]], str, str]: - """Embed texts with fallback chain. Returns (vectors, provider_used, model_used).""" - config = self._get_embedding_provider() - provider = config.get("embedding_provider", "ollama") - - # Try configured provider first - if provider == "openai": - api_key = config.get("openai_api_key") - model = config.get("openai_model", OPENAI_MODEL_DEFAULT) - if api_key: - vectors = _openai_embed(texts, api_key, model, timeout=timeout) - if vectors is not None: - return vectors, "openai", model - # Fallback to ollama - ollama_model = config.get("ollama_model", EMBEDDING_MODEL) - vectors = _ollama_embed(texts, model=ollama_model, timeout=timeout) - if vectors is not None: - return vectors, "ollama", ollama_model - else: - # ollama first - ollama_model = config.get("ollama_model", EMBEDDING_MODEL) - vectors = _ollama_embed(texts, model=ollama_model, timeout=timeout) - if vectors is not None: - return vectors, "ollama", ollama_model - # Fallback to openai - api_key = config.get("openai_api_key") - model = config.get("openai_model", OPENAI_MODEL_DEFAULT) - if api_key: - vectors = _openai_embed(texts, api_key, model, timeout=timeout) - if vectors is not None: - return vectors, "openai", model - - return None, "", "" - - def embed(self, if_changed: bool = False) -> Dict[str, Any]: - """Generate embeddings for all memories using configured provider. - - Detects provider changes and re-embeds everything (dimension mismatch safety). - Stores vectors in _embeddings.json (not git-tracked). - - Args: - if_changed: If True, skip embedding if the set of memory IDs hasn't - changed since last run (no new/deleted memories). - """ - index = self._load_index() - entries = index.get("entries", {}) - if not entries: - return { - "embedded": 0, - "provider": "none", - "model": "", - "path": str(EMBEDDINGS_PATH), - } - - # Check for provider change - embeddings_path = self.memory_dir / "_embeddings.json" - old_provider = "" - if embeddings_path.exists(): - try: - old_data = json.loads(embeddings_path.read_text()) - old_provider = old_data.get("provider", "ollama") - except (json.JSONDecodeError, OSError): - pass - - config = self._get_embedding_provider() - new_provider = config.get("embedding_provider", "ollama") - provider_changed = old_provider and old_provider != new_provider - if provider_changed: - print( - f"Provider changed ({old_provider} -> {new_provider}), re-embedding all memories...", - file=sys.stderr, - ) - - # Skip if nothing changed (unless provider switched) - if if_changed and not provider_changed and embeddings_path.exists(): - try: - old_data = json.loads(embeddings_path.read_text()) - embedded_ids = set(old_data.get("entries", {}).keys()) - index_ids = set(entries.keys()) - if embedded_ids == index_ids: - return { - "embedded": 0, - "skipped": True, - "reason": "no new or deleted memories", - "path": str(embeddings_path), - } - except (json.JSONDecodeError, OSError): - pass # Can't read old data, re-embed - - # Build texts to embed - memory_ids = list(entries.keys()) - texts = [] - for mid in memory_ids: - entry = entries[mid] - title = entry.get("title", "") - preview = entry.get("content_preview", "") - texts.append(f"{title}. {preview}") - - # Batch embed in groups of 50 - all_embeddings: Dict[str, List[float]] = {} - batch_size = 50 - provider_used = "" - model_used = "" - for i in range(0, len(texts), batch_size): - batch_texts = texts[i : i + batch_size] - batch_ids = memory_ids[i : i + batch_size] - vectors, provider_used, model_used = self._embed_texts_with_fallback( - batch_texts, - timeout=300, - ) - if vectors is None: - return { - "error": "All embedding providers unavailable", - "embedded": len(all_embeddings), - } - for mid, vec in zip(batch_ids, vectors): - all_embeddings[mid] = vec - - # Write embeddings file with provider info - embeddings_data = { - "provider": provider_used, - "model": model_used, - "updated": datetime.now(timezone.utc).isoformat(), - "entries": all_embeddings, - } - embeddings_path.write_text(json.dumps(embeddings_data, default=str)) - - return { - "embedded": len(all_embeddings), - "provider": provider_used, - "model": model_used, - "path": str(embeddings_path), - } - - def semantic_recall(self, query: str, limit: int = 10) -> List[Dict[str, Any]]: - """Search memories by semantic similarity using embeddings. - - Uses the same provider that generated stored embeddings to embed the query. - Skips vectors with dimension mismatch as safety guard. - """ - emb_data = self._load_embeddings_cached() - if emb_data is None: - return [] - - stored = emb_data.get("entries", {}) - if not stored: - return [] - - # Embed query with matching provider - stored_provider = emb_data.get("provider", "ollama") - config = self._get_embedding_provider() - query_vec = None - - if stored_provider == "openai": - api_key = config.get("openai_api_key") - model = emb_data.get("model", OPENAI_MODEL_DEFAULT) - if api_key: - vecs = _openai_embed([query], api_key, model) - if vecs: - query_vec = vecs[0] - if query_vec is None and stored_provider == "ollama": - stored_model = emb_data.get("model", EMBEDDING_MODEL) - vecs = _ollama_embed([query], model=stored_model) - if vecs: - query_vec = vecs[0] - # Last resort: try any available provider - if query_vec is None: - vecs, _, _ = self._embed_texts_with_fallback([query], timeout=30) - if vecs: - query_vec = vecs[0] - - if query_vec is None: - return [] - - query_dim = len(query_vec) - - # Score all memories by cosine similarity - index = self._load_index() - scored = [] - for mid, vec in stored.items(): - # Skip dimension mismatch - if len(vec) != query_dim: - continue - sim = _cosine_similarity(query_vec, vec) - entry = index.get("entries", {}).get(mid) - if entry: - scored.append( - { - "id": mid, - "title": entry.get("title", ""), - "type": entry.get("type", "general"), - "tags": entry.get("tags", []), - "similarity": round(sim, 4), - "path": entry.get("path", ""), - } - ) - - scored.sort(key=lambda x: x["similarity"], reverse=True) - return scored[:limit] diff --git a/skills/cognitive-memory/feature.json b/skills/cognitive-memory/feature.json deleted file mode 100644 index cfa004f..0000000 --- a/skills/cognitive-memory/feature.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "name": "cognitive-memory", - "version": "3.1.0", - "description": "Markdown-based memory system with decay scoring, episodic logging, semantic search, reflection cycles, auto-curated CORE.md, native MCP server integration, rich edge files, and hybrid Ollama/OpenAI embeddings", - "created": "2026-02-13", - "migrated_from": "memorygraph", - "status": "active", - "files": { - "client.py": "CLI and Python API for all memory operations", - "mcp_server.py": "MCP server for Claude Code tool integration", - "SKILL.md": "Skill documentation and activation triggers", - "SCHEMA.md": "Format documentation for all file types", - "scripts/session_memory.py": "SessionEnd hook — auto-stores session learnings", - "scripts/ensure-symlinks.sh": "Refreshes MEMORY.md symlinks to CORE.md", - "systemd/": "Reference copies of systemd user timers (see systemd/README.md)", - "dev/migrate.py": "One-time migration from MemoryGraph SQLite", - "dev/PROJECT_PLAN.json": "Development roadmap and task tracking" - }, - "data_location": "$XDG_DATA_HOME/cognitive-memory/ (default: ~/.local/share/cognitive-memory/)", - "dependencies": "stdlib-only (no external packages; Ollama optional for semantic search)", - "features": [ - "store: Create markdown memory files with YAML frontmatter (--episode flag)", - "recall: Search memories ranked by relevance and decay score (--semantic flag)", - "get: Retrieve memory by ID with access tracking", - "relate: Create bidirectional relationships between memories", - "search: Filter by type, tags, importance with optional text query", - "update: Modify memory frontmatter and content", - "delete: Remove memory with relation cleanup", - "related: BFS traversal of memory relationships", - "stats: Memory system statistics and decay summary", - "recent: Recently created memories", - "decay: Recalculate all decay scores", - "core: Generate CORE.md auto-curated summary", - "episode: Append to daily session log", - "reindex: Rebuild index from markdown files", - "pin: Move memory to vault (never decays)", - "reflect: Cluster recent memories by tag overlap using union-find", - "reflection: Generate REFLECTION.md summary with themes and patterns", - "procedure: Store procedural memories with steps/preconditions/postconditions", - "embed: Generate Ollama embeddings for semantic search", - "tags list: Show all tags with usage counts", - "tags related: Find co-occurring tags", - "tags suggest: Recommend tags based on co-occurrence patterns", - "edge-get: Get full edge details by ID", - "edge-search: Search edges by query, type, from/to IDs", - "edge-update: Update edge description or strength", - "edge-delete: Remove edge and clean memory references", - "config: Manage embedding provider (ollama/openai) with fallback", - "MCP server: Native Claude Code tool integration via JSON-RPC stdio", - "Hybrid embeddings: Ollama (local) + OpenAI (optional) with automatic fallback", - "Rich edges: First-class edge files in graph/edges/ with descriptions" - ] -} diff --git a/skills/cognitive-memory/mcp_server.py b/skills/cognitive-memory/mcp_server.py deleted file mode 100644 index d7679a8..0000000 --- a/skills/cognitive-memory/mcp_server.py +++ /dev/null @@ -1,796 +0,0 @@ -#!/usr/bin/env python3 -""" -Cognitive Memory MCP Server - -JSON-RPC 2.0 stdio MCP server that wraps CognitiveMemoryClient. -Exposes 18 memory operations as MCP tools for Claude Code. -""" - -import json -import subprocess -import sys -from pathlib import Path -from typing import Any, Dict, Optional - -# Allow imports from this directory (client.py lives here) -sys.path.insert(0, str(Path(__file__).parent)) - -from client import CognitiveMemoryClient, _load_memory_config, MEMORY_DIR -from common import resolve_graph_path, list_graphs - -SYNC_SCRIPT = Path(__file__).parent / "scripts" / "memory-git-sync.sh" - -_clients: Dict[str, CognitiveMemoryClient] = {} - - -def get_client(graph: Optional[str] = None) -> CognitiveMemoryClient: - """Get or create a CognitiveMemoryClient for the given graph.""" - key = graph or "default" - if key not in _clients: - path = resolve_graph_path(graph) - _clients[key] = CognitiveMemoryClient(memory_dir=path) - return _clients[key] - - -def _trigger_git_sync(): - """Fire-and-forget git sync after a write operation.""" - if SYNC_SCRIPT.exists(): - try: - subprocess.Popen( - [str(SYNC_SCRIPT)], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - start_new_session=True, - ) - except Exception: - pass # Non-critical — daily timer is the fallback - - -def create_tools() -> list: - """Define all 18 MCP tool definitions with inputSchema.""" - return [ - { - "name": "memory_store", - "description": ( - "Store a new memory in the cognitive memory system. " - "Creates a markdown file with YAML frontmatter and returns the new memory UUID. " - "Valid types: solution, fix, decision, configuration, problem, workflow, " - "code_pattern, error, general, procedure, insight." - ), - "inputSchema": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Memory type (solution, fix, decision, configuration, problem, workflow, code_pattern, error, general, procedure, insight)", - }, - "title": { - "type": "string", - "description": "Short descriptive title for the memory", - }, - "content": { - "type": "string", - "description": "Full content/body of the memory in markdown", - }, - "tags": { - "type": "array", - "items": {"type": "string"}, - "description": "List of lowercase tags for categorisation (e.g. ['python', 'fix', 'discord'])", - }, - "importance": { - "type": "number", - "description": "Importance score from 0.0 to 1.0 (default 0.5)", - }, - "episode": { - "type": "boolean", - "description": "Also log an episode entry for this memory (default true)", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - "required": ["type", "title", "content"], - }, - }, - { - "name": "memory_recall", - "description": ( - "Search memories by a natural language query, ranked by relevance and decay score. " - "Semantic search is enabled by default when embeddings exist. Set semantic=false for keyword-only." - ), - "inputSchema": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "Natural language search query", - }, - "semantic": { - "type": "boolean", - "description": "Merge with semantic/vector similarity search (requires embeddings, default true)", - }, - "limit": { - "type": "integer", - "description": "Maximum number of results to return (default 10)", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - "required": ["query"], - }, - }, - { - "name": "memory_get", - "description": ( - "Retrieve a single memory by its UUID, including full content, frontmatter metadata, " - "relations, and current decay score." - ), - "inputSchema": { - "type": "object", - "properties": { - "memory_id": { - "type": "string", - "description": "UUID of the memory to retrieve", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - "required": ["memory_id"], - }, - }, - { - "name": "memory_search", - "description": ( - "Filter memories by type, tags, and/or minimum importance score. " - "Optionally include a text query. Returns results sorted by importance descending. " - "Use this for structured browsing; use memory_recall for ranked relevance search." - ), - "inputSchema": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "Optional text query to filter results", - }, - "memory_types": { - "type": "array", - "items": {"type": "string"}, - "description": "Filter by memory types (e.g. ['solution', 'fix'])", - }, - "tags": { - "type": "array", - "items": {"type": "string"}, - "description": "Filter by tags — memory must have at least one of these", - }, - "min_importance": { - "type": "number", - "description": "Minimum importance score (0.0 to 1.0)", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - }, - }, - { - "name": "memory_relate", - "description": ( - "Create a typed relationship (edge) between two memories. " - "Valid relation types: SOLVES, CAUSES, BUILDS_ON, ALTERNATIVE_TO, REQUIRES, FOLLOWS, RELATED_TO. " - "Returns the new edge UUID, or empty string if the relationship already exists." - ), - "inputSchema": { - "type": "object", - "properties": { - "from_id": { - "type": "string", - "description": "UUID of the source memory", - }, - "to_id": { - "type": "string", - "description": "UUID of the target memory", - }, - "rel_type": { - "type": "string", - "description": "Relationship type (SOLVES, CAUSES, BUILDS_ON, ALTERNATIVE_TO, REQUIRES, FOLLOWS, RELATED_TO)", - }, - "description": { - "type": "string", - "description": "Optional human-readable description of the relationship", - }, - "strength": { - "type": "number", - "description": "Relationship strength from 0.0 to 1.0 (default 0.8)", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - "required": ["from_id", "to_id", "rel_type"], - }, - }, - { - "name": "memory_related", - "description": ( - "Traverse the relationship graph from a given memory, returning connected memories " - "up to max_depth hops away. Optionally filter by relationship type." - ), - "inputSchema": { - "type": "object", - "properties": { - "memory_id": { - "type": "string", - "description": "UUID of the starting memory", - }, - "rel_types": { - "type": "array", - "items": {"type": "string"}, - "description": "Filter by relation types (e.g. ['SOLVES', 'BUILDS_ON'])", - }, - "max_depth": { - "type": "integer", - "description": "Maximum traversal depth (1-5, default 1)", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - "required": ["memory_id"], - }, - }, - { - "name": "memory_edge_get", - "description": ( - "Retrieve a single relationship edge by its UUID, including metadata and description body." - ), - "inputSchema": { - "type": "object", - "properties": { - "edge_id": { - "type": "string", - "description": "UUID of the edge to retrieve", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - "required": ["edge_id"], - }, - }, - { - "name": "memory_edge_search", - "description": ( - "Search relationship edges by type, connected memory IDs, or a text query " - "that matches against the from/to memory titles and relationship type." - ), - "inputSchema": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "Text query to match against edge titles and type", - }, - "types": { - "type": "array", - "items": {"type": "string"}, - "description": "Filter by relationship types (e.g. ['SOLVES'])", - }, - "from_id": { - "type": "string", - "description": "Filter to edges originating from this memory UUID", - }, - "to_id": { - "type": "string", - "description": "Filter to edges pointing at this memory UUID", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - }, - }, - { - "name": "memory_reflect", - "description": ( - "Review memories created since a given date, cluster them by shared tags, " - "and return consolidation recommendations. Does NOT auto-create new memories — " - "you review the output and decide what to store. Set dry_run=true to skip " - "updating state and logging an episode entry." - ), - "inputSchema": { - "type": "object", - "properties": { - "since": { - "type": "string", - "description": "ISO date (YYYY-MM-DD) to review memories from. Defaults to last reflection date or 30 days ago.", - }, - "dry_run": { - "type": "boolean", - "description": "If true, return analysis without persisting state changes (default false)", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - }, - }, - { - "name": "memory_reflection", - "description": ( - "Return the current REFLECTION.md summary — the auto-curated narrative of recent " - "memory themes, clusters, and activity. Use this to quickly orient at session start." - ), - "inputSchema": { - "type": "object", - "properties": { - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - }, - }, - { - "name": "memory_stats", - "description": ( - "Return statistics about the memory system: total count, breakdown by type, " - "relation count, decay distribution, embeddings count, and per-directory file counts." - ), - "inputSchema": { - "type": "object", - "properties": { - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - }, - }, - { - "name": "memory_episode", - "description": ( - "Append a timestamped entry to today's episode log file (episodes/YYYY-MM-DD.md in the active graph). " - "Use this to record significant session events, commits, or decisions without creating full memories." - ), - "inputSchema": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Episode entry type (e.g. fix, commit, decision, automation)", - }, - "title": { - "type": "string", - "description": "Short title for the episode entry", - }, - "tags": { - "type": "array", - "items": {"type": "string"}, - "description": "Tags for the episode entry", - }, - "summary": { - "type": "string", - "description": "Optional summary text for the entry", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - "required": ["type", "title"], - }, - }, - { - "name": "memory_tags_list", - "description": ( - "List all tags used across the memory system, sorted by usage frequency. " - "Returns tag name and count of memories using it." - ), - "inputSchema": { - "type": "object", - "properties": { - "limit": { - "type": "integer", - "description": "Maximum number of tags to return (0 = unlimited, default 0)", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - }, - }, - { - "name": "memory_tags_related", - "description": ( - "Find tags that frequently co-occur with a given tag, sorted by co-occurrence count. " - "Useful for discovering related topics and navigating the tag graph." - ), - "inputSchema": { - "type": "object", - "properties": { - "tag": { - "type": "string", - "description": "The tag to find co-occurring tags for", - }, - "limit": { - "type": "integer", - "description": "Maximum number of related tags to return (0 = unlimited, default 0)", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - "required": ["tag"], - }, - }, - { - "name": "memory_embed", - "description": ( - "Generate or refresh vector embeddings for all memories that do not yet have them. " - "Requires either Ollama (nomic-embed-text model) or an OpenAI API key configured. " - "Embeddings enable semantic recall via memory_recall with semantic=true." - ), - "inputSchema": { - "type": "object", - "properties": { - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - }, - }, - { - "name": "memory_core", - "description": ( - "Return the current CORE.md content — the auto-curated high-priority memory digest " - "used to seed Claude sessions. Lists critical solutions, active decisions, and key fixes." - ), - "inputSchema": { - "type": "object", - "properties": { - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - }, - }, - { - "name": "memory_decay", - "description": ( - "Run a decay pass over all memories: recalculate decay scores based on age, " - "access frequency, importance, and type weight. Archives memories whose score " - "drops below the dormant threshold. Returns a summary of updated scores." - ), - "inputSchema": { - "type": "object", - "properties": { - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - }, - }, - { - "name": "memory_config", - "description": ( - "View or update the cognitive memory embedding configuration (_config.json). " - "Set action='show' to display current config (API key is masked). " - "Provide provider='openai' or provider='ollama' to switch embedding backends. " - "Provide openai_api_key to set the OpenAI API key for embeddings." - ), - "inputSchema": { - "type": "object", - "properties": { - "action": { - "type": "string", - "description": "Set to 'show' to display current config without modifying it", - }, - "provider": { - "type": "string", - "description": "Embedding provider: 'ollama' or 'openai'", - }, - "openai_api_key": { - "type": "string", - "description": "OpenAI API key to store in config", - }, - "graph": { - "type": "string", - "description": "Named memory graph to use (default: 'default')", - }, - }, - }, - }, - { - "name": "memory_graphs", - "description": ( - "List all available memory graphs (named, segregated memory namespaces). " - "Returns each graph's name, path, and whether it exists on disk." - ), - "inputSchema": {"type": "object", "properties": {}}, - }, - ] - - -def handle_tool_call(tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]: - """Dispatch MCP tool calls to the CognitiveMemoryClient.""" - - def ok(result: Any) -> Dict[str, Any]: - return { - "content": [ - {"type": "text", "text": json.dumps(result, indent=2, default=str)} - ] - } - - try: - graph = arguments.pop("graph", None) - client = get_client(graph) - - if tool_name == "memory_store": - mem_type = arguments["type"] - title = arguments["title"] - tags = arguments.get("tags") - memory_id = client.store( - type=mem_type, - title=title, - content=arguments["content"], - tags=tags, - importance=arguments.get("importance", 0.5), - ) - # Auto-log episode entry (default true, opt-out with episode=false) - episode_logged = False - if arguments.get("episode", True): - client.episode( - type=mem_type, - title=title, - tags=tags, - ) - episode_logged = True - _trigger_git_sync() - return ok( - { - "success": True, - "memory_id": memory_id, - "episode_logged": episode_logged, - } - ) - - elif tool_name == "memory_recall": - results = client.recall( - query=arguments["query"], - semantic=arguments.get("semantic", True), - limit=arguments.get("limit", 10), - ) - return ok(results) - - elif tool_name == "memory_get": - result = client.get(arguments["memory_id"]) - if result is None: - return ok({"error": f"Memory not found: {arguments['memory_id']}"}) - return ok(result) - - elif tool_name == "memory_search": - results = client.search( - query=arguments.get("query"), - memory_types=arguments.get("memory_types"), - tags=arguments.get("tags"), - min_importance=arguments.get("min_importance"), - ) - return ok(results) - - elif tool_name == "memory_relate": - edge_id = client.relate( - from_id=arguments["from_id"], - to_id=arguments["to_id"], - rel_type=arguments["rel_type"], - description=arguments.get("description"), - strength=arguments.get("strength", 0.8), - ) - if edge_id: - _trigger_git_sync() - return ok({"success": True, "edge_id": edge_id}) - return ok({"success": False, "message": "Relationship already exists"}) - - elif tool_name == "memory_related": - results = client.related( - memory_id=arguments["memory_id"], - rel_types=arguments.get("rel_types"), - max_depth=arguments.get("max_depth", 1), - ) - return ok(results) - - elif tool_name == "memory_edge_get": - result = client.edge_get(arguments["edge_id"]) - if result is None: - return ok({"error": f"Edge not found: {arguments['edge_id']}"}) - return ok(result) - - elif tool_name == "memory_edge_search": - results = client.edge_search( - query=arguments.get("query"), - types=arguments.get("types"), - from_id=arguments.get("from_id"), - to_id=arguments.get("to_id"), - ) - return ok(results) - - elif tool_name == "memory_reflect": - result = client.reflect( - since=arguments.get("since"), - dry_run=arguments.get("dry_run", False), - ) - return ok(result) - - elif tool_name == "memory_reflection": - text = client.reflection_summary() - return ok({"content": text}) - - elif tool_name == "memory_stats": - result = client.stats() - return ok(result) - - elif tool_name == "memory_episode": - client.episode( - type=arguments["type"], - title=arguments["title"], - tags=arguments.get("tags"), - summary=arguments.get("summary"), - ) - return ok({"success": True}) - - elif tool_name == "memory_tags_list": - results = client.tags_list(limit=arguments.get("limit", 0)) - return ok(results) - - elif tool_name == "memory_tags_related": - results = client.tags_related( - tag=arguments["tag"], - limit=arguments.get("limit", 0), - ) - return ok(results) - - elif tool_name == "memory_embed": - result = client.embed() - return ok(result) - - elif tool_name == "memory_core": - text = client.core() - return ok({"content": text}) - - elif tool_name == "memory_decay": - result = client.decay() - return ok(result) - - elif tool_name == "memory_config": - config_path = client.memory_dir / "_config.json" - config = _load_memory_config(config_path) - changed = False - - provider = arguments.get("provider") - openai_api_key = arguments.get("openai_api_key") - - if provider: - config["embedding_provider"] = provider - changed = True - if openai_api_key: - config["openai_api_key"] = openai_api_key - changed = True - - if changed: - config_path.write_text(json.dumps(config, indent=2)) - return ok({"success": True, "updated": True}) - else: - # Show config with masked API key - display = dict(config) - key = display.get("openai_api_key") - if key and isinstance(key, str) and len(key) > 8: - display["openai_api_key"] = key[:4] + "..." + key[-4:] - return ok(display) - - elif tool_name == "memory_graphs": - graphs = list_graphs() - # Enrich with existence check and memory count - for g in graphs: - p = Path(g["path"]) - g["exists"] = p.exists() - if g["exists"]: - index_path = p / "_index.json" - if index_path.exists(): - try: - idx = json.loads(index_path.read_text()) - g["memory_count"] = len(idx.get("entries", {})) - except (json.JSONDecodeError, OSError): - g["memory_count"] = 0 - else: - g["memory_count"] = 0 - return ok(graphs) - - else: - return { - "content": [{"type": "text", "text": f"Unknown tool: {tool_name}"}], - "isError": True, - } - - except Exception as e: - return { - "content": [{"type": "text", "text": f"Error: {str(e)}"}], - "isError": True, - } - - -def main(): - """MCP stdio server main loop (JSON-RPC 2.0).""" - - tools = create_tools() - - for line in sys.stdin: - line = line.strip() - if not line: - continue - - try: - message = json.loads(line) - - if message.get("method") == "initialize": - response = { - "jsonrpc": "2.0", - "id": message.get("id"), - "result": { - "protocolVersion": "2024-11-05", - "capabilities": {"tools": {}}, - "serverInfo": { - "name": "cognitive-memory-mcp-server", - "version": "3.1.0", - }, - }, - } - print(json.dumps(response), flush=True) - - elif message.get("method") == "tools/list": - response = { - "jsonrpc": "2.0", - "id": message.get("id"), - "result": {"tools": tools}, - } - print(json.dumps(response), flush=True) - - elif message.get("method") == "tools/call": - params = message.get("params", {}) - tool_name = params.get("name") - arguments = params.get("arguments", {}) - - result = handle_tool_call(tool_name, arguments) - - response = {"jsonrpc": "2.0", "id": message.get("id"), "result": result} - print(json.dumps(response), flush=True) - - elif message.get("method") == "notifications/initialized": - # Acknowledge but no response required for notifications - pass - - except Exception as e: - error_response = { - "jsonrpc": "2.0", - "id": message.get("id") if "message" in locals() else None, - "error": {"code": -32603, "message": str(e)}, - } - print(json.dumps(error_response), flush=True) - - -if __name__ == "__main__": - main() diff --git a/skills/cognitive-memory/scripts/edge-proposer.py b/skills/cognitive-memory/scripts/edge-proposer.py deleted file mode 100644 index 813e48a..0000000 --- a/skills/cognitive-memory/scripts/edge-proposer.py +++ /dev/null @@ -1,427 +0,0 @@ -#!/usr/bin/env python3 -"""Analyze cognitive memories and propose high-quality edges. - -Reads all active/fading memories (decay >= 0.2), groups by shared tags, -and scores candidate relationships based on three signals: - - Type heuristics (40%): e.g. fix+problem → SOLVES, decision+solution → BUILDS_ON - - Tag overlap (30%): Jaccard similarity of tag sets - - Content similarity (30%): Keyword overlap in memory body text - -Outputs ranked proposals to stdout and saves top 80 as JSON for -programmatic use by Claude Code sessions. - -Usage: - python3 edge-proposer.py - - # Then review stdout output, pick good candidates, and create edges via: - # MCP: memory_relate(from_id, to_id, rel_type, description, strength) - # CLI: claude-memory relate --description "..." - -Output: - - Ranked candidates printed to stdout (score, type, titles, shared tags) - - JSON file saved to ~/.claude/tmp/edge-candidates.json - -Scoring: - - Minimum threshold: 0.15 (below this, candidates are discarded) - - Importance boost: 1.2x multiplier when avg importance >= 0.7 - - Session-log memories tend to produce noise — review FOLLOWS edges carefully - -Relation types available: - SOLVES, CAUSES, BUILDS_ON, ALTERNATIVE_TO, REQUIRES, FOLLOWS, RELATED_TO - -First run: 2026-02-19 — produced 5186 candidates from 473 memories, - 20 high-quality edges were manually selected and created. -""" - -import json -import os -import re -from pathlib import Path -from collections import defaultdict -from itertools import combinations - -# Resolve data directory: COGNITIVE_MEMORY_DIR > XDG_DATA_HOME > default -_env_dir = os.environ.get("COGNITIVE_MEMORY_DIR", "") -if _env_dir: - MEMORY_DIR = Path(_env_dir).expanduser() -else: - _xdg_data = os.environ.get("XDG_DATA_HOME", "") or str( - Path.home() / ".local" / "share" - ) - MEMORY_DIR = Path(_xdg_data) / "cognitive-memory" - -STATE_FILE = MEMORY_DIR / "_state.json" -GRAPH_DIR = MEMORY_DIR / "graph" -EDGES_DIR = GRAPH_DIR / "edges" - -# Type-based heuristics: (type_a, type_b) -> (suggested_rel, direction, base_score) -# direction: "ab" means a->b, "ba" means b->a -TYPE_HEURISTICS = { - ("fix", "problem"): ("SOLVES", "ab", 0.6), - ("solution", "problem"): ("SOLVES", "ab", 0.7), - ("solution", "error"): ("SOLVES", "ab", 0.6), - ("fix", "error"): ("SOLVES", "ab", 0.6), - ("decision", "solution"): ("BUILDS_ON", "ab", 0.3), - ("decision", "decision"): ("ALTERNATIVE_TO", None, 0.2), - ("solution", "solution"): ("BUILDS_ON", None, 0.2), - ("configuration", "solution"): ("REQUIRES", "ab", 0.3), - ("workflow", "configuration"): ("REQUIRES", "ab", 0.3), - ("insight", "solution"): ("BUILDS_ON", "ab", 0.4), - ("insight", "decision"): ("BUILDS_ON", "ab", 0.4), - ("fix", "fix"): ("FOLLOWS", None, 0.15), - ("fix", "solution"): ("BUILDS_ON", "ab", 0.2), - ("code_pattern", "solution"): ("BUILDS_ON", "ab", 0.3), - ("procedure", "workflow"): ("BUILDS_ON", "ab", 0.3), - ("configuration", "configuration"): ("RELATED_TO", None, 0.1), -} - - -def parse_frontmatter(filepath: Path) -> dict | None: - """Parse YAML frontmatter from a markdown file.""" - try: - text = filepath.read_text(encoding="utf-8") - except Exception: - return None - - if not text.startswith("---"): - return None - - end = text.find("---", 3) - if end == -1: - return None - - fm = {} - body = text[end + 3 :].strip() - fm["_body"] = body[:500] # first 500 chars of content for matching - fm["_filepath"] = str(filepath) - - for line in text[3:end].strip().splitlines(): - if ":" not in line: - continue - key, _, val = line.partition(":") - key = key.strip() - val = val.strip().strip('"').strip("'") - - if key == "tags": - # Handle both [a, b] and "a, b" formats - val = val.strip("[]") - fm["tags"] = [ - t.strip().strip('"').strip("'") for t in val.split(",") if t.strip() - ] - elif key == "importance": - try: - fm["importance"] = float(val) - except ValueError: - pass - else: - fm[key] = val - - return fm - - -def load_memories() -> dict[str, dict]: - """Load all memories from graph subdirectories.""" - memories = {} - type_dirs = [ - "solutions", - "fixes", - "decisions", - "configurations", - "problems", - "workflows", - "code-patterns", - "errors", - "general", - "procedures", - "insights", - ] - - for type_dir in type_dirs: - dirpath = GRAPH_DIR / type_dir - if not dirpath.exists(): - continue - for f in dirpath.glob("*.md"): - fm = parse_frontmatter(f) - if fm and "id" in fm: - memories[fm["id"]] = fm - - return memories - - -def load_existing_edges() -> set[tuple[str, str]]: - """Load existing edges to avoid duplicates.""" - existing = set() - - if not EDGES_DIR.exists(): - return existing - - for f in EDGES_DIR.glob("*.md"): - fm = parse_frontmatter(f) - if fm and "from_id" in fm and "to_id" in fm: - existing.add((fm["from_id"], fm["to_id"])) - existing.add((fm["to_id"], fm["from_id"])) # bidirectional check - - return existing - - -def load_decay_state() -> dict[str, float]: - """Load decay scores from state file.""" - if not STATE_FILE.exists(): - return {} - try: - state = json.loads(STATE_FILE.read_text()) - return {mid: info.get("decay_score", 0) for mid, info in state.items()} - except Exception: - return {} - - -def tag_overlap_score(tags_a: list[str], tags_b: list[str]) -> float: - """Jaccard similarity of tag sets.""" - if not tags_a or not tags_b: - return 0.0 - set_a, set_b = set(tags_a), set(tags_b) - intersection = set_a & set_b - union = set_a | set_b - return len(intersection) / len(union) if union else 0.0 - - -def content_keyword_overlap(body_a: str, body_b: str) -> float: - """Simple keyword overlap between content bodies.""" - if not body_a or not body_b: - return 0.0 - - def extract_keywords(text: str) -> set[str]: - words = re.findall(r"[a-zA-Z_]{4,}", text.lower()) - # Filter common words - stopwords = { - "that", - "this", - "with", - "from", - "have", - "been", - "were", - "they", - "their", - "will", - "would", - "could", - "should", - "which", - "where", - "when", - "what", - "about", - "into", - "also", - "more", - "some", - "then", - "than", - "each", - "only", - "used", - "using", - "after", - "before", - "because", - "between", - "through", - "during", - "added", - "updated", - "fixed", - "error", - "issue", - "problem", - "solution", - "memory", - "memories", - "configuration", - "successfully", - "working", - "works", - } - return {w for w in words if w not in stopwords} - - kw_a = extract_keywords(body_a) - kw_b = extract_keywords(body_b) - - if not kw_a or not kw_b: - return 0.0 - - intersection = kw_a & kw_b - union = kw_a | kw_b - return len(intersection) / len(union) if union else 0.0 - - -def get_type_heuristic( - type_a: str, type_b: str -) -> tuple[str, str | None, float] | None: - """Look up type-based heuristic, checking both orderings.""" - key = (type_a, type_b) - if key in TYPE_HEURISTICS: - rel, direction, score = TYPE_HEURISTICS[key] - return rel, direction, score - - key_rev = (type_b, type_a) - if key_rev in TYPE_HEURISTICS: - rel, direction, score = TYPE_HEURISTICS[key_rev] - # Flip direction - if direction == "ab": - direction = "ba" - elif direction == "ba": - direction = "ab" - return rel, direction, score - - return None - - -def score_pair(mem_a: dict, mem_b: dict) -> dict | None: - """Score a candidate edge between two memories.""" - tags_a = mem_a.get("tags", []) - tags_b = mem_b.get("tags", []) - - # Must share at least one tag - shared_tags = set(tags_a) & set(tags_b) - if not shared_tags: - return None - - tag_score = tag_overlap_score(tags_a, tags_b) - content_score = content_keyword_overlap( - mem_a.get("_body", ""), mem_b.get("_body", "") - ) - - type_a = mem_a.get("type", "general") - type_b = mem_b.get("type", "general") - - heuristic = get_type_heuristic(type_a, type_b) - if heuristic: - suggested_rel, direction, type_score = heuristic - else: - suggested_rel = "RELATED_TO" - direction = None - type_score = 0.05 - - # Composite score - total = (tag_score * 0.4) + (content_score * 0.3) + (type_score * 0.3) - - # Boost for high importance memories - imp_a = mem_a.get("importance", 0.5) - imp_b = mem_b.get("importance", 0.5) - if isinstance(imp_a, str): - imp_a = float(imp_a) - if isinstance(imp_b, str): - imp_b = float(imp_b) - avg_importance = (imp_a + imp_b) / 2 - if avg_importance >= 0.7: - total *= 1.2 - - if total < 0.15: - return None - - # Determine from/to based on direction - if direction == "ab": - from_mem, to_mem = mem_a, mem_b - elif direction == "ba": - from_mem, to_mem = mem_b, mem_a - else: - # Default: higher importance is "from" - if imp_a >= imp_b: - from_mem, to_mem = mem_a, mem_b - else: - from_mem, to_mem = mem_b, mem_a - - return { - "score": round(total, 3), - "rel_type": suggested_rel, - "from_id": from_mem["id"], - "from_title": from_mem.get("title", "?"), - "from_type": from_mem.get("type", "?"), - "to_id": to_mem["id"], - "to_title": to_mem.get("title", "?"), - "to_type": to_mem.get("type", "?"), - "shared_tags": sorted(shared_tags), - "tag_score": round(tag_score, 3), - "content_score": round(content_score, 3), - "type_score": round(type_score, 3), - } - - -def main(): - print("Loading memories...") - memories = load_memories() - print(f" Found {len(memories)} memories") - - print("Loading decay state...") - decay_scores = load_decay_state() - - # Filter to active + fading only (decay >= 0.2) - active_ids = { - mid for mid, score in decay_scores.items() if score >= 0.2 and mid in memories - } - # Also include memories without decay state (new) - for mid in memories: - if mid not in decay_scores: - active_ids.add(mid) - - active_memories = {mid: memories[mid] for mid in active_ids} - print(f" {len(active_memories)} active/fading memories to analyze") - - print("Loading existing edges...") - existing = load_existing_edges() - print(f" {len(existing) // 2} existing edges") - - print("Scoring candidate pairs...") - candidates = [] - - # Group by shared tags first to reduce pair space - tag_groups = defaultdict(set) - for mid, mem in active_memories.items(): - for tag in mem.get("tags", []): - tag_groups[tag].add(mid) - - # Collect unique pairs that share at least one tag - seen_pairs = set() - for tag, mids in tag_groups.items(): - if len(mids) < 2 or len(mids) > 50: # skip too-common tags - continue - for a, b in combinations(mids, 2): - pair = tuple(sorted([a, b])) - if pair in seen_pairs: - continue - if (a, b) in existing or (b, a) in existing: - continue - seen_pairs.add(pair) - - result = score_pair(active_memories[a], active_memories[b]) - if result: - candidates.append(result) - - # Sort by score descending - candidates.sort(key=lambda x: x["score"], reverse=True) - - print(f"\n{'='*100}") - print(f"TOP EDGE CANDIDATES ({len(candidates)} total, showing top 80)") - print(f"{'='*100}\n") - - for i, c in enumerate(candidates[:80], 1): - print(f"#{i:3d} | Score: {c['score']:.3f} | {c['rel_type']}") - print(f" FROM [{c['from_type']}] {c['from_title']}") - print(f" TO [{c['to_type']}] {c['to_title']}") - print( - f" Tags: {', '.join(c['shared_tags'])} | " - f"tag={c['tag_score']:.2f} content={c['content_score']:.2f} type={c['type_score']:.2f}" - ) - print(f" IDs: {c['from_id']} -> {c['to_id']}") - print() - - # Also output as JSON for programmatic use - json_path = MEMORY_DIR.parent / "tmp" / "edge-candidates.json" - json_path.parent.mkdir(parents=True, exist_ok=True) - json_path.write_text(json.dumps(candidates[:80], indent=2)) - print(f"Full candidates saved to: {json_path}") - - -if __name__ == "__main__": - main() diff --git a/skills/cognitive-memory/scripts/ensure-symlinks.sh b/skills/cognitive-memory/scripts/ensure-symlinks.sh deleted file mode 100755 index 967c362..0000000 --- a/skills/cognitive-memory/scripts/ensure-symlinks.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# Ensure all Claude Code project MEMORY.md files symlink to cognitive memory CORE.md -# This makes CORE.md auto-load into every session's system prompt. -# Run by cognitive-memory-daily.service or manually. - -# Resolve data directory: COGNITIVE_MEMORY_DIR > XDG_DATA_HOME > default -MEMORY_DIR="${COGNITIVE_MEMORY_DIR:-${XDG_DATA_HOME:-$HOME/.local/share}/cognitive-memory}" -CORE="$MEMORY_DIR/CORE.md" -PROJECTS="$HOME/.claude/projects" - -if [ ! -f "$CORE" ]; then - echo "ERROR: CORE.md not found at $CORE" - exit 1 -fi - -for dir in "$PROJECTS"/*/; do - memdir="$dir/memory" - memfile="$memdir/MEMORY.md" - mkdir -p "$memdir" - # Only create/fix symlink if it doesn't already point to CORE.md - if [ -L "$memfile" ] && [ "$(readlink "$memfile")" = "$CORE" ]; then - continue - fi - # Remove existing file (regular file or broken symlink) - rm -f "$memfile" - ln -s "$CORE" "$memfile" - echo "Linked: $memfile" -done diff --git a/skills/cognitive-memory/scripts/memory-git-sync.sh b/skills/cognitive-memory/scripts/memory-git-sync.sh deleted file mode 100755 index a4ce16e..0000000 --- a/skills/cognitive-memory/scripts/memory-git-sync.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -# Commit and push cognitive memory changes to Gitea (cal/claude-memory) -# -# Called daily by cognitive-memory-daily.service after decay/core/symlinks. -# Only commits if there are actual changes. Safe to run multiple times. -# -# Location: ~/.claude/skills/cognitive-memory/scripts/memory-git-sync.sh -# Repo: cognitive-memory data dir -> https://git.manticorum.com/cal/claude-memory.git - -set -euo pipefail - -# Resolve data directory: COGNITIVE_MEMORY_DIR > XDG_DATA_HOME > default -MEMORY_DIR="${COGNITIVE_MEMORY_DIR:-${XDG_DATA_HOME:-$HOME/.local/share}/cognitive-memory}" - -cd "$MEMORY_DIR" - -# Check if there are any changes to commit -if git diff --quiet && git diff --cached --quiet && [ -z "$(git ls-files --others --exclude-standard)" ]; then - echo "memory-git-sync: no changes to commit" - exit 0 -fi - -# Stage all changes (.gitignore handles exclusions) -git add -A - -# Build a commit message from what changed -ADDED=$(git diff --cached --name-only --diff-filter=A | wc -l) -MODIFIED=$(git diff --cached --name-only --diff-filter=M | wc -l) -DELETED=$(git diff --cached --name-only --diff-filter=D | wc -l) -EDGES=$(git diff --cached --name-only --diff-filter=ACM | grep -c '^graph/edges/' || true) - -MSG="daily sync: ${ADDED} added, ${MODIFIED} modified, ${DELETED} deleted" -if [ "$EDGES" -gt 0 ]; then - MSG="$MSG (${EDGES} edges)" -fi - -git commit -m "$MSG" --no-gpg-sign 2>/dev/null || { - echo "memory-git-sync: commit failed (pre-commit hook?)" - exit 1 -} - -git push origin main 2>/dev/null || { - echo "memory-git-sync: push failed" - exit 1 -} - -echo "memory-git-sync: pushed to origin/main" diff --git a/skills/cognitive-memory/scripts/session_memory.py b/skills/cognitive-memory/scripts/session_memory.py deleted file mode 100755 index 32cf86e..0000000 --- a/skills/cognitive-memory/scripts/session_memory.py +++ /dev/null @@ -1,587 +0,0 @@ -#!/usr/bin/env python3 -""" -Session-end memory hook for Claude Code. - -Reads the session transcript, extracts significant events (commits, bug fixes, -architecture decisions, new patterns, configurations), and stores them as -cognitive memories via claude-memory CLI. -""" - -import json -import re -import subprocess -import sys -from datetime import datetime -from pathlib import Path - -LOG_FILE = Path("/tmp/session-memory-hook.log") - - -def log(msg: str): - """Append a timestamped message to the hook log file.""" - with open(LOG_FILE, "a") as f: - f.write(f"{datetime.now().isoformat(timespec='seconds')} {msg}\n") - - -def log_separator(): - """Write a visual separator to the log for readability between sessions.""" - with open(LOG_FILE, "a") as f: - f.write(f"\n{'='*72}\n") - f.write( - f" SESSION MEMORY HOOK — {datetime.now().isoformat(timespec='seconds')}\n" - ) - f.write(f"{'='*72}\n") - - -def read_stdin(): - """Read the hook input JSON from stdin.""" - try: - raw = sys.stdin.read() - log(f"[stdin] Raw input length: {len(raw)} chars") - data = json.loads(raw) - log(f"[stdin] Parsed keys: {list(data.keys())}") - return data - except (json.JSONDecodeError, EOFError) as e: - log(f"[stdin] ERROR: Failed to parse input: {e}") - return {} - - -def read_transcript(transcript_path: str) -> list[dict]: - """Read JSONL transcript file into a list of normalized message dicts. - - Claude Code transcripts use a wrapper format where each line is: - {"type": "user"|"assistant"|..., "message": {"role": ..., "content": ...}, ...} - This function unwraps them into the inner {"role": ..., "content": ...} dicts - that the rest of the code expects. Non-message entries (like file-history-snapshot) - are filtered out. - """ - messages = [] - path = Path(transcript_path) - if not path.exists(): - log(f"[transcript] ERROR: File does not exist: {transcript_path}") - return messages - file_size = path.stat().st_size - log(f"[transcript] Reading {transcript_path} ({file_size} bytes)") - parse_errors = 0 - skipped_types = {} - line_num = 0 - with open(path) as f: - for line_num, line in enumerate(f, 1): - line = line.strip() - if not line: - continue - try: - raw = json.loads(line) - except json.JSONDecodeError: - parse_errors += 1 - continue - - # Claude Code transcript format: wrapper with "type" and "message" keys - # Unwrap to get the inner message dict with "role" and "content" - if "message" in raw and isinstance(raw["message"], dict): - inner = raw["message"] - # Carry over the wrapper type for logging - wrapper_type = raw.get("type", "unknown") - if "role" not in inner: - inner["role"] = wrapper_type - messages.append(inner) - elif "role" in raw: - # Already in the expected format (future-proofing) - messages.append(raw) - else: - # Non-message entry (file-history-snapshot, etc.) - entry_type = raw.get("type", "unknown") - skipped_types[entry_type] = skipped_types.get(entry_type, 0) + 1 - - if parse_errors: - log(f"[transcript] WARNING: {parse_errors} lines failed to parse") - if skipped_types: - log(f"[transcript] Skipped non-message entries: {skipped_types}") - log(f"[transcript] Loaded {len(messages)} messages from {line_num} lines") - - # Log role breakdown - role_counts = {} - for msg in messages: - role = msg.get("role", "unknown") - role_counts[role] = role_counts.get(role, 0) + 1 - log(f"[transcript] Role breakdown: {role_counts}") - - return messages - - -def _is_memory_tool_use(block: dict) -> str | None: - """Check if a tool_use block is a memory operation. - - Detects both CLI calls (Bash with 'claude-memory') and MCP tool calls - (mcp__cognitive-memory__memory_*). Returns a short description of the - match or None. - """ - name = block.get("name", "") - - # MCP tool calls: mcp__cognitive-memory__memory_store, memory_recall, etc. - if name.startswith("mcp__cognitive-memory__memory_"): - return f"MCP:{name}" - - # Legacy/CLI: Bash commands containing 'claude-memory' - if name == "Bash": - cmd = block.get("input", {}).get("command", "") - if "claude-memory" in cmd: - return f"CLI:{cmd[:100]}" - - return None - - -def find_last_memory_command_index(messages: list[dict]) -> int: - """Find the index of the last message containing a memory operation. - - Scans for both MCP tool calls (mcp__cognitive-memory__memory_*) and - Bash tool_use blocks where the command contains 'claude-memory'. - Returns the index of that message so we can slice the transcript to - only process messages after the last memory operation, avoiding - duplicate storage. - - Returns -1 if no memory operations were found. - """ - last_index = -1 - found_commands = [] - for i, msg in enumerate(messages): - if msg.get("role") != "assistant": - continue - content = msg.get("content", []) - if not isinstance(content, list): - continue - for block in content: - if not isinstance(block, dict): - continue - if block.get("type") != "tool_use": - continue - match = _is_memory_tool_use(block) - if match: - last_index = i - found_commands.append(f"msg[{i}]: {match}") - if found_commands: - log(f"[cutoff] Found {len(found_commands)} memory operations:") - for fc in found_commands: - log(f"[cutoff] {fc}") - log(f"[cutoff] Will slice after message index {last_index}") - else: - log("[cutoff] No memory operations found — processing full transcript") - return last_index - - -def extract_text_content(message: dict) -> str: - """Extract plain text from a message's content blocks.""" - content = message.get("content", "") - if isinstance(content, str): - return content - if isinstance(content, list): - parts = [] - for block in content: - if isinstance(block, dict): - if block.get("type") == "text": - parts.append(block.get("text", "")) - elif block.get("type") == "tool_result": - # Recurse into tool result content - sub = block.get("content", "") - if isinstance(sub, str): - parts.append(sub) - elif isinstance(sub, list): - for sb in sub: - if isinstance(sb, dict) and sb.get("type") == "text": - parts.append(sb.get("text", "")) - elif isinstance(block, str): - parts.append(block) - return "\n".join(parts) - return "" - - -def extract_tool_uses(messages: list[dict]) -> list[dict]: - """Extract all tool_use blocks from assistant messages.""" - tool_uses = [] - for msg in messages: - if msg.get("role") != "assistant": - continue - content = msg.get("content", []) - if not isinstance(content, list): - continue - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - tool_uses.append(block) - - # Log tool use breakdown - tool_counts = {} - for tu in tool_uses: - name = tu.get("name", "unknown") - tool_counts[name] = tool_counts.get(name, 0) + 1 - log(f"[tools] Extracted {len(tool_uses)} tool uses: {tool_counts}") - - return tool_uses - - -def find_git_commits(tool_uses: list[dict]) -> list[str]: - """Find git commit commands from Bash tool uses.""" - commits = [] - for tu in tool_uses: - if tu.get("name") != "Bash": - continue - cmd = tu.get("input", {}).get("command", "") - if "git commit" in cmd: - commits.append(cmd) - log(f"[commits] Found {len(commits)} git commit commands") - return commits - - -def find_files_edited(tool_uses: list[dict]) -> set[str]: - """Find unique files edited via Edit/Write tools.""" - files = set() - for tu in tool_uses: - name = tu.get("name", "") - if name in ("Edit", "Write", "MultiEdit"): - fp = tu.get("input", {}).get("file_path", "") - if fp: - files.add(fp) - log(f"[files] Found {len(files)} edited files:") - for f in sorted(files): - log(f"[files] {f}") - return files - - -def find_errors_encountered(messages: list[dict]) -> list[str]: - """Find error messages from tool results.""" - errors = [] - for msg in messages: - if msg.get("role") != "user": - continue - content = msg.get("content", []) - if not isinstance(content, list): - continue - for block in content: - if not isinstance(block, dict): - continue - if block.get("type") == "tool_result" and block.get("is_error"): - error_text = extract_text_content({"content": block.get("content", "")}) - if error_text and len(error_text) > 10: - errors.append(error_text[:500]) - log(f"[errors] Found {len(errors)} error tool results") - return errors - - -def detect_project(cwd: str, files_edited: set[str]) -> str: - """Detect project name from cwd and edited files.""" - all_paths = [cwd] + list(files_edited) - project_indicators = { - "major-domo": "major-domo", - "paper-dynasty": "paper-dynasty", - "claude-home": "homelab", - "homelab": "homelab", - ".claude": "claude-config", - "openclaw": "openclaw", - "tdarr": "tdarr", - } - for path in all_paths: - for indicator, project in project_indicators.items(): - if indicator in path.lower(): - log( - f"[project] Detected '{project}' from path containing '{indicator}': {path}" - ) - return project - # Fall back to last directory component of cwd - fallback = Path(cwd).name - log(f"[project] No indicator matched, falling back to cwd name: {fallback}") - return fallback - - -def build_session_summary(messages: list[dict], cwd: str) -> dict | None: - """Analyze the transcript and build a summary of storable events.""" - log(f"[summary] Building summary from {len(messages)} messages, cwd={cwd}") - - if len(messages) < 4: - log(f"[summary] SKIP: only {len(messages)} messages, need at least 4") - return "too_short" - - tool_uses = extract_tool_uses(messages) - commits = find_git_commits(tool_uses) - files_edited = find_files_edited(tool_uses) - errors = find_errors_encountered(messages) - project = detect_project(cwd, files_edited) - - # Collect assistant text for topic extraction - assistant_texts = [] - for msg in messages: - if msg.get("role") == "assistant": - text = extract_text_content(msg) - if text: - assistant_texts.append(text) - - full_assistant_text = "\n".join(assistant_texts) - log( - f"[summary] Assistant text: {len(full_assistant_text)} chars from {len(assistant_texts)} messages" - ) - - # Detect what kind of work was done - work_types = set() - keyword_checks = { - "commit": lambda: bool(commits), - "debugging": lambda: bool(errors), - "testing": lambda: any("test" in f.lower() for f in files_edited), - "fix": lambda: any( - kw in full_assistant_text.lower() for kw in ["bug", "fix", "error", "issue"] - ), - "refactoring": lambda: any( - kw in full_assistant_text.lower() - for kw in ["refactor", "restructure", "reorganize"] - ), - "feature": lambda: any( - kw in full_assistant_text.lower() - for kw in ["new feature", "implement", "add support"] - ), - "deployment": lambda: any( - kw in full_assistant_text.lower() - for kw in ["deploy", "production", "release"] - ), - "configuration": lambda: any( - kw in full_assistant_text.lower() - for kw in ["config", "setup", "install", "configure"] - ), - "automation": lambda: any( - kw in full_assistant_text.lower() for kw in ["hook", "script", "automat"] - ), - "tooling": lambda: any( - kw in full_assistant_text.lower() - for kw in [ - "skill", - "command", - "slash command", - "commit-push", - "claude code command", - ] - ), - "creation": lambda: any( - kw in full_assistant_text.lower() - for kw in ["create a ", "created", "new file", "wrote a"] - ), - } - - for work_type, check_fn in keyword_checks.items(): - matched = check_fn() - if matched: - work_types.add(work_type) - log(f"[work_type] MATCH: {work_type}") - else: - log(f"[work_type] no match: {work_type}") - - if not work_types and not files_edited: - log("[summary] SKIP: no work types detected and no files edited") - # Log a snippet of assistant text to help debug missed keywords - snippet = full_assistant_text[:500].replace("\n", " ") - log(f"[summary] Assistant text preview: {snippet}") - return "no_work" - - log( - f"[summary] Result: project={project}, work_types={sorted(work_types)}, " - f"commits={len(commits)}, files={len(files_edited)}, errors={len(errors)}" - ) - - return { - "project": project, - "work_types": work_types, - "commits": commits, - "files_edited": sorted(files_edited), - "errors": errors[:5], # Cap at 5 - "assistant_text_snippet": full_assistant_text[:3000], - "message_count": len(messages), - "tool_use_count": len(tool_uses), - } - - -def build_memory_content(summary: dict) -> str: - """Build a concise memory content string from the summary.""" - parts = [] - - if summary["commits"]: - parts.append(f"Commits made: {len(summary['commits'])}") - for c in summary["commits"][:3]: - msg = extract_commit_message(c) - if msg: - parts.append(f" - {msg}") - - if summary["files_edited"]: - parts.append(f"Files edited ({len(summary['files_edited'])}):") - for f in summary["files_edited"][:10]: - parts.append(f" - {f}") - - if summary["errors"]: - parts.append(f"Errors encountered ({len(summary['errors'])}):") - for e in summary["errors"][:3]: - parts.append(f" - {e[:150]}") - - work_desc = ", ".join(sorted(summary["work_types"])) - parts.append(f"Work types: {work_desc}") - parts.append( - f"Session size: {summary['message_count']} messages, {summary['tool_use_count']} tool calls" - ) - - return "\n".join(parts) - - -def determine_memory_type(summary: dict) -> str: - """Pick the best memory type based on work done.""" - wt = summary["work_types"] - if "fix" in wt or "debugging" in wt: - return "fix" - if "configuration" in wt: - return "configuration" - if "feature" in wt: - return "workflow" - if "refactoring" in wt: - return "code_pattern" - if "deployment" in wt: - return "workflow" - if "automation" in wt or "tooling" in wt: - return "workflow" - if "creation" in wt: - return "workflow" - return "general" - - -def extract_commit_message(commit_cmd: str) -> str | None: - """Extract the commit message from a git commit command string. - - Handles both simple quoted (-m "msg") and heredoc (-m "$(cat <<'EOF'...EOF)") - formats. Tries heredoc first since that's the standard Claude Code format. - """ - # Try heredoc format first (standard Claude Code format) - match = re.search(r"<<'?EOF'?\n(.+?)(?:\nEOF|\n\s*EOF)", commit_cmd, re.DOTALL) - if match: - # Get first non-empty line as the message - for line in match.group(1).strip().split("\n"): - line = line.strip() - if line and not line.startswith("Co-Authored-By:"): - return line[:200] - - # Fall back to simple quoted message (matching same quote type) - match = re.search(r'-m\s+"([^"]+)"', commit_cmd) - if not match: - match = re.search(r"-m\s+'([^']+)'", commit_cmd) - if match: - return match.group(1).split("\n")[0][:200] - - return None - - -def build_title(summary: dict) -> str: - """Generate a descriptive title for the memory.""" - project = summary["project"] - work = ", ".join(sorted(summary["work_types"])) - if summary["commits"]: - msg = extract_commit_message(summary["commits"][0]) - if msg: - return f"[{project}] {msg}" - return f"[{project}] Session: {work}" - - -def store_memory(summary: dict): - """Store the session memory via claude-memory CLI.""" - title = build_title(summary) - content = build_memory_content(summary) - mem_type = determine_memory_type(summary) - importance = "0.4" - - # Boost importance for commits or significant work - if summary["commits"]: - importance = "0.6" - if len(summary["files_edited"]) > 5: - importance = "0.6" - if "deployment" in summary["work_types"]: - importance = "0.7" - - # Build tags - tags = [summary["project"]] - tags.extend(sorted(summary["work_types"])) - tags.append("session-log") - tag_str = ",".join(tags) - - cmd = [ - "claude-memory", - "store", - "--type", - mem_type, - "--title", - title, - "--content", - content, - "--tags", - tag_str, - "--importance", - importance, - "--episode", - ] - - log(f"[store] Memory type: {mem_type}, importance: {importance}") - log(f"[store] Title: {title}") - log(f"[store] Tags: {tag_str}") - log(f"[store] Content length: {len(content)} chars") - log(f"[store] Command: {' '.join(cmd)}") - - try: - result = subprocess.run(cmd, capture_output=True, text=True, timeout=10) - if result.returncode == 0: - log(f"[store] SUCCESS: {title}") - if result.stdout.strip(): - log(f"[store] stdout: {result.stdout.strip()[:200]}") - else: - log(f"[store] FAILED (rc={result.returncode}): {result.stderr.strip()}") - if result.stdout.strip(): - log(f"[store] stdout: {result.stdout.strip()[:200]}") - except subprocess.TimeoutExpired: - log("[store] FAILED: claude-memory timed out after 10s") - except FileNotFoundError: - log("[store] FAILED: claude-memory command not found in PATH") - except Exception as e: - log(f"[store] FAILED: {type(e).__name__}: {e}") - - -def main(): - log_separator() - - hook_input = read_stdin() - transcript_path = hook_input.get("transcript_path", "") - cwd = hook_input.get("cwd", "") - - log(f"[main] cwd: {cwd}") - log(f"[main] transcript_path: {transcript_path}") - - if not transcript_path: - log("[main] ABORT: no transcript path provided") - sys.exit(0) - - messages = read_transcript(transcript_path) - if not messages: - log("[main] ABORT: empty transcript") - sys.exit(0) - - total_messages = len(messages) - - # Only process messages after the last claude-memory command to avoid - # duplicating memories that were already stored during the session. - cutoff = find_last_memory_command_index(messages) - if cutoff >= 0: - messages = messages[cutoff + 1 :] - log(f"[main] After cutoff: {len(messages)} of {total_messages} messages remain") - if not messages: - log("[main] ABORT: no new messages after last claude-memory command") - sys.exit(0) - else: - log(f"[main] Processing all {total_messages} messages (no cutoff)") - - summary = build_session_summary(messages, cwd) - if not isinstance(summary, dict): - log(f"[main] ABORT: build_session_summary returned '{summary}'") - sys.exit(0) - - store_memory(summary) - log("[main] Done") - - -if __name__ == "__main__": - main() diff --git a/skills/cognitive-memory/systemd/README.md b/skills/cognitive-memory/systemd/README.md deleted file mode 100644 index 349f642..0000000 --- a/skills/cognitive-memory/systemd/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# Cognitive Memory Systemd Timers - -Reference copies of the systemd user units that automate memory maintenance. - -## Services - -| Unit | Schedule | What it does | -|------|----------|-------------| -| `cognitive-memory-daily` | daily | Decay scores, regenerate CORE.md, refresh MEMORY.md symlinks | -| `cognitive-memory-embed` | hourly | Refresh embeddings (skips if unchanged) | -| `cognitive-memory-weekly` | weekly | Run reflection cycle | - -## Install / Update - -```bash -# Copy units into place -cp ~/.claude/skills/cognitive-memory/systemd/*.service \ - ~/.claude/skills/cognitive-memory/systemd/*.timer \ - ~/.config/systemd/user/ - -# Reload and enable -systemctl --user daemon-reload -systemctl --user enable --now cognitive-memory-daily.timer -systemctl --user enable --now cognitive-memory-embed.timer -systemctl --user enable --now cognitive-memory-weekly.timer -``` - -## Verify - -```bash -systemctl --user list-timers 'cognitive-memory-*' -systemctl --user start cognitive-memory-daily.service # manual test run -journalctl --user -u cognitive-memory-daily.service --since today -``` diff --git a/skills/cognitive-memory/systemd/cognitive-memory-daily.service b/skills/cognitive-memory/systemd/cognitive-memory-daily.service deleted file mode 100644 index 0850b51..0000000 --- a/skills/cognitive-memory/systemd/cognitive-memory-daily.service +++ /dev/null @@ -1,6 +0,0 @@ -[Unit] -Description=Cognitive Memory daily maintenance (decay, core, git sync) - -[Service] -Type=oneshot -ExecStart=/bin/bash -c 'export PATH="/home/cal/.local/bin:$PATH" && /home/cal/.local/bin/claude-memory decay && /home/cal/.local/bin/claude-memory core && /home/cal/.claude/skills/cognitive-memory/scripts/memory-git-sync.sh' diff --git a/skills/cognitive-memory/systemd/cognitive-memory-daily.timer b/skills/cognitive-memory/systemd/cognitive-memory-daily.timer deleted file mode 100644 index 21d12e6..0000000 --- a/skills/cognitive-memory/systemd/cognitive-memory-daily.timer +++ /dev/null @@ -1,9 +0,0 @@ -[Unit] -Description=Run cognitive memory daily maintenance - -[Timer] -OnCalendar=daily -Persistent=true - -[Install] -WantedBy=timers.target diff --git a/skills/cognitive-memory/systemd/cognitive-memory-embed.service b/skills/cognitive-memory/systemd/cognitive-memory-embed.service deleted file mode 100644 index a2d8fb1..0000000 --- a/skills/cognitive-memory/systemd/cognitive-memory-embed.service +++ /dev/null @@ -1,6 +0,0 @@ -[Unit] -Description=Cognitive Memory hourly embedding refresh (skips if unchanged) - -[Service] -Type=oneshot -ExecStart=/bin/bash -c 'export PATH="/home/cal/.local/bin:$PATH" && /home/cal/.local/bin/claude-memory embed --if-changed' diff --git a/skills/cognitive-memory/systemd/cognitive-memory-embed.timer b/skills/cognitive-memory/systemd/cognitive-memory-embed.timer deleted file mode 100644 index c6bf5b8..0000000 --- a/skills/cognitive-memory/systemd/cognitive-memory-embed.timer +++ /dev/null @@ -1,9 +0,0 @@ -[Unit] -Description=Run cognitive memory embedding refresh hourly - -[Timer] -OnCalendar=hourly -Persistent=true - -[Install] -WantedBy=timers.target diff --git a/skills/cognitive-memory/systemd/cognitive-memory-weekly.service b/skills/cognitive-memory/systemd/cognitive-memory-weekly.service deleted file mode 100644 index eecd737..0000000 --- a/skills/cognitive-memory/systemd/cognitive-memory-weekly.service +++ /dev/null @@ -1,6 +0,0 @@ -[Unit] -Description=Cognitive Memory weekly reflection - -[Service] -Type=oneshot -ExecStart=/home/cal/.local/bin/claude-memory reflect diff --git a/skills/cognitive-memory/systemd/cognitive-memory-weekly.timer b/skills/cognitive-memory/systemd/cognitive-memory-weekly.timer deleted file mode 100644 index eb4d221..0000000 --- a/skills/cognitive-memory/systemd/cognitive-memory-weekly.timer +++ /dev/null @@ -1,9 +0,0 @@ -[Unit] -Description=Run cognitive memory weekly reflection - -[Timer] -OnCalendar=weekly -Persistent=true - -[Install] -WantedBy=timers.target