From 48df2a89ce5665595874155d192bd9007c81c6b0 Mon Sep 17 00:00:00 2001 From: Cal Corum Date: Sat, 28 Feb 2026 16:02:28 -0600 Subject: [PATCH] Initial commit: extract cognitive-memory app from skill directory Moved application code from ~/.claude/skills/cognitive-memory/ to its own project directory. The skill layer (SKILL.md, SCHEMA.md) remains in the skill directory for Claude Code to read. Co-Authored-By: Claude Opus 4.6 --- .gitignore | 11 + analysis.py | 861 ++++++++++++++++++ cli.py | 500 +++++++++++ client.py | 1091 +++++++++++++++++++++++ common.py | 581 ++++++++++++ edges.py | 297 ++++++ embeddings.py | 233 +++++ feature.json | 53 ++ mcp_server.py | 796 +++++++++++++++++ scripts/edge-proposer.py | 427 +++++++++ scripts/memory-git-sync.sh | 47 + scripts/session_memory.py | 587 ++++++++++++ systemd/README.md | 34 + systemd/cognitive-memory-daily.service | 6 + systemd/cognitive-memory-daily.timer | 9 + systemd/cognitive-memory-embed.service | 6 + systemd/cognitive-memory-embed.timer | 9 + systemd/cognitive-memory-weekly.service | 6 + systemd/cognitive-memory-weekly.timer | 9 + 19 files changed, 5563 insertions(+) create mode 100644 .gitignore create mode 100644 analysis.py create mode 100644 cli.py create mode 100644 client.py create mode 100644 common.py create mode 100644 edges.py create mode 100644 embeddings.py create mode 100644 feature.json create mode 100644 mcp_server.py create mode 100644 scripts/edge-proposer.py create mode 100755 scripts/memory-git-sync.sh create mode 100755 scripts/session_memory.py create mode 100644 systemd/README.md create mode 100644 systemd/cognitive-memory-daily.service create mode 100644 systemd/cognitive-memory-daily.timer create mode 100644 systemd/cognitive-memory-embed.service create mode 100644 systemd/cognitive-memory-embed.timer create mode 100644 systemd/cognitive-memory-weekly.service create mode 100644 systemd/cognitive-memory-weekly.timer diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bce4fac --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +__pycache__/ +*.pyc +*.pyo +*.egg-info/ +dist/ +build/ +.venv/ +*.swp +*.swo +*~ +.claude/ diff --git a/analysis.py b/analysis.py new file mode 100644 index 0000000..3bf1c4e --- /dev/null +++ b/analysis.py @@ -0,0 +1,861 @@ +""" +Cognitive Memory - Analysis Mixin + +Decay scoring, CORE.md generation, tag analysis, reflection clustering, +memory merging, and REFLECTION.md generation. +""" + +import json +import subprocess +import sys +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +from common import ( + CORE_MAX_CHARS, + MEMORY_DIR, + THRESHOLD_ACTIVE, + THRESHOLD_DORMANT, + THRESHOLD_FADING, + TYPE_WEIGHTS, + calculate_decay_score, +) + + +class AnalysisMixin: + """Mixin providing analysis operations for CognitiveMemoryClient.""" + + def decay(self) -> Dict[str, Any]: + """Recalculate all decay scores. Updates _state.json.""" + index = self._load_index() + state = self._load_state() + now = datetime.now(timezone.utc) + updated_count = 0 + + for mid, entry in index.get("entries", {}).items(): + s = state.setdefault("entries", {}).setdefault( + mid, + { + "access_count": 0, + "last_accessed": entry.get("created", now.isoformat()), + "decay_score": 0.5, + }, + ) + + # Calculate days since last access + last_str = s.get("last_accessed", entry.get("created", "")) + try: + last_dt = datetime.fromisoformat(last_str.replace("Z", "+00:00")) + if last_dt.tzinfo is None: + last_dt = last_dt.replace(tzinfo=timezone.utc) + days = (now - last_dt).total_seconds() / 86400 + except (ValueError, AttributeError): + days = 30 # Default to 30 days if unparseable + + importance = entry.get("importance", 0.5) + mem_type = entry.get("type", "general") + type_weight = TYPE_WEIGHTS.get(mem_type, 1.0) + access_count = s.get("access_count", 0) + + # Check if pinned (vault) + path = entry.get("path", "") + if path.startswith("vault/"): + s["decay_score"] = 999.0 # Pinned memories never decay + else: + s["decay_score"] = round( + calculate_decay_score(importance, days, access_count, type_weight), + 4, + ) + + updated_count += 1 + + self._save_state(state) + + # Summary + state_entries = state.get("entries", {}) + return { + "updated": updated_count, + "active": sum( + 1 + for s in state_entries.values() + if s.get("decay_score", 0) >= THRESHOLD_ACTIVE + ), + "fading": sum( + 1 + for s in state_entries.values() + if THRESHOLD_FADING <= s.get("decay_score", 0) < THRESHOLD_ACTIVE + ), + "dormant": sum( + 1 + for s in state_entries.values() + if THRESHOLD_DORMANT <= s.get("decay_score", 0) < THRESHOLD_FADING + ), + "archived": sum( + 1 + for s in state_entries.values() + if 0 < s.get("decay_score", 0) < THRESHOLD_DORMANT + ), + } + + def core(self) -> str: + """Generate CORE.md from top memories by decay score.""" + index = self._load_index() + state = self._load_state() + + def _extract_summary(body: str, max_len: int = 80) -> str: + """Extract first meaningful sentence from memory body.""" + if not body or not body.strip(): + return "" + # Skip leading code blocks, headings, and list markers + for ln in body.strip().split("\n"): + stripped = ln.strip() + if not stripped: + continue + if stripped.startswith("```") or stripped.startswith("#"): + continue + first_line = stripped.lstrip("- *>").strip() + break + else: + return "" + if not first_line: + return "" + # Extract first sentence (split on '. ') + dot_pos = first_line.find(". ") + if 0 < dot_pos < max_len: + sentence = first_line[: dot_pos + 1] + else: + sentence = first_line + if len(sentence) > max_len: + sentence = sentence[: max_len - 3].rstrip() + "..." + return sentence + + # Collect all memories with decay scores + memories = [] + for mid, entry in index.get("entries", {}).items(): + s = state.get("entries", {}).get(mid, {}) + decay = s.get("decay_score", 0.5) + if decay < THRESHOLD_FADING: + continue # Skip low-relevance + memories.append( + { + "id": mid, + "title": entry.get("title", ""), + "type": entry.get("type", "general"), + "path": entry.get("path", ""), + "importance": entry.get("importance", 0.5), + "decay_score": decay, + "tags": entry.get("tags", []), + } + ) + + # Sort by decay score descending + memories.sort(key=lambda x: x["decay_score"], reverse=True) + + # Group by type category + categories = { + "Critical Solutions": ["solution"], + "Active Decisions": ["decision"], + "Key Fixes": ["fix"], + "Configurations": ["configuration"], + "Key Procedures": ["procedure"], + "Patterns & Workflows": ["code_pattern", "workflow"], + "Known Issues": ["problem", "error"], + "Insights": ["insight"], + "General": ["general"], + } + + now_str = datetime.now(timezone.utc).strftime("%Y-%m-%d") + active_count = sum(1 for m in memories if m["decay_score"] >= THRESHOLD_ACTIVE) + total_count = len(index.get("entries", {})) + + lines = [ + "# Memory Core (auto-generated)", + f"> Last updated: {now_str} | Active memories: {active_count}/{total_count} | Next refresh: daily (systemd timer)", + "", + ] + + char_count = sum(len(l) for l in lines) + + for cat_name, cat_types in categories.items(): + cat_memories = [m for m in memories if m["type"] in cat_types] + if not cat_memories: + continue + + section_lines = [f"## {cat_name}", ""] + for mem in cat_memories[:10]: # Cap per section (reduced for summaries) + path = mem["path"] + title = mem["title"] + tags = ", ".join(mem["tags"][:3]) if mem["tags"] else "" + + # Read body to extract summary + summary = "" + mem_path = self.memory_dir / path + if mem_path.exists(): + try: + _, body = self._read_memory_file(mem_path) + summary = _extract_summary(body) + except Exception: + pass + + line = f"- [{title}]({path})" + if summary: + line += f" - {summary}" + if tags: + line += f" ({tags})" + section_lines.append(line) + + # Check budget + added = sum(len(l) for l in section_lines) + 2 + if char_count + added > CORE_MAX_CHARS: + break + + section_lines.append("") + section_text = "\n".join(section_lines) + if char_count + len(section_text) > CORE_MAX_CHARS: + break + lines.extend(section_lines) + char_count += len(section_text) + + core_content = "\n".join(lines) + core_path = self.memory_dir / "CORE.md" + core_path.write_text(core_content, encoding="utf-8") + self._git_commit("core: regenerate CORE.md", [core_path]) + + return core_content + + def tags_list(self, limit: int = 0) -> List[Dict[str, Any]]: + """List all tags with occurrence counts across all memories. + + Returns list of {"tag": str, "count": int} sorted by count descending. + """ + index = self._load_index() + tag_counts: Dict[str, int] = {} + + for entry in index.get("entries", {}).values(): + for tag in entry.get("tags", []): + normalized = tag.lower().strip() + if normalized: + tag_counts[normalized] = tag_counts.get(normalized, 0) + 1 + + results = [{"tag": tag, "count": count} for tag, count in tag_counts.items()] + results.sort(key=lambda x: x["count"], reverse=True) + + if limit > 0: + results = results[:limit] + return results + + def tags_related(self, tag: str, limit: int = 0) -> List[Dict[str, Any]]: + """Find tags that co-occur with the given tag. + + Returns list of {"tag": str, "co_occurrences": int, "memories_with_both": int} + sorted by co_occurrences descending. + """ + tag = tag.lower().strip() + index = self._load_index() + co_counts: Dict[str, int] = {} + + for entry in index.get("entries", {}).values(): + entry_tags = [t.lower().strip() for t in entry.get("tags", [])] + if tag not in entry_tags: + continue + for other_tag in entry_tags: + if other_tag != tag and other_tag: + co_counts[other_tag] = co_counts.get(other_tag, 0) + 1 + + results = [ + {"tag": t, "co_occurrences": count, "memories_with_both": count} + for t, count in co_counts.items() + ] + results.sort(key=lambda x: x["co_occurrences"], reverse=True) + + if limit > 0: + results = results[:limit] + return results + + def tags_suggest(self, memory_id: str) -> List[Dict[str, Any]]: + """Suggest tags for a memory based on co-occurrence patterns. + + Returns top 10 suggestions as {"tag": str, "score": int, "reason": str}. + """ + index = self._load_index() + entry = index.get("entries", {}).get(memory_id) + if not entry: + return [] + + existing_tags = set(t.lower().strip() for t in entry.get("tags", [])) + if not existing_tags: + return [] + + # For each existing tag, find co-occurring tags and accumulate scores + candidate_scores: Dict[str, int] = {} + candidate_sources: Dict[str, set] = {} + + for existing_tag in existing_tags: + co_tags = self.tags_related(existing_tag) + for co_entry in co_tags: + candidate = co_entry["tag"] + if candidate in existing_tags: + continue + candidate_scores[candidate] = ( + candidate_scores.get(candidate, 0) + co_entry["co_occurrences"] + ) + if candidate not in candidate_sources: + candidate_sources[candidate] = set() + candidate_sources[candidate].add(existing_tag) + + results = [] + for candidate, score in candidate_scores.items(): + sources = sorted(candidate_sources[candidate]) + reason = "co-occurs with: " + ", ".join(sources) + results.append({"tag": candidate, "score": score, "reason": reason}) + + results.sort(key=lambda x: x["score"], reverse=True) + return results[:10] + + def reflect( + self, + since: Optional[str] = None, + dry_run: bool = False, + ) -> Dict[str, Any]: + """Review recent memories, identify tag-based clusters, and output consolidation recommendations. + + Clusters memories that share 2+ tags using iterative union-find merging. + Does NOT auto-create insights; the agent reads output and decides what to store. + + Args: + since: ISO date string (YYYY-MM-DD) to filter memories from. Falls back to + _state.json's last_reflection, then 30 days ago. + dry_run: If True, return analysis without updating _state.json or logging episode. + + Returns: + Dict with clusters, total_memories_reviewed, clusters_found, and since date. + """ + now = datetime.now(timezone.utc) + state = self._load_state() + + # Determine time window + if since: + since_date = since + elif state.get("last_reflection"): + # last_reflection may be a full ISO timestamp; extract date portion + since_date = state["last_reflection"][:10] + else: + since_dt = now - timedelta(days=30) + since_date = since_dt.strftime("%Y-%m-%d") + + # Load recent memories from index + index = self._load_index() + recent_memories = [] + for mid, entry in index.get("entries", {}).items(): + created = entry.get("created", "") + # Compare date portion only (YYYY-MM-DD) + created_date = created[:10] if created else "" + if created_date >= since_date: + recent_memories.append( + { + "id": mid, + "title": entry.get("title", ""), + "type": entry.get("type", "general"), + "tags": [t.lower().strip() for t in entry.get("tags", [])], + } + ) + + total_reviewed = len(recent_memories) + + # Union-find clustering by tag overlap >= 2 + # parent[i] = index of parent in recent_memories list + n = len(recent_memories) + parent = list(range(n)) + + def find(x: int) -> int: + while parent[x] != x: + parent[x] = parent[parent[x]] # path compression + x = parent[x] + return x + + def union(a: int, b: int): + ra, rb = find(a), find(b) + if ra != rb: + parent[rb] = ra + + # Compare all pairs, check tag overlap + for i in range(n): + tags_i = set(recent_memories[i]["tags"]) + for j in range(i + 1, n): + tags_j = set(recent_memories[j]["tags"]) + if len(tags_i & tags_j) >= 2: + union(i, j) + + # Group by root + clusters_map: Dict[int, List[int]] = {} + for i in range(n): + root = find(i) + clusters_map.setdefault(root, []).append(i) + + # Build output clusters (only clusters with 2+ members) + clusters = [] + cluster_id = 0 + for indices in clusters_map.values(): + if len(indices) < 2: + continue + cluster_id += 1 + members = [] + all_tag_sets = [] + types_seen = set() + for idx in indices: + mem = recent_memories[idx] + members.append( + { + "id": mem["id"], + "title": mem["title"], + "type": mem["type"], + "tags": mem["tags"], + } + ) + all_tag_sets.append(set(mem["tags"])) + types_seen.add(mem["type"]) + + # Common tags = intersection of ALL members + common_tags = ( + sorted(set.intersection(*all_tag_sets)) if all_tag_sets else [] + ) + + # Shared tags = tags appearing in 2+ members + tag_counts: Dict[str, int] = {} + for ts in all_tag_sets: + for t in ts: + tag_counts[t] = tag_counts.get(t, 0) + 1 + shared_tags = sorted(t for t, c in tag_counts.items() if c >= 2) + + # Suggested topic + tag_label = ( + ", ".join(common_tags[:4]) + if common_tags + else ", ".join(shared_tags[:4]) + ) + type_label = ", ".join(sorted(types_seen)) + suggested_topic = f"Pattern: {tag_label} across {type_label}" + + clusters.append( + { + "cluster_id": cluster_id, + "members": members, + "common_tags": common_tags, + "shared_tags": shared_tags, + "suggested_topic": suggested_topic, + "member_count": len(members), + } + ) + + # Sort clusters by member count descending + clusters.sort(key=lambda c: c["member_count"], reverse=True) + # Re-number after sorting + for i, c in enumerate(clusters): + c["cluster_id"] = i + 1 + + result = { + "clusters": clusters, + "total_memories_reviewed": total_reviewed, + "clusters_found": len(clusters), + "since": since_date, + } + + if not dry_run: + # Update state with last_reflection timestamp + state["last_reflection"] = now.isoformat() + self._save_state(state) + + # Log an episode entry + self.episode( + type="reflection", + title=f"Reflection: {len(clusters)} clusters from {total_reviewed} memories", + tags=["reflection", "cognitive-memory"], + summary=f"Reviewed {total_reviewed} memories since {since_date}, found {len(clusters)} clusters", + ) + + # Regenerate REFLECTION.md + self.reflection_summary() + + return result + + def merge( + self, + keep_id: str, + absorb_id: str, + dry_run: bool = False, + ) -> Dict[str, Any]: + """Merge two memories: absorb one into another. + + The 'keep' memory absorbs the content, tags, and relations from + the 'absorb' memory. The absorb memory is then deleted. + All other memories referencing absorb_id are updated to point to keep_id. + + If dry_run=True, returns what would change without writing anything. + """ + # Resolve both memory file paths + keep_path = self._resolve_memory_path(keep_id) + if not keep_path: + raise ValueError(f"Keep memory not found: {keep_id}") + absorb_path = self._resolve_memory_path(absorb_id) + if not absorb_path: + raise ValueError(f"Absorb memory not found: {absorb_id}") + + # Read both memory files + keep_fm, keep_body = self._read_memory_file(keep_path) + absorb_fm, absorb_body = self._read_memory_file(absorb_path) + + keep_title = keep_fm.get("title", keep_id[:8]) + absorb_title = absorb_fm.get("title", absorb_id[:8]) + + # Combine content + merged_body = keep_body + if absorb_body.strip(): + merged_body = ( + keep_body.rstrip() + + f"\n\n---\n*Merged from: {absorb_title}*\n\n" + + absorb_body.strip() + ) + + # Merge tags (sorted, deduplicated) + keep_tags = keep_fm.get("tags", []) + absorb_tags = absorb_fm.get("tags", []) + merged_tags = sorted(list(set(keep_tags + absorb_tags))) + + # importance = max of both + merged_importance = max( + keep_fm.get("importance", 0.5), + absorb_fm.get("importance", 0.5), + ) + + # Merge relations: combine both relation lists + keep_rels = list(keep_fm.get("relations", [])) + absorb_rels = list(absorb_fm.get("relations", [])) + combined_rels = keep_rels + absorb_rels + + # Replace any relation targeting absorb_id with keep_id + for rel in combined_rels: + if rel.get("target") == absorb_id: + rel["target"] = keep_id + + # Deduplicate by (target, type) tuple + seen = set() + deduped_rels = [] + for rel in combined_rels: + key = (rel.get("target"), rel.get("type")) + if key not in seen: + seen.add(key) + deduped_rels.append(rel) + + # Remove self-referential relations (target == keep_id) + merged_rels = [r for r in deduped_rels if r.get("target") != keep_id] + + # Scan all other memories for relations targeting absorb_id + index = self._load_index() + updated_others = [] + for mid, entry in index.get("entries", {}).items(): + if mid in (keep_id, absorb_id): + continue + rels = entry.get("relations", []) + needs_update = any(r.get("target") == absorb_id for r in rels) + if needs_update: + updated_others.append(mid) + + if dry_run: + return { + "dry_run": True, + "keep_id": keep_id, + "keep_title": keep_title, + "absorb_id": absorb_id, + "absorb_title": absorb_title, + "merged_tags": merged_tags, + "merged_importance": merged_importance, + "merged_relations_count": len(merged_rels), + "other_memories_updated": len(updated_others), + "other_memory_ids": updated_others, + } + + # Write updated keep memory file + now = datetime.now(timezone.utc).isoformat() + keep_fm["tags"] = merged_tags + keep_fm["importance"] = merged_importance + keep_fm["relations"] = merged_rels + keep_fm["updated"] = now + self._write_memory_file(keep_path, keep_fm, merged_body) + + # Update index for keep memory (refresh content_preview with merged body) + keep_rel_path = str(keep_path.relative_to(self.memory_dir)) + preview = merged_body.strip()[:200] + if len(merged_body.strip()) > 200: + last_space = preview.rfind(" ") + if last_space > 0: + preview = preview[:last_space] + self._update_index_entry( + keep_id, keep_fm, keep_rel_path, content_preview=preview + ) + + # Update all other memories that reference absorb_id + for mid in updated_others: + other_path = self._resolve_memory_path(mid) + if not other_path: + continue + try: + other_fm, other_body = self._read_memory_file(other_path) + other_rels = other_fm.get("relations", []) + for rel in other_rels: + if rel.get("target") == absorb_id: + rel["target"] = keep_id + # Deduplicate after replacement + seen_other = set() + deduped_other = [] + for rel in other_rels: + key = (rel.get("target"), rel.get("type")) + if key not in seen_other: + seen_other.add(key) + deduped_other.append(rel) + # Remove self-referential + other_fm["relations"] = [ + r for r in deduped_other if r.get("target") != mid + ] + other_fm["updated"] = now + self._write_memory_file(other_path, other_fm, other_body) + other_rel_path = str(other_path.relative_to(self.memory_dir)) + self._update_index_entry(mid, other_fm, other_rel_path) + except Exception: + pass + + # Delete absorb memory file and remove from index/state + absorb_path.unlink() + self._remove_index_entry(absorb_id) + state = self._load_state() + state.get("entries", {}).pop(absorb_id, None) + self._save_state(state) + + # Git: stage absorb deletion + try: + absorb_rel = absorb_path.relative_to(self.memory_dir) + subprocess.run( + ["git", "rm", "--cached", str(absorb_rel)], + cwd=str(self.memory_dir), + capture_output=True, + timeout=5, + ) + except Exception: + pass + + self._git_commit(f"merge: {keep_title} absorbed {absorb_title}") + + return { + "success": True, + "keep_id": keep_id, + "keep_title": keep_title, + "absorb_id": absorb_id, + "absorb_title": absorb_title, + "merged_tags": merged_tags, + "merged_importance": merged_importance, + "merged_relations_count": len(merged_rels), + "other_memories_updated": len(updated_others), + } + + def reflection_summary(self) -> str: + """Generate REFLECTION.md with patterns and insights from the memory system. + + Writes to REFLECTION.md in the memory data directory and git-commits the file. + Returns the generated content. + """ + index = self._load_index() + state = self._load_state() + entries = index.get("entries", {}) + state_entries = state.get("entries", {}) + now_str = datetime.now(timezone.utc).strftime("%Y-%m-%d") + + # Last reflection date + last_reflection = state.get("last_reflection", "") + last_reflection_date = last_reflection[:10] if last_reflection else "never" + + # Count total reflections from episode files + total_reflections = 0 + episodes_dir = self.memory_dir / "episodes" + if episodes_dir.exists(): + for ep_file in sorted(episodes_dir.glob("*.md")): + try: + text = ep_file.read_text(encoding="utf-8") + # Count lines that look like reflection episode entries + for line in text.split("\n"): + if "Reflection:" in line and line.strip().startswith("## "): + total_reflections += 1 + except OSError: + pass + + lines = [ + "# Reflection Summary (auto-generated)", + f"> Last updated: {now_str} | Last reflection: {last_reflection_date} | Total reflections: {total_reflections}", + "", + ] + + # === Themes section === + # Get top tags, then find co-occurrence pairs + lines.append("## Themes") + lines.append("Top tag co-occurrences revealing recurring themes:") + lines.append("") + + top_tags = self.tags_list(limit=10) + # Build co-occurrence pairs across top tags + pair_data: Dict[Tuple[str, str], List[str]] = {} # (tag1, tag2) -> [titles] + for tag_info in top_tags[:5]: + tag = tag_info["tag"] + co_tags = self.tags_related(tag, limit=5) + for co in co_tags: + other = co["tag"] + # Canonical pair ordering + pair = tuple(sorted([tag, other])) + if pair in pair_data: + continue + # Find memories with both tags and collect titles + titles = [] + for mid, entry in entries.items(): + mem_tags = [t.lower().strip() for t in entry.get("tags", [])] + if pair[0] in mem_tags and pair[1] in mem_tags: + titles.append(entry.get("title", "untitled")) + if titles: + pair_data[pair] = titles + + # Sort pairs by memory count descending, show top 8 + sorted_pairs = sorted(pair_data.items(), key=lambda x: len(x[1]), reverse=True) + for (tag_a, tag_b), titles in sorted_pairs[:8]: + example_titles = ", ".join(f'"{t}"' for t in titles[:3]) + lines.append( + f"- **{tag_a} + {tag_b}**: {len(titles)} memories ({example_titles})" + ) + + if not sorted_pairs: + lines.append("- No co-occurrence data available yet.") + lines.append("") + + # === Cross-Project Patterns section === + lines.append("## Cross-Project Patterns") + lines.append("Tags that span multiple projects:") + lines.append("") + + known_projects = [ + "major-domo", + "paper-dynasty", + "homelab", + "vagabond-rpg", + "foundryvtt", + "strat-gameplay-webapp", + ] + # Build tag -> {project -> count} mapping + tag_project_map: Dict[str, Dict[str, int]] = {} + for mid, entry in entries.items(): + mem_tags = [t.lower().strip() for t in entry.get("tags", [])] + # Identify which projects this memory belongs to + mem_projects = [t for t in mem_tags if t in known_projects] + # For non-project tags, record which projects they appear in + non_project_tags = [t for t in mem_tags if t not in known_projects] + for tag in non_project_tags: + if tag not in tag_project_map: + tag_project_map[tag] = {} + for proj in mem_projects: + tag_project_map[tag][proj] = tag_project_map[tag].get(proj, 0) + 1 + + # Filter to tags spanning 2+ projects, sort by project count then total + cross_project = [] + for tag, projects in tag_project_map.items(): + if len(projects) >= 2: + total = sum(projects.values()) + cross_project.append((tag, projects, total)) + cross_project.sort(key=lambda x: (len(x[1]), x[2]), reverse=True) + + for tag, projects, total in cross_project[:10]: + proj_parts = ", ".join( + f"{p} ({c})" + for p, c in sorted(projects.items(), key=lambda x: x[1], reverse=True) + ) + lines.append(f"- **{tag}**: appears in {proj_parts}") + + if not cross_project: + lines.append("- No cross-project patterns detected yet.") + lines.append("") + + # === Most Accessed section === + lines.append("## Most Accessed") + lines.append("Top 10 memories by access count:") + lines.append("") + + # Sort state entries by access_count descending + accessed = [] + for mid, s in state_entries.items(): + count = s.get("access_count", 0) + if count > 0: + entry = entries.get(mid) + if entry: + accessed.append( + ( + mid, + entry.get("title", "untitled"), + entry.get("path", ""), + count, + ) + ) + accessed.sort(key=lambda x: x[3], reverse=True) + + for mid, title, path, count in accessed[:10]: + lines.append(f"1. [{title}]({path}) - {count} accesses") + + if not accessed: + lines.append("- No access data recorded yet.") + lines.append("") + + # === Recent Insights section === + lines.append("## Recent Insights") + lines.append("Insight-type memories:") + lines.append("") + + insights = [] + for mid, entry in entries.items(): + if entry.get("type") == "insight": + insights.append((mid, entry)) + # Sort by created date descending + insights.sort(key=lambda x: x[1].get("created", ""), reverse=True) + + for mid, entry in insights[:10]: + title = entry.get("title", "untitled") + path = entry.get("path", "") + preview = entry.get("content_preview", "") + if preview: + lines.append(f"- [{title}]({path}) - {preview[:80]}") + else: + lines.append(f"- [{title}]({path})") + + if not insights: + lines.append("- No insight memories stored yet.") + lines.append("") + + # === Consolidation History section === + lines.append("## Consolidation History") + lines.append("") + + merge_count = 0 + if episodes_dir.exists(): + for ep_file in sorted(episodes_dir.glob("*.md")): + try: + text = ep_file.read_text(encoding="utf-8") + for line_text in text.split("\n"): + stripped = line_text.strip() + if stripped.startswith("## ") and "merge" in stripped.lower(): + merge_count += 1 + except OSError: + pass + + lines.append(f"- Total merges performed: {merge_count}") + lines.append("") + + content = "\n".join(lines) + + # Write REFLECTION.md + reflection_path = self.memory_dir / "REFLECTION.md" + reflection_path.write_text(content, encoding="utf-8") + self._git_commit("reflection: regenerate REFLECTION.md", [reflection_path]) + + return content diff --git a/cli.py b/cli.py new file mode 100644 index 0000000..01575cb --- /dev/null +++ b/cli.py @@ -0,0 +1,500 @@ +#!/usr/bin/env python3 +""" +Cognitive Memory - CLI Interface + +Command-line interface for the cognitive memory system. +""" + +import argparse +import json +import sys + +from client import CognitiveMemoryClient +from common import ( + MEMORY_DIR, + VALID_RELATION_TYPES, + VALID_TYPES, + _load_memory_config, + resolve_graph_path, + list_graphs, +) + + +def main(): + parser = argparse.ArgumentParser( + description="Cognitive Memory - Markdown-based memory system with decay scoring", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--graph", + default=None, + help="Named memory graph to use (default: 'default')", + ) + subparsers = parser.add_subparsers(dest="command", help="Commands") + + # store + sp = subparsers.add_parser("store", help="Store a new memory") + sp.add_argument( + "--type", "-t", required=True, choices=VALID_TYPES, help="Memory type" + ) + sp.add_argument("--title", required=True, help="Memory title") + sp.add_argument("--content", "-c", required=True, help="Memory content") + sp.add_argument("--tags", help="Comma-separated tags") + sp.add_argument( + "--importance", "-i", type=float, default=0.5, help="Importance 0.0-1.0" + ) + sp.add_argument("--confidence", type=float, default=0.8, help="Confidence 0.0-1.0") + sp.add_argument( + "--episode", + action="store_true", + default=False, + help="Also log an episode entry", + ) + + # recall + sp = subparsers.add_parser("recall", help="Search memories by query") + sp.add_argument("query", help="Search query") + sp.add_argument("--types", help="Comma-separated memory types") + sp.add_argument("--limit", "-n", type=int, default=10, help="Max results") + sp.add_argument( + "--no-semantic", + action="store_true", + default=False, + help="Disable semantic search (keyword-only, faster)", + ) + + # get + sp = subparsers.add_parser("get", help="Get memory by ID") + sp.add_argument("memory_id", help="Memory UUID") + + # relate + sp = subparsers.add_parser("relate", help="Create relationship") + sp.add_argument("from_id", help="Source memory UUID") + sp.add_argument("to_id", help="Target memory UUID") + sp.add_argument("rel_type", choices=VALID_RELATION_TYPES, help="Relationship type") + sp.add_argument("--strength", type=float, default=0.8, help="Strength 0.0-1.0") + sp.add_argument("--context", help="Context description") + sp.add_argument("--description", help="Rich edge description body") + + # edge-get + sp = subparsers.add_parser("edge-get", help="Get edge by ID") + sp.add_argument("edge_id", help="Edge UUID") + + # edge-search + sp = subparsers.add_parser("edge-search", help="Search edges") + sp.add_argument("--query", "-q", help="Text query") + sp.add_argument("--types", help="Comma-separated relation types") + sp.add_argument("--from-id", help="Filter by source memory ID") + sp.add_argument("--to-id", help="Filter by target memory ID") + sp.add_argument("--limit", "-n", type=int, default=20, help="Max results") + + # edge-update + sp = subparsers.add_parser("edge-update", help="Update an edge") + sp.add_argument("edge_id", help="Edge UUID") + sp.add_argument("--description", help="New description body") + sp.add_argument("--strength", type=float, help="New strength 0.0-1.0") + + # edge-delete + sp = subparsers.add_parser("edge-delete", help="Delete an edge") + sp.add_argument("edge_id", help="Edge UUID") + + # search + sp = subparsers.add_parser("search", help="Filter memories") + sp.add_argument("--query", "-q", help="Text query") + sp.add_argument("--types", help="Comma-separated memory types") + sp.add_argument("--tags", help="Comma-separated tags") + sp.add_argument("--min-importance", type=float, help="Minimum importance") + sp.add_argument("--limit", "-n", type=int, default=20, help="Max results") + + # update + sp = subparsers.add_parser("update", help="Update a memory") + sp.add_argument("memory_id", help="Memory UUID") + sp.add_argument("--title", help="New title") + sp.add_argument("--content", help="New content") + sp.add_argument("--tags", help="New tags (comma-separated)") + sp.add_argument("--importance", type=float, help="New importance") + + # delete + sp = subparsers.add_parser("delete", help="Delete a memory") + sp.add_argument("memory_id", help="Memory UUID") + sp.add_argument("--force", "-f", action="store_true", help="Skip confirmation") + + # related + sp = subparsers.add_parser("related", help="Get related memories") + sp.add_argument("memory_id", help="Memory UUID") + sp.add_argument("--types", help="Comma-separated relationship types") + sp.add_argument("--depth", type=int, default=1, help="Traversal depth 1-5") + + # stats + subparsers.add_parser("stats", help="Show statistics") + + # recent + sp = subparsers.add_parser("recent", help="Recently created memories") + sp.add_argument("--limit", "-n", type=int, default=20, help="Max results") + + # decay + subparsers.add_parser("decay", help="Recalculate all decay scores") + + # core + subparsers.add_parser("core", help="Generate CORE.md") + + # episode + sp = subparsers.add_parser("episode", help="Log episode entry") + sp.add_argument("--type", "-t", required=True, help="Entry type") + sp.add_argument("--title", required=True, help="Entry title") + sp.add_argument("--tags", help="Comma-separated tags") + sp.add_argument("--summary", "-s", help="Summary text") + sp.add_argument("--memory-link", help="Path to related memory file") + + # reindex + subparsers.add_parser("reindex", help="Rebuild index from files") + + # embed + embed_parser = subparsers.add_parser( + "embed", help="Generate embeddings for all memories via Ollama" + ) + embed_parser.add_argument( + "--if-changed", + action="store_true", + help="Skip if no memories were added or deleted since last embed", + ) + + # pin + sp = subparsers.add_parser("pin", help="Move memory to vault (never decays)") + sp.add_argument("memory_id", help="Memory UUID") + + # reflect + sp = subparsers.add_parser( + "reflect", help="Review recent memories and identify clusters" + ) + sp.add_argument("--since", help="ISO date (YYYY-MM-DD) to review from") + sp.add_argument( + "--dry-run", action="store_true", help="Preview without updating state" + ) + + # merge + sp = subparsers.add_parser( + "merge", help="Merge two memories (absorb one into another)" + ) + sp.add_argument("keep_id", help="Memory UUID to keep") + sp.add_argument("absorb_id", help="Memory UUID to absorb and delete") + sp.add_argument( + "--dry-run", action="store_true", help="Preview merge without writing" + ) + + # reflection + subparsers.add_parser("reflection", help="Generate REFLECTION.md summary") + + # tags + sp = subparsers.add_parser("tags", help="Tag analysis commands") + tags_sub = sp.add_subparsers(dest="tags_command") + sp2 = tags_sub.add_parser("list", help="List all tags with counts") + sp2.add_argument("--limit", "-n", type=int, default=0, help="Max results (0=all)") + sp3 = tags_sub.add_parser("related", help="Find co-occurring tags") + sp3.add_argument("tag", help="Tag to analyze") + sp3.add_argument("--limit", "-n", type=int, default=0, help="Max results (0=all)") + sp4 = tags_sub.add_parser("suggest", help="Suggest tags for a memory") + sp4.add_argument("memory_id", help="Memory UUID") + + # procedure + sp = subparsers.add_parser( + "procedure", help="Store a procedure memory (convenience wrapper)" + ) + sp.add_argument("--title", required=True, help="Procedure title") + sp.add_argument("--content", "-c", required=True, help="Procedure description") + sp.add_argument("--steps", help="Comma-separated ordered steps") + sp.add_argument("--preconditions", help="Comma-separated preconditions") + sp.add_argument("--postconditions", help="Comma-separated postconditions") + sp.add_argument("--tags", help="Comma-separated tags") + sp.add_argument( + "--importance", "-i", type=float, default=0.5, help="Importance 0.0-1.0" + ) + + # config + sp = subparsers.add_parser("config", help="Manage embedding config") + sp.add_argument("--show", action="store_true", help="Display current config") + sp.add_argument( + "--provider", choices=["ollama", "openai"], help="Set embedding provider" + ) + sp.add_argument("--openai-key", help="Set OpenAI API key") + sp.add_argument( + "--ollama-model", help="Set Ollama model name (e.g. qwen3-embedding:8b)" + ) + + # graphs + subparsers.add_parser("graphs", help="List available memory graphs") + + args = parser.parse_args() + + if not args.command: + parser.print_help() + sys.exit(1) + + graph_path = resolve_graph_path(args.graph) + client = CognitiveMemoryClient(memory_dir=graph_path) + result = None + + if args.command == "store": + tags = [t.strip() for t in args.tags.split(",")] if args.tags else None + memory_id = client.store( + type=args.type, + title=args.title, + content=args.content, + tags=tags, + importance=args.importance, + confidence=args.confidence, + ) + result = {"success": True, "memory_id": memory_id} + + if args.episode: + # Get the relative path from the index for memory_link + index = client._load_index() + entry = index.get("entries", {}).get(memory_id, {}) + rel_path = entry.get("path", "") + + # Truncate content at word boundary for summary (max 100 chars) + summary = args.content.strip()[:100] + if len(args.content.strip()) > 100: + last_space = summary.rfind(" ") + if last_space > 0: + summary = summary[:last_space] + + client.episode( + type=args.type, + title=args.title, + tags=tags or [], + summary=summary, + memory_link=rel_path, + ) + result["episode_logged"] = True + + elif args.command == "recall": + types = [t.strip() for t in args.types.split(",")] if args.types else None + result = client.recall( + args.query, + memory_types=types, + limit=args.limit, + semantic=not args.no_semantic, + ) + + elif args.command == "get": + result = client.get(args.memory_id) + if not result: + result = {"error": "Memory not found"} + + elif args.command == "relate": + edge_id = client.relate( + args.from_id, + args.to_id, + args.rel_type, + strength=args.strength, + context=args.context, + description=args.description, + ) + result = {"success": bool(edge_id), "edge_id": edge_id} + + elif args.command == "edge-get": + result = client.edge_get(args.edge_id) + if not result: + result = {"error": "Edge not found"} + + elif args.command == "edge-search": + types = [t.strip() for t in args.types.split(",")] if args.types else None + result = client.edge_search( + query=args.query, + types=types, + from_id=getattr(args, "from_id", None), + to_id=getattr(args, "to_id", None), + limit=args.limit, + ) + + elif args.command == "edge-update": + success = client.edge_update( + args.edge_id, + description=args.description, + strength=args.strength, + ) + result = {"success": success} + + elif args.command == "edge-delete": + success = client.edge_delete(args.edge_id) + result = {"success": success} + + elif args.command == "search": + types = [t.strip() for t in args.types.split(",")] if args.types else None + tags = [t.strip() for t in args.tags.split(",")] if args.tags else None + result = client.search( + query=args.query, + memory_types=types, + tags=tags, + min_importance=args.min_importance, + limit=args.limit, + ) + + elif args.command == "update": + tags = [t.strip() for t in args.tags.split(",")] if args.tags else None + success = client.update( + args.memory_id, + title=args.title, + content=args.content, + tags=tags, + importance=args.importance, + ) + result = {"success": success} + + elif args.command == "delete": + if not args.force: + mem = client.get(args.memory_id) + if mem: + print(f"Deleting: {mem.get('title')}", file=sys.stderr) + success = client.delete(args.memory_id) + result = {"success": success} + + elif args.command == "related": + types = [t.strip() for t in args.types.split(",")] if args.types else None + result = client.related(args.memory_id, rel_types=types, max_depth=args.depth) + + elif args.command == "stats": + result = client.stats() + + elif args.command == "recent": + result = client.recent(limit=args.limit) + + elif args.command == "decay": + result = client.decay() + + elif args.command == "core": + content = client.core() + # Print path, not content (content is written to file) + result = { + "success": True, + "path": str(client.memory_dir / "CORE.md"), + "chars": len(content), + } + + elif args.command == "episode": + tags = [t.strip() for t in args.tags.split(",")] if args.tags else None + client.episode( + type=args.type, + title=args.title, + tags=tags, + summary=args.summary, + memory_link=args.memory_link, + ) + result = {"success": True} + + elif args.command == "reindex": + count = client.reindex() + result = {"success": True, "indexed": count} + + elif args.command == "embed": + if_changed = getattr(args, "if_changed", False) + if not if_changed: + print( + "Generating embeddings (this may take a while if model needs to be pulled)...", + file=sys.stderr, + ) + result = client.embed(if_changed=if_changed) + + elif args.command == "pin": + success = client.pin(args.memory_id) + result = {"success": success} + + elif args.command == "reflect": + result = client.reflect( + since=args.since, + dry_run=args.dry_run, + ) + + elif args.command == "merge": + result = client.merge( + keep_id=args.keep_id, + absorb_id=args.absorb_id, + dry_run=args.dry_run, + ) + + elif args.command == "reflection": + content = client.reflection_summary() + result = { + "success": True, + "path": str(client.memory_dir / "REFLECTION.md"), + "chars": len(content), + } + + elif args.command == "tags": + if args.tags_command == "list": + result = client.tags_list(limit=args.limit) + elif args.tags_command == "related": + result = client.tags_related(args.tag, limit=args.limit) + elif args.tags_command == "suggest": + result = client.tags_suggest(args.memory_id) + else: + # No subcommand given, print tags help + # Re-parse to get the tags subparser for help output + for action in parser._subparsers._actions: + if isinstance(action, argparse._SubParsersAction): + tags_parser = action.choices.get("tags") + if tags_parser: + tags_parser.print_help() + break + sys.exit(1) + + elif args.command == "procedure": + tags = [t.strip() for t in args.tags.split(",")] if args.tags else None + steps = [s.strip() for s in args.steps.split(",")] if args.steps else None + preconditions = ( + [p.strip() for p in args.preconditions.split(",")] + if args.preconditions + else None + ) + postconditions = ( + [p.strip() for p in args.postconditions.split(",")] + if args.postconditions + else None + ) + memory_id = client.store( + type="procedure", + title=args.title, + content=args.content, + tags=tags, + importance=args.importance, + steps=steps, + preconditions=preconditions, + postconditions=postconditions, + ) + result = {"success": True, "memory_id": memory_id} + + elif args.command == "graphs": + result = list_graphs() + + elif args.command == "config": + config_path = client.memory_dir / "_config.json" + config = _load_memory_config(config_path) + changed = False + + if args.provider: + config["embedding_provider"] = args.provider + changed = True + if args.openai_key: + config["openai_api_key"] = args.openai_key + changed = True + if args.ollama_model: + config["ollama_model"] = args.ollama_model + changed = True + + if changed: + config_path.write_text(json.dumps(config, indent=2)) + result = {"success": True, "updated": True} + elif args.show or not changed: + # Mask API key for display + display = dict(config) + key = display.get("openai_api_key") + if key and isinstance(key, str) and len(key) > 8: + display["openai_api_key"] = key[:4] + "..." + key[-4:] + result = display + + print(json.dumps(result, indent=2, default=str)) + + +if __name__ == "__main__": + main() diff --git a/client.py b/client.py new file mode 100644 index 0000000..c26885a --- /dev/null +++ b/client.py @@ -0,0 +1,1091 @@ +#!/usr/bin/env python3 +""" +Cognitive Memory Client + +Markdown-based memory system with decay scoring, episodic logging, and auto-curated CORE.md. +Stores memories as human-readable markdown files with YAML frontmatter. + +Usage: + # CLI + python client.py store --type solution --title "Fixed X" --content "Details..." --tags "python,fix" + python client.py recall "timeout error" + python client.py get + python client.py relate SOLVES + python client.py core + python client.py episode --type fix --title "Fixed reconnection" --tags "discord,python" + + # Python + from client import CognitiveMemoryClient + client = CognitiveMemoryClient() + memory_id = client.store(type="solution", title="...", content="...") +""" + +import json +import os +import subprocess +import sys +import tempfile +import uuid +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +# Import everything from common for backward compatibility +from common import ( + CORE_MAX_CHARS, + CONFIG_PATH, + DECAY_LAMBDA, + EDGE_FIELD_ORDER, + EDGES_DIR_NAME, + EMBEDDING_MODEL, + EMBEDDING_TIMEOUT, + EMBEDDINGS_PATH, + FIELD_ORDER, + INDEX_PATH, + MEMORY_DIR, + OLLAMA_URL, + OPENAI_EMBED_URL, + OPENAI_MODEL_DEFAULT, + STATE_PATH, + THRESHOLD_ACTIVE, + THRESHOLD_DORMANT, + THRESHOLD_FADING, + TYPE_DIRS, + TYPE_WEIGHTS, + VALID_RELATION_TYPES, + VALID_TYPES, + _cosine_similarity, + _format_yaml_value, + _load_memory_config, + _make_edge_filename, + _needs_quoting, + _ollama_embed, + _openai_embed, + _parse_relations_block, + _parse_scalar, + _quote_yaml, + calculate_decay_score, + make_filename, + parse_frontmatter, + serialize_edge_frontmatter, + serialize_frontmatter, + slugify, +) + +# Import mixins +from edges import EdgesMixin +from embeddings import EmbeddingsMixin +from analysis import AnalysisMixin + +# ============================================================================= +# CLIENT +# ============================================================================= + + +class CognitiveMemoryClient(EdgesMixin, EmbeddingsMixin, AnalysisMixin): + """Client for markdown-based cognitive memory system.""" + + def __init__(self, memory_dir: Optional[Path] = None): + self.memory_dir = memory_dir or MEMORY_DIR + self.index_path = self.memory_dir / "_index.json" + self.state_path = self.memory_dir / "_state.json" + self._embeddings_cache: Optional[Dict] = None + self._embeddings_mtime: float = 0.0 + self._ensure_dirs() + + def _ensure_dirs(self): + """Create directory structure if needed.""" + for type_dir in TYPE_DIRS.values(): + (self.memory_dir / "graph" / type_dir).mkdir(parents=True, exist_ok=True) + (self.memory_dir / "graph" / EDGES_DIR_NAME).mkdir(parents=True, exist_ok=True) + (self.memory_dir / "episodes").mkdir(parents=True, exist_ok=True) + (self.memory_dir / "vault").mkdir(parents=True, exist_ok=True) + + def _load_embeddings_cached(self) -> Optional[Dict]: + """Load _embeddings.json with mtime-based caching. + + Returns the parsed dict, or None if the file doesn't exist or fails to parse. + Only re-reads from disk when the file's mtime has changed. + """ + embeddings_path = self.memory_dir / "_embeddings.json" + if not embeddings_path.exists(): + return None + try: + current_mtime = embeddings_path.stat().st_mtime + except OSError: + return None + if ( + self._embeddings_cache is not None + and current_mtime == self._embeddings_mtime + ): + return self._embeddings_cache + try: + data = json.loads(embeddings_path.read_text()) + self._embeddings_cache = data + self._embeddings_mtime = current_mtime + return data + except (json.JSONDecodeError, OSError): + return None + + # ------------------------------------------------------------------------- + # Index and State management + # ------------------------------------------------------------------------- + + def _load_index(self) -> Dict: + """Load _index.json, return empty structure if missing.""" + if self.index_path.exists(): + try: + return json.loads(self.index_path.read_text()) + except (json.JSONDecodeError, OSError): + pass + return {"version": 2, "updated": "", "count": 0, "entries": {}, "edges": {}} + + def _save_index(self, index: Dict): + """Write _index.json.""" + index["version"] = 2 + index.setdefault("edges", {}) + index["updated"] = datetime.now(timezone.utc).isoformat() + index["count"] = len(index.get("entries", {})) + self.index_path.write_text(json.dumps(index, indent=2, default=str)) + + def _load_state(self) -> Dict: + """Load _state.json, return empty structure if missing.""" + if self.state_path.exists(): + try: + return json.loads(self.state_path.read_text()) + except (json.JSONDecodeError, OSError): + pass + return {"version": 1, "updated": "", "entries": {}} + + def _save_state(self, state: Dict): + """Write _state.json atomically, merging top-level keys to prevent race conditions.""" + # Merge with existing state to preserve keys written by concurrent processes + if self.state_path.exists(): + try: + existing = json.loads(self.state_path.read_text()) + existing.update(state) + state = existing + except (json.JSONDecodeError, OSError): + pass + state["updated"] = datetime.now(timezone.utc).isoformat() + # Atomic write: write to temp file then rename + fd, tmp_path = tempfile.mkstemp(dir=self.memory_dir, suffix=".tmp") + try: + with os.fdopen(fd, "w") as f: + json.dump(state, f, indent=2, default=str) + os.replace(tmp_path, self.state_path) + except Exception: + try: + os.unlink(tmp_path) + except OSError: + pass + raise + + # ------------------------------------------------------------------------- + # File I/O + # ------------------------------------------------------------------------- + + def _read_memory_file(self, path: Path) -> Tuple[Dict[str, Any], str]: + """Read a memory markdown file, return (frontmatter, body).""" + text = path.read_text(encoding="utf-8") + return parse_frontmatter(text) + + def _write_memory_file(self, path: Path, frontmatter: Dict[str, Any], body: str): + """Write a memory markdown file with frontmatter and body.""" + fm_str = serialize_frontmatter(frontmatter) + content = f"{fm_str}\n\n{body.strip()}\n" if body.strip() else f"{fm_str}\n" + path.write_text(content, encoding="utf-8") + + @staticmethod + def _resolve_prefix(partial_id: str, keys) -> Optional[str]: + """Resolve a partial UUID prefix to a full ID (git-style). + + Returns the full key if exactly one match is found, None if zero + matches, or raises ValueError on ambiguous (multiple) matches. + """ + if partial_id in keys: + return partial_id + matches = [k for k in keys if k.startswith(partial_id)] + if len(matches) == 1: + return matches[0] + if len(matches) > 1: + raise ValueError( + f"Ambiguous ID prefix '{partial_id}' matches {len(matches)} " + f"entries: {', '.join(sorted(matches)[:5])}" + ) + return None + + def _resolve_memory_path(self, memory_id: str) -> Optional[Path]: + """Find the file path for a memory by ID using the index.""" + index = self._load_index() + entries = index.get("entries", {}) + full_id = self._resolve_prefix(memory_id, entries) + if full_id: + entry = entries[full_id] + path = self.memory_dir / entry["path"] + if path.exists(): + return path + # Fallback: scan files (slow but reliable) + return self._scan_for_memory(memory_id) + + def _scan_for_memory(self, memory_id: str) -> Optional[Path]: + """Scan graph/ and vault/ directories for a memory file by ID.""" + search_dirs = [self.memory_dir / "graph", self.memory_dir / "vault"] + for search_dir in search_dirs: + if not search_dir.exists(): + continue + for md_file in search_dir.rglob("*.md"): + try: + fm, _ = self._read_memory_file(md_file) + if fm.get("id") == memory_id: + return md_file + except Exception: + continue + return None + + # ------------------------------------------------------------------------- + # Git operations + # ------------------------------------------------------------------------- + + def _git_commit(self, message: str, files: Optional[List[Path]] = None): + """Stage and commit files in the memory repo.""" + try: + # Check if it's a git repo + result = subprocess.run( + ["git", "rev-parse", "--git-dir"], + cwd=str(self.memory_dir), + capture_output=True, + timeout=5, + ) + if result.returncode != 0: + return # Not a git repo, skip + + if files: + for f in files: + rel = f.relative_to(self.memory_dir) + subprocess.run( + ["git", "add", str(rel)], + cwd=str(self.memory_dir), + capture_output=True, + timeout=5, + ) + else: + subprocess.run( + ["git", "add", "-A"], + cwd=str(self.memory_dir), + capture_output=True, + timeout=5, + ) + + subprocess.run( + ["git", "commit", "-m", message, "--allow-empty"], + cwd=str(self.memory_dir), + capture_output=True, + timeout=10, + ) + except (subprocess.TimeoutExpired, FileNotFoundError, OSError): + pass # Git operations are best-effort + + # ------------------------------------------------------------------------- + # Index helpers + # ------------------------------------------------------------------------- + + def _update_index_entry( + self, + memory_id: str, + frontmatter: Dict[str, Any], + rel_path: str, + content_preview: str = "", + ): + """Add or update an entry in the index.""" + index = self._load_index() + entry = { + "title": frontmatter.get("title", ""), + "type": frontmatter.get("type", "general"), + "tags": frontmatter.get("tags", []), + "importance": frontmatter.get("importance", 0.5), + "confidence": frontmatter.get("confidence", 0.8), + "created": frontmatter.get("created", ""), + "updated": frontmatter.get("updated", ""), + "path": rel_path, + "relations": frontmatter.get("relations", []), + "content_preview": content_preview, + } + # Preserve existing content_preview if not provided + if not content_preview: + existing = index.get("entries", {}).get(memory_id, {}) + entry["content_preview"] = existing.get("content_preview", "") + index.setdefault("entries", {})[memory_id] = entry + self._save_index(index) + + def _remove_index_entry(self, memory_id: str): + """Remove an entry from the index.""" + index = self._load_index() + index.get("entries", {}).pop(memory_id, None) + self._save_index(index) + + def _maybe_refresh_decay(self): + """Auto-refresh all decay scores if state is older than 24 hours.""" + if not self.state_path.exists(): + self.decay() + return + try: + state = json.loads(self.state_path.read_text()) + updated_str = state.get("updated", "") + if not updated_str: + self.decay() + return + updated_dt = datetime.fromisoformat(updated_str.replace("Z", "+00:00")) + if updated_dt.tzinfo is None: + updated_dt = updated_dt.replace(tzinfo=timezone.utc) + age_hours = (datetime.now(timezone.utc) - updated_dt).total_seconds() / 3600 + if age_hours > 24: + self.decay() + except (json.JSONDecodeError, ValueError, OSError): + self.decay() + + # ------------------------------------------------------------------------- + # Edge index helpers + # ------------------------------------------------------------------------- + + def _update_edge_index( + self, edge_id: str, edge_data: Dict[str, Any], rel_path: str + ): + """Add or update an edge entry in the index.""" + index = self._load_index() + index.setdefault("edges", {})[edge_id] = { + "type": edge_data.get("type", ""), + "from_id": edge_data.get("from_id", ""), + "from_title": edge_data.get("from_title", ""), + "to_id": edge_data.get("to_id", ""), + "to_title": edge_data.get("to_title", ""), + "strength": edge_data.get("strength", 0.8), + "created": edge_data.get("created", ""), + "updated": edge_data.get("updated", ""), + "path": rel_path, + } + self._save_index(index) + + def _remove_edge_index(self, edge_id: str): + """Remove an edge entry from the index.""" + index = self._load_index() + index.get("edges", {}).pop(edge_id, None) + self._save_index(index) + + def _scan_for_edge(self, edge_id: str) -> Optional[Path]: + """Fallback file scan for an edge by ID if index is stale.""" + edges_dir = self.memory_dir / "graph" / EDGES_DIR_NAME + if not edges_dir.exists(): + return None + for md_file in edges_dir.glob("*.md"): + try: + fm, _ = self._read_memory_file(md_file) + if fm.get("id") == edge_id: + return md_file + except Exception: + continue + return None + + def _resolve_edge_path(self, edge_id: str) -> Optional[Path]: + """Find the file path for an edge by ID using the index.""" + index = self._load_index() + edges = index.get("edges", {}) + full_id = self._resolve_prefix(edge_id, edges) + if full_id: + entry = edges[full_id] + path = self.memory_dir / entry["path"] + if path.exists(): + return path + return self._scan_for_edge(edge_id) + + # ========================================================================= + # PUBLIC API + # ========================================================================= + + def store( + self, + type: str, + title: str, + content: str, + tags: Optional[List[str]] = None, + importance: float = 0.5, + confidence: float = 0.8, + steps: Optional[List[str]] = None, + preconditions: Optional[List[str]] = None, + postconditions: Optional[List[str]] = None, + ) -> str: + """Store a new memory as a markdown file. + + Returns the memory UUID. + """ + if type not in VALID_TYPES: + raise ValueError(f"Invalid type: {type}. Valid: {VALID_TYPES}") + + memory_id = str(uuid.uuid4()) + now = datetime.now(timezone.utc).isoformat() + tags = [t.lower().strip() for t in (tags or [])] + + frontmatter = { + "id": memory_id, + "type": type, + "title": title, + "tags": tags, + "importance": max(0.0, min(1.0, importance)), + "confidence": max(0.0, min(1.0, confidence)), + "created": now, + "updated": now, + } + + # Add procedure-specific fields when type is "procedure" + if type == "procedure": + if steps: + frontmatter["steps"] = steps + if preconditions: + frontmatter["preconditions"] = preconditions + if postconditions: + frontmatter["postconditions"] = postconditions + + # Determine file path + type_dir = TYPE_DIRS[type] + filename = make_filename(title, memory_id) + rel_path = f"graph/{type_dir}/{filename}" + full_path = self.memory_dir / rel_path + + # Write file + self._write_memory_file(full_path, frontmatter, content) + + # Update index with content preview (truncate at word boundary) + preview = content.strip()[:200] + if len(content.strip()) > 200: + last_space = preview.rfind(" ") + if last_space > 0: + preview = preview[:last_space] + self._update_index_entry( + memory_id, frontmatter, rel_path, content_preview=preview + ) + + # Init state entry + state = self._load_state() + state.setdefault("entries", {})[memory_id] = { + "access_count": 0, + "last_accessed": now, + "decay_score": importance * TYPE_WEIGHTS.get(type, 1.0) * 0.5, + } + self._save_state(state) + + # Git commit + self._git_commit(f"store: {title}", [full_path]) + + return memory_id + + def recall( + self, + query: str, + memory_types: Optional[List[str]] = None, + limit: int = 10, + semantic: bool = True, + ) -> List[Dict[str, Any]]: + """Search memories by query, ranked by relevance and decay score. + + When semantic=True and embeddings exist, merges keyword and semantic results. + """ + self._maybe_refresh_decay() + query_lower = query.lower().strip() + terms = query_lower.split() + if not terms: + return [] + + index = self._load_index() + state = self._load_state() + results = [] + + for mid, entry in index.get("entries", {}).items(): + # Filter by type + if memory_types and entry.get("type") not in memory_types: + continue + + # Check decay threshold - skip archived + s = state.get("entries", {}).get(mid, {}) + decay = s.get("decay_score", 0.5) + if decay < THRESHOLD_DORMANT: + continue + + # Score based on term matches + title = (entry.get("title") or "").lower() + tags_str = " ".join(entry.get("tags") or []).lower() + + title_matches = sum(1 for t in terms if t in title) * 3 + tag_matches = sum(1 for t in terms if t in tags_str) * 2 + + score = title_matches + tag_matches + if score == 0: + # Check content_preview from index (no file I/O needed) + preview = (entry.get("content_preview") or "").lower() + preview_matches = sum(1 for t in terms if t in preview) + if preview_matches > 0: + score = preview_matches + else: + continue + + # Weight by decay score + weighted_score = score * (1 + decay) + + results.append( + { + "id": mid, + "type": entry.get("type"), + "title": entry.get("title"), + "tags": entry.get("tags", []), + "importance": entry.get("importance"), + "decay_score": round(decay, 3), + "path": entry.get("path"), + "created": entry.get("created"), + "_score": weighted_score, + } + ) + + results.sort(key=lambda x: x.pop("_score", 0), reverse=True) + keyword_results = results[:limit] + + # Merge with semantic results (on by default) + # Weights: semantic 60%, keyword 40% + # Conceptual matching dominates; keyword acts as precision boost for exact terms + if semantic: + sem_results = self.semantic_recall(query, limit=limit) + if sem_results: + score_map: Dict[str, float] = {} + result_map: Dict[str, Dict] = {} + + # Keyword: normalize rank to 0-1 (rank 1 = 1.0, last = ~0.1) + kw_weight = 0.4 + for i, r in enumerate(keyword_results): + mid = r["id"] + normalized = (limit - i) / limit + score_map[mid] = normalized * kw_weight + result_map[mid] = r + + # Semantic: similarity is already 0-1 + sem_weight = 0.6 + for r in sem_results: + mid = r["id"] + sim = r.get("similarity", 0.0) + sem_score = sim * sem_weight + if mid in score_map: + score_map[mid] += sem_score + result_map[mid]["similarity"] = sim + else: + score_map[mid] = sem_score + idx_entry = index.get("entries", {}).get(mid, {}) + s = state.get("entries", {}).get(mid, {}) + result_map[mid] = { + "id": mid, + "type": r.get("type"), + "title": r.get("title"), + "tags": r.get("tags", []), + "importance": idx_entry.get("importance"), + "decay_score": round(s.get("decay_score", 0.5), 3), + "similarity": sim, + "path": r.get("path"), + "created": idx_entry.get("created"), + } + + # Sort by merged score + merged = sorted( + result_map.values(), + key=lambda x: score_map.get(x["id"], 0), + reverse=True, + ) + return merged[:limit] + + return keyword_results + + def get(self, memory_id: str) -> Optional[Dict[str, Any]]: + """Get a memory by ID, update access count.""" + path = self._resolve_memory_path(memory_id) + if not path: + return None + + fm, body = self._read_memory_file(path) + + # Update access count in state + state = self._load_state() + now = datetime.now(timezone.utc).isoformat() + entry = state.setdefault("entries", {}).setdefault( + memory_id, + { + "access_count": 0, + "last_accessed": now, + "decay_score": 0.5, + }, + ) + entry["access_count"] = entry.get("access_count", 0) + 1 + entry["last_accessed"] = now + + # Recalculate decay score for this single memory (just accessed, so days=0) + importance = fm.get("importance", 0.5) + mem_type = fm.get("type", "general") + type_weight = TYPE_WEIGHTS.get(mem_type, 1.0) + entry["decay_score"] = round( + calculate_decay_score(importance, 0, entry["access_count"], type_weight), 4 + ) + + self._save_state(state) + + return { + "id": fm.get("id", memory_id), + "type": fm.get("type"), + "title": fm.get("title"), + "content": body, + "tags": fm.get("tags", []), + "importance": fm.get("importance"), + "confidence": fm.get("confidence"), + "created": fm.get("created"), + "updated": fm.get("updated"), + "relations": fm.get("relations", []), + "steps": fm.get("steps", []), + "preconditions": fm.get("preconditions", []), + "postconditions": fm.get("postconditions", []), + "access_count": entry["access_count"], + "decay_score": entry.get("decay_score", 0.5), + "path": str(path.relative_to(self.memory_dir)), + } + + def search( + self, + query: Optional[str] = None, + memory_types: Optional[List[str]] = None, + tags: Optional[List[str]] = None, + min_importance: Optional[float] = None, + limit: int = 20, + ) -> List[Dict[str, Any]]: + """Filter memories by type, tags, importance. Optional text query.""" + self._maybe_refresh_decay() + index = self._load_index() + state = self._load_state() + query_terms = query.lower().strip().split() if query else [] + filter_tags = set(t.lower() for t in tags) if tags else None + + results = [] + for mid, entry in index.get("entries", {}).items(): + # Type filter + if memory_types and entry.get("type") not in memory_types: + continue + # Importance filter + if min_importance and entry.get("importance", 0) < min_importance: + continue + # Tag filter + if filter_tags: + mem_tags = set(t.lower() for t in entry.get("tags", [])) + if not mem_tags.intersection(filter_tags): + continue + # Query filter + if query_terms: + title = (entry.get("title") or "").lower() + tags_str = " ".join(entry.get("tags") or []).lower() + searchable = f"{title} {tags_str}" + if not any(t in searchable for t in query_terms): + # Check content_preview from index (no file I/O needed) + preview = (entry.get("content_preview") or "").lower() + if not any(t in preview for t in query_terms): + continue + + s = state.get("entries", {}).get(mid, {}) + results.append( + { + "id": mid, + "type": entry.get("type"), + "title": entry.get("title"), + "tags": entry.get("tags", []), + "importance": entry.get("importance"), + "decay_score": round(s.get("decay_score", 0.5), 3), + "path": entry.get("path"), + "created": entry.get("created"), + } + ) + + # Sort by importance descending + results.sort(key=lambda x: x.get("importance", 0), reverse=True) + return results[:limit] + + def update( + self, + memory_id: str, + title: Optional[str] = None, + content: Optional[str] = None, + tags: Optional[List[str]] = None, + importance: Optional[float] = None, + ) -> bool: + """Update fields of an existing memory.""" + path = self._resolve_memory_path(memory_id) + if not path: + return False + + fm, body = self._read_memory_file(path) + now = datetime.now(timezone.utc).isoformat() + + if title is not None: + fm["title"] = title + if tags is not None: + fm["tags"] = [t.lower().strip() for t in tags] + if importance is not None: + fm["importance"] = max(0.0, min(1.0, importance)) + if content is not None: + body = content + + fm["updated"] = now + self._write_memory_file(path, fm, body) + + # Update index (refresh content_preview if content changed) + rel_path = str(path.relative_to(self.memory_dir)) + preview = "" + if content is not None: + preview = body.strip()[:200] + if len(body.strip()) > 200: + last_space = preview.rfind(" ") + if last_space > 0: + preview = preview[:last_space] + self._update_index_entry(memory_id, fm, rel_path, content_preview=preview) + + self._git_commit(f"update: {fm.get('title', memory_id[:8])}", [path]) + return True + + def delete(self, memory_id: str) -> bool: + """Delete a memory file, cascade-delete edges, remove from index.""" + path = self._resolve_memory_path(memory_id) + if not path: + return False + + fm, _ = self._read_memory_file(path) + title = fm.get("title", memory_id[:8]) + + # Cascade-delete edges where from_id or to_id matches + index = self._load_index() + edges_to_delete = [] + for eid, edata in index.get("edges", {}).items(): + if edata.get("from_id") == memory_id or edata.get("to_id") == memory_id: + edges_to_delete.append(eid) + for eid in edges_to_delete: + self.edge_delete(eid) + + # Remove file + path.unlink() + + # Remove from index and state + self._remove_index_entry(memory_id) + state = self._load_state() + state.get("entries", {}).pop(memory_id, None) + self._save_state(state) + + # Remove incoming references from other memories + index = self._load_index() + for mid, entry in index.get("entries", {}).items(): + rels = entry.get("relations", []) + original_count = len(rels) + rels = [r for r in rels if r.get("target") != memory_id] + if len(rels) != original_count: + entry["relations"] = rels + other_path = self._resolve_memory_path(mid) + if other_path: + try: + other_fm, other_body = self._read_memory_file(other_path) + other_fm["relations"] = [ + r + for r in other_fm.get("relations", []) + if r.get("target") != memory_id + ] + self._write_memory_file(other_path, other_fm, other_body) + except Exception: + pass + self._save_index(index) + + # Git: stage deletion + try: + rel_path = path.relative_to(self.memory_dir) + subprocess.run( + ["git", "rm", "--cached", str(rel_path)], + cwd=str(self.memory_dir), + capture_output=True, + timeout=5, + ) + except Exception: + pass + self._git_commit(f"delete: {title}") + + return True + + def related( + self, + memory_id: str, + rel_types: Optional[List[str]] = None, + max_depth: int = 1, + ) -> List[Dict[str, Any]]: + """Traverse relations from a memory, depth-limited BFS.""" + max_depth = max(1, min(5, max_depth)) + index = self._load_index() + visited = set() + results = [] + + # Resolve partial ID prefix to full UUID + entries = index.get("entries", {}) + resolved = self._resolve_prefix(memory_id, entries) + if resolved: + memory_id = resolved + + def traverse(mid: str, depth: int): + if depth > max_depth or mid in visited: + return + visited.add(mid) + + entry = entries.get(mid) + if not entry: + return + + for rel in entry.get("relations", []): + target_id = rel.get("target") + if not target_id or target_id in visited: + continue + if rel_types and rel.get("type") not in rel_types: + continue + + target_entry = index.get("entries", {}).get(target_id) + if target_entry: + results.append( + { + "id": target_id, + "type": target_entry.get("type"), + "title": target_entry.get("title"), + "relationship": rel.get("type"), + "direction": rel.get("direction", "outgoing"), + "strength": rel.get("strength"), + "depth": depth, + } + ) + traverse(target_id, depth + 1) + + traverse(memory_id, 1) + return results + + def stats(self) -> Dict[str, Any]: + """Get statistics about the memory system.""" + index = self._load_index() + state = self._load_state() + entries = index.get("entries", {}) + + by_type = {} + total_relations = 0 + for entry in entries.values(): + t = entry.get("type", "unknown") + by_type[t] = by_type.get(t, 0) + 1 + total_relations += len(entry.get("relations", [])) + + # Count files per directory + dir_counts = {} + for type_dir in TYPE_DIRS.values(): + d = self.memory_dir / "graph" / type_dir + if d.exists(): + dir_counts[type_dir] = len(list(d.glob("*.md"))) + vault_count = ( + len(list((self.memory_dir / "vault").glob("*.md"))) + if (self.memory_dir / "vault").exists() + else 0 + ) + + # Decay stats + state_entries = state.get("entries", {}) + active = sum( + 1 + for s in state_entries.values() + if s.get("decay_score", 0) >= THRESHOLD_ACTIVE + ) + fading = sum( + 1 + for s in state_entries.values() + if THRESHOLD_FADING <= s.get("decay_score", 0) < THRESHOLD_ACTIVE + ) + dormant = sum( + 1 + for s in state_entries.values() + if THRESHOLD_DORMANT <= s.get("decay_score", 0) < THRESHOLD_FADING + ) + archived = sum( + 1 + for s in state_entries.values() + if s.get("decay_score", 0) < THRESHOLD_DORMANT + ) + + # Unique outgoing relations only (avoid double-counting) + unique_relations = total_relations // 2 if total_relations > 0 else 0 + + return { + "total_memories": len(entries), + "by_type": by_type, + "dir_counts": dir_counts, + "vault_count": vault_count, + "total_relations": unique_relations, + "decay_summary": { + "active": active, + "fading": fading, + "dormant": dormant, + "archived": archived, + }, + "memory_dir": str(self.memory_dir), + } + + def recent(self, limit: int = 20) -> List[Dict[str, Any]]: + """Get most recently created memories.""" + index = self._load_index() + entries = [ + {"id": mid, **entry} for mid, entry in index.get("entries", {}).items() + ] + entries.sort(key=lambda x: x.get("created", ""), reverse=True) + return entries[:limit] + + def episode( + self, + type: str, + title: str, + tags: Optional[List[str]] = None, + summary: Optional[str] = None, + memory_link: Optional[str] = None, + ): + """Append an entry to today's episode file.""" + now_local = datetime.now().astimezone() + today = now_local.strftime("%Y-%m-%d") + time_str = now_local.strftime("%H:%M") + ep_path = self.memory_dir / "episodes" / f"{today}.md" + + tags = tags or [] + tags_str = ", ".join(tags) + + entry_lines = [ + f"## {time_str} - {title}", + f"- **Type:** {type}", + ] + if tags_str: + entry_lines.append(f"- **Tags:** {tags_str}") + if memory_link: + # Extract name from link path + name = Path(memory_link).stem + entry_lines.append(f"- **Memory:** [{name}]({memory_link})") + if summary: + entry_lines.append(f"- **Summary:** {summary}") + + entry_text = "\n".join(entry_lines) + + if ep_path.exists(): + existing = ep_path.read_text(encoding="utf-8") + new_content = f"{existing.rstrip()}\n\n{entry_text}\n" + else: + new_content = f"# {today}\n\n{entry_text}\n" + + ep_path.write_text(new_content, encoding="utf-8") + self._git_commit(f"episode: {title}", [ep_path]) + + def reindex(self) -> int: + """Rebuild _index.json from all markdown files. Recovery command.""" + index = {"version": 2, "updated": "", "count": 0, "entries": {}, "edges": {}} + count = 0 + + search_dirs = [ + ("graph", self.memory_dir / "graph"), + ("vault", self.memory_dir / "vault"), + ] + + edges_dir = self.memory_dir / "graph" / EDGES_DIR_NAME + + for _prefix, search_dir in search_dirs: + if not search_dir.exists(): + continue + for md_file in search_dir.rglob("*.md"): + # Skip edge files — handled separately + if edges_dir.exists() and md_file.parent == edges_dir: + continue + try: + fm, body = self._read_memory_file(md_file) + mid = fm.get("id") + if not mid: + continue + rel_path = str(md_file.relative_to(self.memory_dir)) + preview = body.strip()[:200] + if len(body.strip()) > 200: + last_space = preview.rfind(" ") + if last_space > 0: + preview = preview[:last_space] + index["entries"][mid] = { + "title": fm.get("title", ""), + "type": fm.get("type", "general"), + "tags": fm.get("tags", []), + "importance": fm.get("importance", 0.5), + "confidence": fm.get("confidence", 0.8), + "created": fm.get("created", ""), + "updated": fm.get("updated", ""), + "path": rel_path, + "relations": fm.get("relations", []), + "content_preview": preview, + } + count += 1 + except Exception as e: + print(f"Warning: Failed to index {md_file}: {e}", file=sys.stderr) + + # Scan edge files + if edges_dir.exists(): + for md_file in edges_dir.glob("*.md"): + try: + fm, _ = self._read_memory_file(md_file) + eid = fm.get("id") + if not eid: + continue + rel_path = str(md_file.relative_to(self.memory_dir)) + index["edges"][eid] = { + "type": fm.get("type", ""), + "from_id": fm.get("from_id", ""), + "from_title": fm.get("from_title", ""), + "to_id": fm.get("to_id", ""), + "to_title": fm.get("to_title", ""), + "strength": fm.get("strength", 0.8), + "created": fm.get("created", ""), + "updated": fm.get("updated", ""), + "path": rel_path, + } + except Exception as e: + print( + f"Warning: Failed to index edge {md_file}: {e}", file=sys.stderr + ) + + self._save_index(index) + return count + + def pin(self, memory_id: str) -> bool: + """Move a memory to vault/ (never decays).""" + path = self._resolve_memory_path(memory_id) + if not path: + return False + + fm, body = self._read_memory_file(path) + vault_dir = self.memory_dir / "vault" + vault_dir.mkdir(parents=True, exist_ok=True) + + new_path = vault_dir / path.name + self._write_memory_file(new_path, fm, body) + + # Remove old file + old_rel = str(path.relative_to(self.memory_dir)) + path.unlink() + + # Update index with new path + new_rel = str(new_path.relative_to(self.memory_dir)) + self._update_index_entry(memory_id, fm, new_rel) + + # Set decay to infinity in state + state = self._load_state() + state.setdefault("entries", {}).setdefault(memory_id, {})["decay_score"] = 999.0 + self._save_state(state) + + self._git_commit(f"pin: {fm.get('title', memory_id[:8])}", [new_path]) + return True + + +if __name__ == "__main__": + from cli import main + + main() diff --git a/common.py b/common.py new file mode 100644 index 0000000..3d587fe --- /dev/null +++ b/common.py @@ -0,0 +1,581 @@ +""" +Cognitive Memory - Common Constants & Helpers + +Module-level constants, YAML parsing, slug generation, decay calculation, +embedding helpers, and cosine similarity. Shared by all other modules. +""" + +import json +import math +import os +import re +import urllib.request +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple +from urllib.error import URLError + +# ============================================================================= +# CONSTANTS +# ============================================================================= + +# Data directory resolution order: +# 1. COGNITIVE_MEMORY_DIR env var (explicit override) +# 2. XDG_DATA_HOME/cognitive-memory/ (Linux standard) +# 3. ~/.local/share/cognitive-memory/ (XDG default) +_env_dir = os.environ.get("COGNITIVE_MEMORY_DIR", "") +if _env_dir: + MEMORY_DIR = Path(_env_dir).expanduser() +else: + _xdg_data = os.environ.get("XDG_DATA_HOME", "") or str( + Path.home() / ".local" / "share" + ) + MEMORY_DIR = Path(_xdg_data) / "cognitive-memory" + +INDEX_PATH = MEMORY_DIR / "_index.json" +STATE_PATH = MEMORY_DIR / "_state.json" +EMBEDDINGS_PATH = MEMORY_DIR / "_embeddings.json" +OLLAMA_URL = "http://localhost:11434" +EMBEDDING_MODEL = "nomic-embed-text" +EMBEDDING_TIMEOUT = 5 # seconds +CONFIG_PATH = MEMORY_DIR / "_config.json" +OPENAI_EMBED_URL = "https://api.openai.com/v1/embeddings" +OPENAI_MODEL_DEFAULT = "text-embedding-3-small" + +# Memory type -> directory name mapping +TYPE_DIRS = { + "solution": "solutions", + "fix": "fixes", + "decision": "decisions", + "configuration": "configurations", + "problem": "problems", + "workflow": "workflows", + "code_pattern": "code-patterns", + "error": "errors", + "general": "general", + "procedure": "procedures", + "insight": "insights", +} + +VALID_TYPES = list(TYPE_DIRS.keys()) + +# Decay model type weights +TYPE_WEIGHTS = { + "decision": 1.3, + "solution": 1.2, + "insight": 1.25, + "code_pattern": 1.1, + "configuration": 1.1, + "fix": 1.0, + "workflow": 1.0, + "problem": 0.9, + "error": 0.8, + "general": 0.8, + "procedure": 1.4, +} + +DECAY_LAMBDA = 0.03 # Half-life ~23 days + +# Decay score thresholds +THRESHOLD_ACTIVE = 0.5 +THRESHOLD_FADING = 0.2 +THRESHOLD_DORMANT = 0.05 + +# Relationship types (subset from MemoryGraph, focused on most useful) +VALID_RELATION_TYPES = [ + "SOLVES", + "CAUSES", + "BUILDS_ON", + "ALTERNATIVE_TO", + "REQUIRES", + "FOLLOWS", + "RELATED_TO", +] + +# Edge file constants +EDGES_DIR_NAME = "edges" +EDGE_FIELD_ORDER = [ + "id", + "type", + "from_id", + "from_title", + "to_id", + "to_title", + "strength", + "created", + "updated", +] + +# Frontmatter field order for consistent output +FIELD_ORDER = [ + "id", + "type", + "title", + "tags", + "importance", + "confidence", + "steps", + "preconditions", + "postconditions", + "created", + "updated", + "relations", +] + +# CORE.md token budget (approximate, 1 token ~= 4 chars) +CORE_MAX_CHARS = 12000 # ~3K tokens + +GRAPHS_BASE_DIR = MEMORY_DIR.parent / "cognitive-memory-graphs" + + +# ============================================================================= +# YAML FRONTMATTER PARSING (stdlib only) +# ============================================================================= + + +def _needs_quoting(s: str) -> bool: + """Check if a YAML string value needs quoting.""" + if not s: + return True + if any(c in s for c in ":#{}[]&*?|>!%@`"): + return True + try: + float(s) + return True + except ValueError: + pass + if s.lower() in ("true", "false", "null", "yes", "no", "on", "off"): + return True + return False + + +def _quote_yaml(s: str) -> str: + """Quote a string for YAML, escaping internal quotes.""" + escaped = s.replace("\\", "\\\\").replace('"', '\\"') + return f'"{escaped}"' + + +def _format_yaml_value(value: Any, force_quote: bool = False) -> str: + """Format a Python value for YAML output.""" + if value is None: + return "null" + if isinstance(value, bool): + return "true" if value else "false" + if isinstance(value, (int, float)): + return str(value) + s = str(value) + if force_quote or _needs_quoting(s): + return _quote_yaml(s) + return s + + +def _parse_scalar(value: str) -> Any: + """Parse a YAML scalar value to Python type.""" + v = value.strip() + if not v or v == "null": + return None + if v == "true": + return True + if v == "false": + return False + # Try numeric + try: + if "." in v: + return float(v) + return int(v) + except ValueError: + pass + # Strip quotes + if (v.startswith('"') and v.endswith('"')) or ( + v.startswith("'") and v.endswith("'") + ): + return v[1:-1] + return v + + +def serialize_frontmatter(data: Dict[str, Any]) -> str: + """Serialize a dict to YAML frontmatter string (between --- markers).""" + lines = ["---"] + + for key in FIELD_ORDER: + if key not in data: + continue + value = data[key] + + if key == "tags" and isinstance(value, list): + if value: + items = ", ".join(_format_yaml_value(t) for t in value) + lines.append(f"tags: [{items}]") + else: + lines.append("tags: []") + + elif key in ("steps", "preconditions", "postconditions") and isinstance( + value, list + ): + if not value: + continue + lines.append(f"{key}:") + for item in value: + lines.append(f" - {_format_yaml_value(str(item), force_quote=True)}") + + elif key == "relations" and isinstance(value, list): + if not value: + continue + lines.append("relations:") + for rel in value: + first = True + for rk in [ + "target", + "type", + "direction", + "strength", + "context", + "edge_id", + ]: + if rk not in rel: + continue + rv = rel[rk] + prefix = " - " if first else " " + force_q = rk in ("context",) + lines.append( + f"{prefix}{rk}: {_format_yaml_value(rv, force_quote=force_q)}" + ) + first = False + + elif key == "title": + lines.append(f"title: {_format_yaml_value(value, force_quote=True)}") + + else: + lines.append(f"{key}: {_format_yaml_value(value)}") + + lines.append("---") + return "\n".join(lines) + + +def parse_frontmatter(text: str) -> Tuple[Dict[str, Any], str]: + """Parse YAML frontmatter and body from markdown text. + + Returns (frontmatter_dict, body_text). + """ + if not text.startswith("---\n"): + return {}, text + + # Find closing --- + end_match = re.search(r"\n---\s*\n", text[3:]) + if not end_match: + # Try end of string + if text.rstrip().endswith("---"): + end_pos = text.rstrip().rfind("\n---") + if end_pos <= 3: + return {}, text + fm_text = text[4:end_pos] + body = "" + else: + return {}, text + else: + end_pos = end_match.start() + 3 # Offset from text[3:] + fm_text = text[4:end_pos] + body = text[end_pos + end_match.end() - end_match.start() :] + + body = body.lstrip("\n") + data = {} + lines = fm_text.split("\n") + i = 0 + + while i < len(lines): + line = lines[i] + + # Skip empty lines + if not line.strip(): + i += 1 + continue + + # Must be a top-level key (no leading whitespace) + if line[0] == " ": + i += 1 + continue + + if ":" not in line: + i += 1 + continue + + key, _, rest = line.partition(":") + key = key.strip() + rest = rest.strip() + + if not rest: + # Block value - collect indented lines + block_lines = [] + j = i + 1 + while j < len(lines) and lines[j] and lines[j][0] == " ": + block_lines.append(lines[j]) + j += 1 + + if key == "relations": + data["relations"] = _parse_relations_block(block_lines) + elif block_lines and block_lines[0].strip().startswith("- "): + # Simple list + data[key] = [ + _parse_scalar(bl.strip().lstrip("- ")) + for bl in block_lines + if bl.strip().startswith("- ") + ] + else: + data[key] = None + i = j + continue + + # Inline list: [a, b, c] + if rest.startswith("[") and rest.endswith("]"): + inner = rest[1:-1] + if inner.strip(): + data[key] = [ + _parse_scalar(v.strip()) for v in inner.split(",") if v.strip() + ] + else: + data[key] = [] + else: + data[key] = _parse_scalar(rest) + + i += 1 + + return data, body + + +def _parse_relations_block(lines: List[str]) -> List[Dict[str, Any]]: + """Parse a YAML block list of relation dicts.""" + relations = [] + current = None + + for line in lines: + stripped = line.strip() + if not stripped: + continue + + if stripped.startswith("- "): + # New relation entry + current = {} + relations.append(current) + # Parse key:value on same line as - + rest = stripped[2:] + if ":" in rest: + k, _, v = rest.partition(":") + current[k.strip()] = _parse_scalar(v.strip()) + elif current is not None and ":" in stripped: + k, _, v = stripped.partition(":") + current[k.strip()] = _parse_scalar(v.strip()) + + return relations + + +# ============================================================================= +# HELPER FUNCTIONS +# ============================================================================= + + +def slugify(text: str, max_length: int = 60) -> str: + """Convert text to a URL-friendly slug.""" + text = text.lower().strip() + text = re.sub(r"[^\w\s-]", "", text) + text = re.sub(r"[\s_]+", "-", text) + text = re.sub(r"-+", "-", text) + text = text.strip("-") + if len(text) > max_length: + text = text[:max_length].rstrip("-") + return text or "untitled" + + +def make_filename(title: str, memory_id: str) -> str: + """Create a filename from title and UUID suffix.""" + slug = slugify(title) + suffix = memory_id[:6] + return f"{slug}-{suffix}.md" + + +def calculate_decay_score( + importance: float, days_since_access: float, access_count: int, type_weight: float +) -> float: + """Calculate decay score for a memory. + + decay_score = importance * e^(-lambda * days) * log2(access_count + 1) * type_weight + """ + time_factor = math.exp(-DECAY_LAMBDA * days_since_access) + usage_factor = math.log2(access_count + 1) if access_count > 0 else 1.0 + return importance * time_factor * usage_factor * type_weight + + +def _ollama_embed( + texts: List[str], + model: str = EMBEDDING_MODEL, + timeout: int = EMBEDDING_TIMEOUT, +) -> Optional[List[List[float]]]: + """Get embeddings from Ollama for a list of texts. + + Returns list of embedding vectors, or None if Ollama is unavailable. + """ + try: + payload = json.dumps({"model": model, "input": texts}).encode("utf-8") + req = urllib.request.Request( + f"{OLLAMA_URL}/api/embed", + data=payload, + headers={"Content-Type": "application/json"}, + method="POST", + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + if resp.status != 200: + return None + data = json.loads(resp.read().decode("utf-8")) + embeddings = data.get("embeddings") + if embeddings and isinstance(embeddings, list): + return embeddings + return None + except ( + ConnectionRefusedError, + URLError, + TimeoutError, + OSError, + json.JSONDecodeError, + ValueError, + KeyError, + ): + return None + + +def _load_memory_config(config_path: Optional[Path] = None) -> Dict[str, Any]: + """Read _config.json, return defaults if missing.""" + path = config_path or CONFIG_PATH + defaults = { + "embedding_provider": "ollama", + "openai_api_key": None, + "ollama_model": EMBEDDING_MODEL, + "openai_model": OPENAI_MODEL_DEFAULT, + } + if path.exists(): + try: + data = json.loads(path.read_text()) + for k, v in defaults.items(): + data.setdefault(k, v) + return data + except (json.JSONDecodeError, OSError): + pass + return defaults + + +def _openai_embed( + texts: List[str], + api_key: str, + model: str = OPENAI_MODEL_DEFAULT, + timeout: int = 30, +) -> Optional[List[List[float]]]: + """Get embeddings from OpenAI API (stdlib-only, same interface as _ollama_embed).""" + try: + payload = json.dumps({"input": texts, "model": model}).encode("utf-8") + req = urllib.request.Request( + OPENAI_EMBED_URL, + data=payload, + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {api_key}", + }, + method="POST", + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + if resp.status != 200: + return None + data = json.loads(resp.read().decode("utf-8")) + items = data.get("data", []) + if items and isinstance(items, list): + # Sort by index to ensure order matches input + items.sort(key=lambda x: x.get("index", 0)) + return [item["embedding"] for item in items] + return None + except ( + ConnectionRefusedError, + URLError, + TimeoutError, + OSError, + json.JSONDecodeError, + ValueError, + KeyError, + ): + return None + + +def _cosine_similarity(a: List[float], b: List[float]) -> float: + """Compute cosine similarity between two vectors.""" + dot = sum(x * y for x, y in zip(a, b)) + norm_a = math.sqrt(sum(x * x for x in a)) + norm_b = math.sqrt(sum(x * x for x in b)) + if norm_a == 0.0 or norm_b == 0.0: + return 0.0 + return dot / (norm_a * norm_b) + + +def _make_edge_filename( + from_title: str, rel_type: str, to_title: str, edge_id: str +) -> str: + """Produce edge filename: {from-slug}--{TYPE}--{to-slug}-{6char}.md""" + from_slug = slugify(from_title, max_length=30) + to_slug = slugify(to_title, max_length=30) + suffix = edge_id[:6] + return f"{from_slug}--{rel_type}--{to_slug}-{suffix}.md" + + +def serialize_edge_frontmatter(data: Dict[str, Any]) -> str: + """Serialize an edge dict to YAML frontmatter string.""" + lines = ["---"] + for key in EDGE_FIELD_ORDER: + if key not in data: + continue + value = data[key] + if key in ("from_title", "to_title"): + lines.append(f"{key}: {_format_yaml_value(value, force_quote=True)}") + else: + lines.append(f"{key}: {_format_yaml_value(value)}") + lines.append("---") + return "\n".join(lines) + + +def load_graph_config(config_path: Optional[Path] = None) -> Dict[str, Dict[str, Any]]: + """Load named graphs config from _config.json 'graphs' key.""" + cfg = _load_memory_config(config_path) + return cfg.get("graphs", {}) + + +def resolve_graph_path( + graph_name: Optional[str], config_path: Optional[Path] = None +) -> Path: + """Resolve graph name to directory path. None/'default' → MEMORY_DIR.""" + if not graph_name or graph_name == "default": + return MEMORY_DIR + graphs = load_graph_config(config_path) + if graph_name in graphs: + p = graphs[graph_name].get("path", "") + if p: + return Path(p).expanduser() + # Convention: sibling of MEMORY_DIR + return GRAPHS_BASE_DIR / graph_name + + +def list_graphs(config_path: Optional[Path] = None) -> List[Dict[str, Any]]: + """List all known graphs: default + configured + discovered on disk.""" + result = [{"name": "default", "path": str(MEMORY_DIR)}] + seen = {"default"} + + # From config + graphs = load_graph_config(config_path) + for name, cfg in graphs.items(): + if name not in seen: + p = cfg.get("path", "") + path = str(Path(p).expanduser()) if p else str(GRAPHS_BASE_DIR / name) + result.append({"name": name, "path": path}) + seen.add(name) + + # Discover on disk + if GRAPHS_BASE_DIR.exists(): + for d in sorted(GRAPHS_BASE_DIR.iterdir()): + if d.is_dir() and d.name not in seen: + result.append({"name": d.name, "path": str(d)}) + seen.add(d.name) + + return result diff --git a/edges.py b/edges.py new file mode 100644 index 0000000..033acdb --- /dev/null +++ b/edges.py @@ -0,0 +1,297 @@ +"""EdgesMixin: edge/relationship operations for CognitiveMemoryClient. + +This module is part of the client.py mixin refactor. EdgesMixin provides all +edge CRUD and search operations. It relies on helper methods resolved at runtime +via MRO from the composed CognitiveMemoryClient class. +""" + +import uuid +import subprocess +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional, List, Dict, Any + +from common import ( + VALID_RELATION_TYPES, + EDGES_DIR_NAME, + _make_edge_filename, + serialize_edge_frontmatter, +) + + +class EdgesMixin: + """Mixin providing edge/relationship operations for CognitiveMemoryClient.""" + + def relate( + self, + from_id: str, + to_id: str, + rel_type: str, + strength: float = 0.8, + context: Optional[str] = None, + description: Optional[str] = None, + ) -> str: + """Create a relationship between two memories with an edge file. + + Returns edge_id string, or empty string if duplicate. + """ + if rel_type not in VALID_RELATION_TYPES: + raise ValueError( + f"Invalid relation type: {rel_type}. Valid: {VALID_RELATION_TYPES}" + ) + + from_path = self._resolve_memory_path(from_id) + to_path = self._resolve_memory_path(to_id) + if not from_path or not to_path: + raise ValueError(f"Memory not found: {from_id if not from_path else to_id}") + + # Read source memory + fm, body = self._read_memory_file(from_path) + relations = fm.get("relations", []) + + # Check for duplicate + for r in relations: + if r.get("target") == to_id and r.get("type") == rel_type: + return "" # Already exists + + # Read target memory for title + to_fm, to_body = self._read_memory_file(to_path) + + # Create edge file + edge_id = str(uuid.uuid4()) + now = datetime.now(timezone.utc).isoformat() + from_title = fm.get("title", from_id[:8]) + to_title = to_fm.get("title", to_id[:8]) + clamped_strength = max(0.0, min(1.0, strength)) + + edge_data = { + "id": edge_id, + "type": rel_type, + "from_id": from_id, + "from_title": from_title, + "to_id": to_id, + "to_title": to_title, + "strength": clamped_strength, + "created": now, + "updated": now, + } + edge_filename = _make_edge_filename(from_title, rel_type, to_title, edge_id) + edge_path = self.memory_dir / "graph" / EDGES_DIR_NAME / edge_filename + edge_fm_str = serialize_edge_frontmatter(edge_data) + edge_body = description.strip() if description else "" + edge_content = ( + f"{edge_fm_str}\n\n{edge_body}\n" if edge_body else f"{edge_fm_str}\n" + ) + edge_path.write_text(edge_content, encoding="utf-8") + + # Update source memory frontmatter with edge_id + new_rel = { + "target": to_id, + "type": rel_type, + "direction": "outgoing", + "strength": clamped_strength, + "edge_id": edge_id, + } + if context: + new_rel["context"] = context + relations.append(new_rel) + fm["relations"] = relations + fm["updated"] = now + self._write_memory_file(from_path, fm, body) + + # Add incoming relation to target with edge_id + to_relations = to_fm.get("relations", []) + has_incoming = any( + r.get("target") == from_id + and r.get("type") == rel_type + and r.get("direction") == "incoming" + for r in to_relations + ) + if not has_incoming: + incoming_rel = { + "target": from_id, + "type": rel_type, + "direction": "incoming", + "strength": clamped_strength, + "edge_id": edge_id, + } + if context: + incoming_rel["context"] = context + to_relations.append(incoming_rel) + to_fm["relations"] = to_relations + to_fm["updated"] = now + self._write_memory_file(to_path, to_fm, to_body) + + # Update memory index + rel_from = str(from_path.relative_to(self.memory_dir)) + rel_to = str(to_path.relative_to(self.memory_dir)) + self._update_index_entry(from_id, fm, rel_from) + self._update_index_entry(to_id, to_fm, rel_to) + + # Update edge index + self._update_edge_index( + edge_id, edge_data, f"graph/{EDGES_DIR_NAME}/{edge_filename}" + ) + + self._git_commit( + f"relate: {from_id[:8]} --{rel_type}--> {to_id[:8]}", + [from_path, to_path, edge_path], + ) + return edge_id + + def edge_get(self, edge_id: str) -> Optional[Dict[str, Any]]: + """Read full edge file (frontmatter + body).""" + path = self._resolve_edge_path(edge_id) + if not path: + return None + fm, body = self._read_memory_file(path) + return { + "id": fm.get("id", edge_id), + "type": fm.get("type", ""), + "from_id": fm.get("from_id", ""), + "from_title": fm.get("from_title", ""), + "to_id": fm.get("to_id", ""), + "to_title": fm.get("to_title", ""), + "strength": fm.get("strength", 0.8), + "created": fm.get("created", ""), + "updated": fm.get("updated", ""), + "description": body.strip(), + "path": str(path.relative_to(self.memory_dir)), + } + + def edge_search( + self, + query: Optional[str] = None, + types: Optional[List[str]] = None, + from_id: Optional[str] = None, + to_id: Optional[str] = None, + limit: int = 20, + ) -> List[Dict[str, Any]]: + """Search edges via index.""" + index = self._load_index() + results = [] + query_lower = query.lower().strip() if query else "" + + # Resolve partial IDs to full UUIDs via prefix match + if from_id: + entries = index.get("entries", {}) + resolved = self._resolve_prefix(from_id, entries) + from_id = resolved or from_id + if to_id: + entries = index.get("entries", {}) + resolved = self._resolve_prefix(to_id, entries) + to_id = resolved or to_id + + for eid, entry in index.get("edges", {}).items(): + if types and entry.get("type") not in types: + continue + if from_id and entry.get("from_id") != from_id: + continue + if to_id and entry.get("to_id") != to_id: + continue + if query_lower: + searchable = f"{entry.get('from_title', '')} {entry.get('to_title', '')} {entry.get('type', '')}".lower() + if query_lower not in searchable: + continue + results.append({"id": eid, **entry}) + + results.sort(key=lambda x: x.get("created", ""), reverse=True) + return results[:limit] + + def edge_update( + self, + edge_id: str, + description: Optional[str] = None, + strength: Optional[float] = None, + ) -> bool: + """Update edge body/metadata, sync strength to memory frontmatter.""" + path = self._resolve_edge_path(edge_id) + if not path: + return False + + fm, body = self._read_memory_file(path) + now = datetime.now(timezone.utc).isoformat() + + if description is not None: + body = description + if strength is not None: + fm["strength"] = max(0.0, min(1.0, strength)) + fm["updated"] = now + + # Write edge file + edge_fm_str = serialize_edge_frontmatter(fm) + edge_body = body.strip() if body else "" + edge_content = ( + f"{edge_fm_str}\n\n{edge_body}\n" if edge_body else f"{edge_fm_str}\n" + ) + path.write_text(edge_content, encoding="utf-8") + + # Sync strength to memory frontmatter if changed + if strength is not None: + for mid_key in ("from_id", "to_id"): + mid = fm.get(mid_key) + if not mid: + continue + mem_path = self._resolve_memory_path(mid) + if not mem_path: + continue + mem_fm, mem_body = self._read_memory_file(mem_path) + for rel in mem_fm.get("relations", []): + if rel.get("edge_id") == edge_id: + rel["strength"] = fm["strength"] + mem_fm["updated"] = now + self._write_memory_file(mem_path, mem_fm, mem_body) + + # Update edge index + rel_path = str(path.relative_to(self.memory_dir)) + self._update_edge_index(edge_id, fm, rel_path) + self._git_commit(f"edge-update: {edge_id[:8]}", [path]) + return True + + def edge_delete(self, edge_id: str) -> bool: + """Remove edge file and clean frontmatter refs from both memories.""" + path = self._resolve_edge_path(edge_id) + if not path: + return False + + fm, _ = self._read_memory_file(path) + now = datetime.now(timezone.utc).isoformat() + files_to_commit: List[Path] = [] + + # Clean edge_id references from both memories + for mid_key in ("from_id", "to_id"): + mid = fm.get(mid_key) + if not mid: + continue + mem_path = self._resolve_memory_path(mid) + if not mem_path: + continue + mem_fm, mem_body = self._read_memory_file(mem_path) + original_rels = mem_fm.get("relations", []) + mem_fm["relations"] = [ + r for r in original_rels if r.get("edge_id") != edge_id + ] + if len(mem_fm["relations"]) != len(original_rels): + mem_fm["updated"] = now + self._write_memory_file(mem_path, mem_fm, mem_body) + rel_p = str(mem_path.relative_to(self.memory_dir)) + self._update_index_entry(mid, mem_fm, rel_p) + files_to_commit.append(mem_path) + + # Remove edge file + path.unlink() + self._remove_edge_index(edge_id) + + # Git stage deletion + try: + rel_path = path.relative_to(self.memory_dir) + subprocess.run( + ["git", "rm", "--cached", str(rel_path)], + cwd=str(self.memory_dir), + capture_output=True, + timeout=5, + ) + except Exception: + pass + self._git_commit(f"edge-delete: {edge_id[:8]}") + return True diff --git a/embeddings.py b/embeddings.py new file mode 100644 index 0000000..460c4d3 --- /dev/null +++ b/embeddings.py @@ -0,0 +1,233 @@ +"""EmbeddingsMixin for CognitiveMemoryClient. + +Provides embedding generation and semantic search capabilities. Extracted from +client.py as part of the mixin refactor. Methods rely on shared state (memory_dir, +_load_index, _load_embeddings_cached) provided by the base class via MRO. +""" + +import json +import sys +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional, Tuple + +from common import ( + EMBEDDING_MODEL, + EMBEDDINGS_PATH, + OPENAI_MODEL_DEFAULT, + _cosine_similarity, + _load_memory_config, + _ollama_embed, + _openai_embed, +) + + +class EmbeddingsMixin: + """Mixin providing embedding generation and semantic recall for memory clients.""" + + def _get_embedding_provider(self) -> Dict[str, Any]: + """Load embedding config from _config.json.""" + return _load_memory_config(self.memory_dir / "_config.json") + + def _embed_texts_with_fallback( + self, + texts: List[str], + timeout: int = 300, + ) -> Tuple[Optional[List[List[float]]], str, str]: + """Embed texts with fallback chain. Returns (vectors, provider_used, model_used).""" + config = self._get_embedding_provider() + provider = config.get("embedding_provider", "ollama") + + # Try configured provider first + if provider == "openai": + api_key = config.get("openai_api_key") + model = config.get("openai_model", OPENAI_MODEL_DEFAULT) + if api_key: + vectors = _openai_embed(texts, api_key, model, timeout=timeout) + if vectors is not None: + return vectors, "openai", model + # Fallback to ollama + ollama_model = config.get("ollama_model", EMBEDDING_MODEL) + vectors = _ollama_embed(texts, model=ollama_model, timeout=timeout) + if vectors is not None: + return vectors, "ollama", ollama_model + else: + # ollama first + ollama_model = config.get("ollama_model", EMBEDDING_MODEL) + vectors = _ollama_embed(texts, model=ollama_model, timeout=timeout) + if vectors is not None: + return vectors, "ollama", ollama_model + # Fallback to openai + api_key = config.get("openai_api_key") + model = config.get("openai_model", OPENAI_MODEL_DEFAULT) + if api_key: + vectors = _openai_embed(texts, api_key, model, timeout=timeout) + if vectors is not None: + return vectors, "openai", model + + return None, "", "" + + def embed(self, if_changed: bool = False) -> Dict[str, Any]: + """Generate embeddings for all memories using configured provider. + + Detects provider changes and re-embeds everything (dimension mismatch safety). + Stores vectors in _embeddings.json (not git-tracked). + + Args: + if_changed: If True, skip embedding if the set of memory IDs hasn't + changed since last run (no new/deleted memories). + """ + index = self._load_index() + entries = index.get("entries", {}) + if not entries: + return { + "embedded": 0, + "provider": "none", + "model": "", + "path": str(EMBEDDINGS_PATH), + } + + # Check for provider change + embeddings_path = self.memory_dir / "_embeddings.json" + old_provider = "" + if embeddings_path.exists(): + try: + old_data = json.loads(embeddings_path.read_text()) + old_provider = old_data.get("provider", "ollama") + except (json.JSONDecodeError, OSError): + pass + + config = self._get_embedding_provider() + new_provider = config.get("embedding_provider", "ollama") + provider_changed = old_provider and old_provider != new_provider + if provider_changed: + print( + f"Provider changed ({old_provider} -> {new_provider}), re-embedding all memories...", + file=sys.stderr, + ) + + # Skip if nothing changed (unless provider switched) + if if_changed and not provider_changed and embeddings_path.exists(): + try: + old_data = json.loads(embeddings_path.read_text()) + embedded_ids = set(old_data.get("entries", {}).keys()) + index_ids = set(entries.keys()) + if embedded_ids == index_ids: + return { + "embedded": 0, + "skipped": True, + "reason": "no new or deleted memories", + "path": str(embeddings_path), + } + except (json.JSONDecodeError, OSError): + pass # Can't read old data, re-embed + + # Build texts to embed + memory_ids = list(entries.keys()) + texts = [] + for mid in memory_ids: + entry = entries[mid] + title = entry.get("title", "") + preview = entry.get("content_preview", "") + texts.append(f"{title}. {preview}") + + # Batch embed in groups of 50 + all_embeddings: Dict[str, List[float]] = {} + batch_size = 50 + provider_used = "" + model_used = "" + for i in range(0, len(texts), batch_size): + batch_texts = texts[i : i + batch_size] + batch_ids = memory_ids[i : i + batch_size] + vectors, provider_used, model_used = self._embed_texts_with_fallback( + batch_texts, + timeout=300, + ) + if vectors is None: + return { + "error": "All embedding providers unavailable", + "embedded": len(all_embeddings), + } + for mid, vec in zip(batch_ids, vectors): + all_embeddings[mid] = vec + + # Write embeddings file with provider info + embeddings_data = { + "provider": provider_used, + "model": model_used, + "updated": datetime.now(timezone.utc).isoformat(), + "entries": all_embeddings, + } + embeddings_path.write_text(json.dumps(embeddings_data, default=str)) + + return { + "embedded": len(all_embeddings), + "provider": provider_used, + "model": model_used, + "path": str(embeddings_path), + } + + def semantic_recall(self, query: str, limit: int = 10) -> List[Dict[str, Any]]: + """Search memories by semantic similarity using embeddings. + + Uses the same provider that generated stored embeddings to embed the query. + Skips vectors with dimension mismatch as safety guard. + """ + emb_data = self._load_embeddings_cached() + if emb_data is None: + return [] + + stored = emb_data.get("entries", {}) + if not stored: + return [] + + # Embed query with matching provider + stored_provider = emb_data.get("provider", "ollama") + config = self._get_embedding_provider() + query_vec = None + + if stored_provider == "openai": + api_key = config.get("openai_api_key") + model = emb_data.get("model", OPENAI_MODEL_DEFAULT) + if api_key: + vecs = _openai_embed([query], api_key, model) + if vecs: + query_vec = vecs[0] + if query_vec is None and stored_provider == "ollama": + stored_model = emb_data.get("model", EMBEDDING_MODEL) + vecs = _ollama_embed([query], model=stored_model) + if vecs: + query_vec = vecs[0] + # Last resort: try any available provider + if query_vec is None: + vecs, _, _ = self._embed_texts_with_fallback([query], timeout=30) + if vecs: + query_vec = vecs[0] + + if query_vec is None: + return [] + + query_dim = len(query_vec) + + # Score all memories by cosine similarity + index = self._load_index() + scored = [] + for mid, vec in stored.items(): + # Skip dimension mismatch + if len(vec) != query_dim: + continue + sim = _cosine_similarity(query_vec, vec) + entry = index.get("entries", {}).get(mid) + if entry: + scored.append( + { + "id": mid, + "title": entry.get("title", ""), + "type": entry.get("type", "general"), + "tags": entry.get("tags", []), + "similarity": round(sim, 4), + "path": entry.get("path", ""), + } + ) + + scored.sort(key=lambda x: x["similarity"], reverse=True) + return scored[:limit] diff --git a/feature.json b/feature.json new file mode 100644 index 0000000..cfa004f --- /dev/null +++ b/feature.json @@ -0,0 +1,53 @@ +{ + "name": "cognitive-memory", + "version": "3.1.0", + "description": "Markdown-based memory system with decay scoring, episodic logging, semantic search, reflection cycles, auto-curated CORE.md, native MCP server integration, rich edge files, and hybrid Ollama/OpenAI embeddings", + "created": "2026-02-13", + "migrated_from": "memorygraph", + "status": "active", + "files": { + "client.py": "CLI and Python API for all memory operations", + "mcp_server.py": "MCP server for Claude Code tool integration", + "SKILL.md": "Skill documentation and activation triggers", + "SCHEMA.md": "Format documentation for all file types", + "scripts/session_memory.py": "SessionEnd hook — auto-stores session learnings", + "scripts/ensure-symlinks.sh": "Refreshes MEMORY.md symlinks to CORE.md", + "systemd/": "Reference copies of systemd user timers (see systemd/README.md)", + "dev/migrate.py": "One-time migration from MemoryGraph SQLite", + "dev/PROJECT_PLAN.json": "Development roadmap and task tracking" + }, + "data_location": "$XDG_DATA_HOME/cognitive-memory/ (default: ~/.local/share/cognitive-memory/)", + "dependencies": "stdlib-only (no external packages; Ollama optional for semantic search)", + "features": [ + "store: Create markdown memory files with YAML frontmatter (--episode flag)", + "recall: Search memories ranked by relevance and decay score (--semantic flag)", + "get: Retrieve memory by ID with access tracking", + "relate: Create bidirectional relationships between memories", + "search: Filter by type, tags, importance with optional text query", + "update: Modify memory frontmatter and content", + "delete: Remove memory with relation cleanup", + "related: BFS traversal of memory relationships", + "stats: Memory system statistics and decay summary", + "recent: Recently created memories", + "decay: Recalculate all decay scores", + "core: Generate CORE.md auto-curated summary", + "episode: Append to daily session log", + "reindex: Rebuild index from markdown files", + "pin: Move memory to vault (never decays)", + "reflect: Cluster recent memories by tag overlap using union-find", + "reflection: Generate REFLECTION.md summary with themes and patterns", + "procedure: Store procedural memories with steps/preconditions/postconditions", + "embed: Generate Ollama embeddings for semantic search", + "tags list: Show all tags with usage counts", + "tags related: Find co-occurring tags", + "tags suggest: Recommend tags based on co-occurrence patterns", + "edge-get: Get full edge details by ID", + "edge-search: Search edges by query, type, from/to IDs", + "edge-update: Update edge description or strength", + "edge-delete: Remove edge and clean memory references", + "config: Manage embedding provider (ollama/openai) with fallback", + "MCP server: Native Claude Code tool integration via JSON-RPC stdio", + "Hybrid embeddings: Ollama (local) + OpenAI (optional) with automatic fallback", + "Rich edges: First-class edge files in graph/edges/ with descriptions" + ] +} diff --git a/mcp_server.py b/mcp_server.py new file mode 100644 index 0000000..d7679a8 --- /dev/null +++ b/mcp_server.py @@ -0,0 +1,796 @@ +#!/usr/bin/env python3 +""" +Cognitive Memory MCP Server + +JSON-RPC 2.0 stdio MCP server that wraps CognitiveMemoryClient. +Exposes 18 memory operations as MCP tools for Claude Code. +""" + +import json +import subprocess +import sys +from pathlib import Path +from typing import Any, Dict, Optional + +# Allow imports from this directory (client.py lives here) +sys.path.insert(0, str(Path(__file__).parent)) + +from client import CognitiveMemoryClient, _load_memory_config, MEMORY_DIR +from common import resolve_graph_path, list_graphs + +SYNC_SCRIPT = Path(__file__).parent / "scripts" / "memory-git-sync.sh" + +_clients: Dict[str, CognitiveMemoryClient] = {} + + +def get_client(graph: Optional[str] = None) -> CognitiveMemoryClient: + """Get or create a CognitiveMemoryClient for the given graph.""" + key = graph or "default" + if key not in _clients: + path = resolve_graph_path(graph) + _clients[key] = CognitiveMemoryClient(memory_dir=path) + return _clients[key] + + +def _trigger_git_sync(): + """Fire-and-forget git sync after a write operation.""" + if SYNC_SCRIPT.exists(): + try: + subprocess.Popen( + [str(SYNC_SCRIPT)], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + start_new_session=True, + ) + except Exception: + pass # Non-critical — daily timer is the fallback + + +def create_tools() -> list: + """Define all 18 MCP tool definitions with inputSchema.""" + return [ + { + "name": "memory_store", + "description": ( + "Store a new memory in the cognitive memory system. " + "Creates a markdown file with YAML frontmatter and returns the new memory UUID. " + "Valid types: solution, fix, decision, configuration, problem, workflow, " + "code_pattern, error, general, procedure, insight." + ), + "inputSchema": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Memory type (solution, fix, decision, configuration, problem, workflow, code_pattern, error, general, procedure, insight)", + }, + "title": { + "type": "string", + "description": "Short descriptive title for the memory", + }, + "content": { + "type": "string", + "description": "Full content/body of the memory in markdown", + }, + "tags": { + "type": "array", + "items": {"type": "string"}, + "description": "List of lowercase tags for categorisation (e.g. ['python', 'fix', 'discord'])", + }, + "importance": { + "type": "number", + "description": "Importance score from 0.0 to 1.0 (default 0.5)", + }, + "episode": { + "type": "boolean", + "description": "Also log an episode entry for this memory (default true)", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + "required": ["type", "title", "content"], + }, + }, + { + "name": "memory_recall", + "description": ( + "Search memories by a natural language query, ranked by relevance and decay score. " + "Semantic search is enabled by default when embeddings exist. Set semantic=false for keyword-only." + ), + "inputSchema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Natural language search query", + }, + "semantic": { + "type": "boolean", + "description": "Merge with semantic/vector similarity search (requires embeddings, default true)", + }, + "limit": { + "type": "integer", + "description": "Maximum number of results to return (default 10)", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + "required": ["query"], + }, + }, + { + "name": "memory_get", + "description": ( + "Retrieve a single memory by its UUID, including full content, frontmatter metadata, " + "relations, and current decay score." + ), + "inputSchema": { + "type": "object", + "properties": { + "memory_id": { + "type": "string", + "description": "UUID of the memory to retrieve", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + "required": ["memory_id"], + }, + }, + { + "name": "memory_search", + "description": ( + "Filter memories by type, tags, and/or minimum importance score. " + "Optionally include a text query. Returns results sorted by importance descending. " + "Use this for structured browsing; use memory_recall for ranked relevance search." + ), + "inputSchema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Optional text query to filter results", + }, + "memory_types": { + "type": "array", + "items": {"type": "string"}, + "description": "Filter by memory types (e.g. ['solution', 'fix'])", + }, + "tags": { + "type": "array", + "items": {"type": "string"}, + "description": "Filter by tags — memory must have at least one of these", + }, + "min_importance": { + "type": "number", + "description": "Minimum importance score (0.0 to 1.0)", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + }, + }, + { + "name": "memory_relate", + "description": ( + "Create a typed relationship (edge) between two memories. " + "Valid relation types: SOLVES, CAUSES, BUILDS_ON, ALTERNATIVE_TO, REQUIRES, FOLLOWS, RELATED_TO. " + "Returns the new edge UUID, or empty string if the relationship already exists." + ), + "inputSchema": { + "type": "object", + "properties": { + "from_id": { + "type": "string", + "description": "UUID of the source memory", + }, + "to_id": { + "type": "string", + "description": "UUID of the target memory", + }, + "rel_type": { + "type": "string", + "description": "Relationship type (SOLVES, CAUSES, BUILDS_ON, ALTERNATIVE_TO, REQUIRES, FOLLOWS, RELATED_TO)", + }, + "description": { + "type": "string", + "description": "Optional human-readable description of the relationship", + }, + "strength": { + "type": "number", + "description": "Relationship strength from 0.0 to 1.0 (default 0.8)", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + "required": ["from_id", "to_id", "rel_type"], + }, + }, + { + "name": "memory_related", + "description": ( + "Traverse the relationship graph from a given memory, returning connected memories " + "up to max_depth hops away. Optionally filter by relationship type." + ), + "inputSchema": { + "type": "object", + "properties": { + "memory_id": { + "type": "string", + "description": "UUID of the starting memory", + }, + "rel_types": { + "type": "array", + "items": {"type": "string"}, + "description": "Filter by relation types (e.g. ['SOLVES', 'BUILDS_ON'])", + }, + "max_depth": { + "type": "integer", + "description": "Maximum traversal depth (1-5, default 1)", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + "required": ["memory_id"], + }, + }, + { + "name": "memory_edge_get", + "description": ( + "Retrieve a single relationship edge by its UUID, including metadata and description body." + ), + "inputSchema": { + "type": "object", + "properties": { + "edge_id": { + "type": "string", + "description": "UUID of the edge to retrieve", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + "required": ["edge_id"], + }, + }, + { + "name": "memory_edge_search", + "description": ( + "Search relationship edges by type, connected memory IDs, or a text query " + "that matches against the from/to memory titles and relationship type." + ), + "inputSchema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Text query to match against edge titles and type", + }, + "types": { + "type": "array", + "items": {"type": "string"}, + "description": "Filter by relationship types (e.g. ['SOLVES'])", + }, + "from_id": { + "type": "string", + "description": "Filter to edges originating from this memory UUID", + }, + "to_id": { + "type": "string", + "description": "Filter to edges pointing at this memory UUID", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + }, + }, + { + "name": "memory_reflect", + "description": ( + "Review memories created since a given date, cluster them by shared tags, " + "and return consolidation recommendations. Does NOT auto-create new memories — " + "you review the output and decide what to store. Set dry_run=true to skip " + "updating state and logging an episode entry." + ), + "inputSchema": { + "type": "object", + "properties": { + "since": { + "type": "string", + "description": "ISO date (YYYY-MM-DD) to review memories from. Defaults to last reflection date or 30 days ago.", + }, + "dry_run": { + "type": "boolean", + "description": "If true, return analysis without persisting state changes (default false)", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + }, + }, + { + "name": "memory_reflection", + "description": ( + "Return the current REFLECTION.md summary — the auto-curated narrative of recent " + "memory themes, clusters, and activity. Use this to quickly orient at session start." + ), + "inputSchema": { + "type": "object", + "properties": { + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + }, + }, + { + "name": "memory_stats", + "description": ( + "Return statistics about the memory system: total count, breakdown by type, " + "relation count, decay distribution, embeddings count, and per-directory file counts." + ), + "inputSchema": { + "type": "object", + "properties": { + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + }, + }, + { + "name": "memory_episode", + "description": ( + "Append a timestamped entry to today's episode log file (episodes/YYYY-MM-DD.md in the active graph). " + "Use this to record significant session events, commits, or decisions without creating full memories." + ), + "inputSchema": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Episode entry type (e.g. fix, commit, decision, automation)", + }, + "title": { + "type": "string", + "description": "Short title for the episode entry", + }, + "tags": { + "type": "array", + "items": {"type": "string"}, + "description": "Tags for the episode entry", + }, + "summary": { + "type": "string", + "description": "Optional summary text for the entry", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + "required": ["type", "title"], + }, + }, + { + "name": "memory_tags_list", + "description": ( + "List all tags used across the memory system, sorted by usage frequency. " + "Returns tag name and count of memories using it." + ), + "inputSchema": { + "type": "object", + "properties": { + "limit": { + "type": "integer", + "description": "Maximum number of tags to return (0 = unlimited, default 0)", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + }, + }, + { + "name": "memory_tags_related", + "description": ( + "Find tags that frequently co-occur with a given tag, sorted by co-occurrence count. " + "Useful for discovering related topics and navigating the tag graph." + ), + "inputSchema": { + "type": "object", + "properties": { + "tag": { + "type": "string", + "description": "The tag to find co-occurring tags for", + }, + "limit": { + "type": "integer", + "description": "Maximum number of related tags to return (0 = unlimited, default 0)", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + "required": ["tag"], + }, + }, + { + "name": "memory_embed", + "description": ( + "Generate or refresh vector embeddings for all memories that do not yet have them. " + "Requires either Ollama (nomic-embed-text model) or an OpenAI API key configured. " + "Embeddings enable semantic recall via memory_recall with semantic=true." + ), + "inputSchema": { + "type": "object", + "properties": { + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + }, + }, + { + "name": "memory_core", + "description": ( + "Return the current CORE.md content — the auto-curated high-priority memory digest " + "used to seed Claude sessions. Lists critical solutions, active decisions, and key fixes." + ), + "inputSchema": { + "type": "object", + "properties": { + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + }, + }, + { + "name": "memory_decay", + "description": ( + "Run a decay pass over all memories: recalculate decay scores based on age, " + "access frequency, importance, and type weight. Archives memories whose score " + "drops below the dormant threshold. Returns a summary of updated scores." + ), + "inputSchema": { + "type": "object", + "properties": { + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + }, + }, + { + "name": "memory_config", + "description": ( + "View or update the cognitive memory embedding configuration (_config.json). " + "Set action='show' to display current config (API key is masked). " + "Provide provider='openai' or provider='ollama' to switch embedding backends. " + "Provide openai_api_key to set the OpenAI API key for embeddings." + ), + "inputSchema": { + "type": "object", + "properties": { + "action": { + "type": "string", + "description": "Set to 'show' to display current config without modifying it", + }, + "provider": { + "type": "string", + "description": "Embedding provider: 'ollama' or 'openai'", + }, + "openai_api_key": { + "type": "string", + "description": "OpenAI API key to store in config", + }, + "graph": { + "type": "string", + "description": "Named memory graph to use (default: 'default')", + }, + }, + }, + }, + { + "name": "memory_graphs", + "description": ( + "List all available memory graphs (named, segregated memory namespaces). " + "Returns each graph's name, path, and whether it exists on disk." + ), + "inputSchema": {"type": "object", "properties": {}}, + }, + ] + + +def handle_tool_call(tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]: + """Dispatch MCP tool calls to the CognitiveMemoryClient.""" + + def ok(result: Any) -> Dict[str, Any]: + return { + "content": [ + {"type": "text", "text": json.dumps(result, indent=2, default=str)} + ] + } + + try: + graph = arguments.pop("graph", None) + client = get_client(graph) + + if tool_name == "memory_store": + mem_type = arguments["type"] + title = arguments["title"] + tags = arguments.get("tags") + memory_id = client.store( + type=mem_type, + title=title, + content=arguments["content"], + tags=tags, + importance=arguments.get("importance", 0.5), + ) + # Auto-log episode entry (default true, opt-out with episode=false) + episode_logged = False + if arguments.get("episode", True): + client.episode( + type=mem_type, + title=title, + tags=tags, + ) + episode_logged = True + _trigger_git_sync() + return ok( + { + "success": True, + "memory_id": memory_id, + "episode_logged": episode_logged, + } + ) + + elif tool_name == "memory_recall": + results = client.recall( + query=arguments["query"], + semantic=arguments.get("semantic", True), + limit=arguments.get("limit", 10), + ) + return ok(results) + + elif tool_name == "memory_get": + result = client.get(arguments["memory_id"]) + if result is None: + return ok({"error": f"Memory not found: {arguments['memory_id']}"}) + return ok(result) + + elif tool_name == "memory_search": + results = client.search( + query=arguments.get("query"), + memory_types=arguments.get("memory_types"), + tags=arguments.get("tags"), + min_importance=arguments.get("min_importance"), + ) + return ok(results) + + elif tool_name == "memory_relate": + edge_id = client.relate( + from_id=arguments["from_id"], + to_id=arguments["to_id"], + rel_type=arguments["rel_type"], + description=arguments.get("description"), + strength=arguments.get("strength", 0.8), + ) + if edge_id: + _trigger_git_sync() + return ok({"success": True, "edge_id": edge_id}) + return ok({"success": False, "message": "Relationship already exists"}) + + elif tool_name == "memory_related": + results = client.related( + memory_id=arguments["memory_id"], + rel_types=arguments.get("rel_types"), + max_depth=arguments.get("max_depth", 1), + ) + return ok(results) + + elif tool_name == "memory_edge_get": + result = client.edge_get(arguments["edge_id"]) + if result is None: + return ok({"error": f"Edge not found: {arguments['edge_id']}"}) + return ok(result) + + elif tool_name == "memory_edge_search": + results = client.edge_search( + query=arguments.get("query"), + types=arguments.get("types"), + from_id=arguments.get("from_id"), + to_id=arguments.get("to_id"), + ) + return ok(results) + + elif tool_name == "memory_reflect": + result = client.reflect( + since=arguments.get("since"), + dry_run=arguments.get("dry_run", False), + ) + return ok(result) + + elif tool_name == "memory_reflection": + text = client.reflection_summary() + return ok({"content": text}) + + elif tool_name == "memory_stats": + result = client.stats() + return ok(result) + + elif tool_name == "memory_episode": + client.episode( + type=arguments["type"], + title=arguments["title"], + tags=arguments.get("tags"), + summary=arguments.get("summary"), + ) + return ok({"success": True}) + + elif tool_name == "memory_tags_list": + results = client.tags_list(limit=arguments.get("limit", 0)) + return ok(results) + + elif tool_name == "memory_tags_related": + results = client.tags_related( + tag=arguments["tag"], + limit=arguments.get("limit", 0), + ) + return ok(results) + + elif tool_name == "memory_embed": + result = client.embed() + return ok(result) + + elif tool_name == "memory_core": + text = client.core() + return ok({"content": text}) + + elif tool_name == "memory_decay": + result = client.decay() + return ok(result) + + elif tool_name == "memory_config": + config_path = client.memory_dir / "_config.json" + config = _load_memory_config(config_path) + changed = False + + provider = arguments.get("provider") + openai_api_key = arguments.get("openai_api_key") + + if provider: + config["embedding_provider"] = provider + changed = True + if openai_api_key: + config["openai_api_key"] = openai_api_key + changed = True + + if changed: + config_path.write_text(json.dumps(config, indent=2)) + return ok({"success": True, "updated": True}) + else: + # Show config with masked API key + display = dict(config) + key = display.get("openai_api_key") + if key and isinstance(key, str) and len(key) > 8: + display["openai_api_key"] = key[:4] + "..." + key[-4:] + return ok(display) + + elif tool_name == "memory_graphs": + graphs = list_graphs() + # Enrich with existence check and memory count + for g in graphs: + p = Path(g["path"]) + g["exists"] = p.exists() + if g["exists"]: + index_path = p / "_index.json" + if index_path.exists(): + try: + idx = json.loads(index_path.read_text()) + g["memory_count"] = len(idx.get("entries", {})) + except (json.JSONDecodeError, OSError): + g["memory_count"] = 0 + else: + g["memory_count"] = 0 + return ok(graphs) + + else: + return { + "content": [{"type": "text", "text": f"Unknown tool: {tool_name}"}], + "isError": True, + } + + except Exception as e: + return { + "content": [{"type": "text", "text": f"Error: {str(e)}"}], + "isError": True, + } + + +def main(): + """MCP stdio server main loop (JSON-RPC 2.0).""" + + tools = create_tools() + + for line in sys.stdin: + line = line.strip() + if not line: + continue + + try: + message = json.loads(line) + + if message.get("method") == "initialize": + response = { + "jsonrpc": "2.0", + "id": message.get("id"), + "result": { + "protocolVersion": "2024-11-05", + "capabilities": {"tools": {}}, + "serverInfo": { + "name": "cognitive-memory-mcp-server", + "version": "3.1.0", + }, + }, + } + print(json.dumps(response), flush=True) + + elif message.get("method") == "tools/list": + response = { + "jsonrpc": "2.0", + "id": message.get("id"), + "result": {"tools": tools}, + } + print(json.dumps(response), flush=True) + + elif message.get("method") == "tools/call": + params = message.get("params", {}) + tool_name = params.get("name") + arguments = params.get("arguments", {}) + + result = handle_tool_call(tool_name, arguments) + + response = {"jsonrpc": "2.0", "id": message.get("id"), "result": result} + print(json.dumps(response), flush=True) + + elif message.get("method") == "notifications/initialized": + # Acknowledge but no response required for notifications + pass + + except Exception as e: + error_response = { + "jsonrpc": "2.0", + "id": message.get("id") if "message" in locals() else None, + "error": {"code": -32603, "message": str(e)}, + } + print(json.dumps(error_response), flush=True) + + +if __name__ == "__main__": + main() diff --git a/scripts/edge-proposer.py b/scripts/edge-proposer.py new file mode 100644 index 0000000..813e48a --- /dev/null +++ b/scripts/edge-proposer.py @@ -0,0 +1,427 @@ +#!/usr/bin/env python3 +"""Analyze cognitive memories and propose high-quality edges. + +Reads all active/fading memories (decay >= 0.2), groups by shared tags, +and scores candidate relationships based on three signals: + - Type heuristics (40%): e.g. fix+problem → SOLVES, decision+solution → BUILDS_ON + - Tag overlap (30%): Jaccard similarity of tag sets + - Content similarity (30%): Keyword overlap in memory body text + +Outputs ranked proposals to stdout and saves top 80 as JSON for +programmatic use by Claude Code sessions. + +Usage: + python3 edge-proposer.py + + # Then review stdout output, pick good candidates, and create edges via: + # MCP: memory_relate(from_id, to_id, rel_type, description, strength) + # CLI: claude-memory relate --description "..." + +Output: + - Ranked candidates printed to stdout (score, type, titles, shared tags) + - JSON file saved to ~/.claude/tmp/edge-candidates.json + +Scoring: + - Minimum threshold: 0.15 (below this, candidates are discarded) + - Importance boost: 1.2x multiplier when avg importance >= 0.7 + - Session-log memories tend to produce noise — review FOLLOWS edges carefully + +Relation types available: + SOLVES, CAUSES, BUILDS_ON, ALTERNATIVE_TO, REQUIRES, FOLLOWS, RELATED_TO + +First run: 2026-02-19 — produced 5186 candidates from 473 memories, + 20 high-quality edges were manually selected and created. +""" + +import json +import os +import re +from pathlib import Path +from collections import defaultdict +from itertools import combinations + +# Resolve data directory: COGNITIVE_MEMORY_DIR > XDG_DATA_HOME > default +_env_dir = os.environ.get("COGNITIVE_MEMORY_DIR", "") +if _env_dir: + MEMORY_DIR = Path(_env_dir).expanduser() +else: + _xdg_data = os.environ.get("XDG_DATA_HOME", "") or str( + Path.home() / ".local" / "share" + ) + MEMORY_DIR = Path(_xdg_data) / "cognitive-memory" + +STATE_FILE = MEMORY_DIR / "_state.json" +GRAPH_DIR = MEMORY_DIR / "graph" +EDGES_DIR = GRAPH_DIR / "edges" + +# Type-based heuristics: (type_a, type_b) -> (suggested_rel, direction, base_score) +# direction: "ab" means a->b, "ba" means b->a +TYPE_HEURISTICS = { + ("fix", "problem"): ("SOLVES", "ab", 0.6), + ("solution", "problem"): ("SOLVES", "ab", 0.7), + ("solution", "error"): ("SOLVES", "ab", 0.6), + ("fix", "error"): ("SOLVES", "ab", 0.6), + ("decision", "solution"): ("BUILDS_ON", "ab", 0.3), + ("decision", "decision"): ("ALTERNATIVE_TO", None, 0.2), + ("solution", "solution"): ("BUILDS_ON", None, 0.2), + ("configuration", "solution"): ("REQUIRES", "ab", 0.3), + ("workflow", "configuration"): ("REQUIRES", "ab", 0.3), + ("insight", "solution"): ("BUILDS_ON", "ab", 0.4), + ("insight", "decision"): ("BUILDS_ON", "ab", 0.4), + ("fix", "fix"): ("FOLLOWS", None, 0.15), + ("fix", "solution"): ("BUILDS_ON", "ab", 0.2), + ("code_pattern", "solution"): ("BUILDS_ON", "ab", 0.3), + ("procedure", "workflow"): ("BUILDS_ON", "ab", 0.3), + ("configuration", "configuration"): ("RELATED_TO", None, 0.1), +} + + +def parse_frontmatter(filepath: Path) -> dict | None: + """Parse YAML frontmatter from a markdown file.""" + try: + text = filepath.read_text(encoding="utf-8") + except Exception: + return None + + if not text.startswith("---"): + return None + + end = text.find("---", 3) + if end == -1: + return None + + fm = {} + body = text[end + 3 :].strip() + fm["_body"] = body[:500] # first 500 chars of content for matching + fm["_filepath"] = str(filepath) + + for line in text[3:end].strip().splitlines(): + if ":" not in line: + continue + key, _, val = line.partition(":") + key = key.strip() + val = val.strip().strip('"').strip("'") + + if key == "tags": + # Handle both [a, b] and "a, b" formats + val = val.strip("[]") + fm["tags"] = [ + t.strip().strip('"').strip("'") for t in val.split(",") if t.strip() + ] + elif key == "importance": + try: + fm["importance"] = float(val) + except ValueError: + pass + else: + fm[key] = val + + return fm + + +def load_memories() -> dict[str, dict]: + """Load all memories from graph subdirectories.""" + memories = {} + type_dirs = [ + "solutions", + "fixes", + "decisions", + "configurations", + "problems", + "workflows", + "code-patterns", + "errors", + "general", + "procedures", + "insights", + ] + + for type_dir in type_dirs: + dirpath = GRAPH_DIR / type_dir + if not dirpath.exists(): + continue + for f in dirpath.glob("*.md"): + fm = parse_frontmatter(f) + if fm and "id" in fm: + memories[fm["id"]] = fm + + return memories + + +def load_existing_edges() -> set[tuple[str, str]]: + """Load existing edges to avoid duplicates.""" + existing = set() + + if not EDGES_DIR.exists(): + return existing + + for f in EDGES_DIR.glob("*.md"): + fm = parse_frontmatter(f) + if fm and "from_id" in fm and "to_id" in fm: + existing.add((fm["from_id"], fm["to_id"])) + existing.add((fm["to_id"], fm["from_id"])) # bidirectional check + + return existing + + +def load_decay_state() -> dict[str, float]: + """Load decay scores from state file.""" + if not STATE_FILE.exists(): + return {} + try: + state = json.loads(STATE_FILE.read_text()) + return {mid: info.get("decay_score", 0) for mid, info in state.items()} + except Exception: + return {} + + +def tag_overlap_score(tags_a: list[str], tags_b: list[str]) -> float: + """Jaccard similarity of tag sets.""" + if not tags_a or not tags_b: + return 0.0 + set_a, set_b = set(tags_a), set(tags_b) + intersection = set_a & set_b + union = set_a | set_b + return len(intersection) / len(union) if union else 0.0 + + +def content_keyword_overlap(body_a: str, body_b: str) -> float: + """Simple keyword overlap between content bodies.""" + if not body_a or not body_b: + return 0.0 + + def extract_keywords(text: str) -> set[str]: + words = re.findall(r"[a-zA-Z_]{4,}", text.lower()) + # Filter common words + stopwords = { + "that", + "this", + "with", + "from", + "have", + "been", + "were", + "they", + "their", + "will", + "would", + "could", + "should", + "which", + "where", + "when", + "what", + "about", + "into", + "also", + "more", + "some", + "then", + "than", + "each", + "only", + "used", + "using", + "after", + "before", + "because", + "between", + "through", + "during", + "added", + "updated", + "fixed", + "error", + "issue", + "problem", + "solution", + "memory", + "memories", + "configuration", + "successfully", + "working", + "works", + } + return {w for w in words if w not in stopwords} + + kw_a = extract_keywords(body_a) + kw_b = extract_keywords(body_b) + + if not kw_a or not kw_b: + return 0.0 + + intersection = kw_a & kw_b + union = kw_a | kw_b + return len(intersection) / len(union) if union else 0.0 + + +def get_type_heuristic( + type_a: str, type_b: str +) -> tuple[str, str | None, float] | None: + """Look up type-based heuristic, checking both orderings.""" + key = (type_a, type_b) + if key in TYPE_HEURISTICS: + rel, direction, score = TYPE_HEURISTICS[key] + return rel, direction, score + + key_rev = (type_b, type_a) + if key_rev in TYPE_HEURISTICS: + rel, direction, score = TYPE_HEURISTICS[key_rev] + # Flip direction + if direction == "ab": + direction = "ba" + elif direction == "ba": + direction = "ab" + return rel, direction, score + + return None + + +def score_pair(mem_a: dict, mem_b: dict) -> dict | None: + """Score a candidate edge between two memories.""" + tags_a = mem_a.get("tags", []) + tags_b = mem_b.get("tags", []) + + # Must share at least one tag + shared_tags = set(tags_a) & set(tags_b) + if not shared_tags: + return None + + tag_score = tag_overlap_score(tags_a, tags_b) + content_score = content_keyword_overlap( + mem_a.get("_body", ""), mem_b.get("_body", "") + ) + + type_a = mem_a.get("type", "general") + type_b = mem_b.get("type", "general") + + heuristic = get_type_heuristic(type_a, type_b) + if heuristic: + suggested_rel, direction, type_score = heuristic + else: + suggested_rel = "RELATED_TO" + direction = None + type_score = 0.05 + + # Composite score + total = (tag_score * 0.4) + (content_score * 0.3) + (type_score * 0.3) + + # Boost for high importance memories + imp_a = mem_a.get("importance", 0.5) + imp_b = mem_b.get("importance", 0.5) + if isinstance(imp_a, str): + imp_a = float(imp_a) + if isinstance(imp_b, str): + imp_b = float(imp_b) + avg_importance = (imp_a + imp_b) / 2 + if avg_importance >= 0.7: + total *= 1.2 + + if total < 0.15: + return None + + # Determine from/to based on direction + if direction == "ab": + from_mem, to_mem = mem_a, mem_b + elif direction == "ba": + from_mem, to_mem = mem_b, mem_a + else: + # Default: higher importance is "from" + if imp_a >= imp_b: + from_mem, to_mem = mem_a, mem_b + else: + from_mem, to_mem = mem_b, mem_a + + return { + "score": round(total, 3), + "rel_type": suggested_rel, + "from_id": from_mem["id"], + "from_title": from_mem.get("title", "?"), + "from_type": from_mem.get("type", "?"), + "to_id": to_mem["id"], + "to_title": to_mem.get("title", "?"), + "to_type": to_mem.get("type", "?"), + "shared_tags": sorted(shared_tags), + "tag_score": round(tag_score, 3), + "content_score": round(content_score, 3), + "type_score": round(type_score, 3), + } + + +def main(): + print("Loading memories...") + memories = load_memories() + print(f" Found {len(memories)} memories") + + print("Loading decay state...") + decay_scores = load_decay_state() + + # Filter to active + fading only (decay >= 0.2) + active_ids = { + mid for mid, score in decay_scores.items() if score >= 0.2 and mid in memories + } + # Also include memories without decay state (new) + for mid in memories: + if mid not in decay_scores: + active_ids.add(mid) + + active_memories = {mid: memories[mid] for mid in active_ids} + print(f" {len(active_memories)} active/fading memories to analyze") + + print("Loading existing edges...") + existing = load_existing_edges() + print(f" {len(existing) // 2} existing edges") + + print("Scoring candidate pairs...") + candidates = [] + + # Group by shared tags first to reduce pair space + tag_groups = defaultdict(set) + for mid, mem in active_memories.items(): + for tag in mem.get("tags", []): + tag_groups[tag].add(mid) + + # Collect unique pairs that share at least one tag + seen_pairs = set() + for tag, mids in tag_groups.items(): + if len(mids) < 2 or len(mids) > 50: # skip too-common tags + continue + for a, b in combinations(mids, 2): + pair = tuple(sorted([a, b])) + if pair in seen_pairs: + continue + if (a, b) in existing or (b, a) in existing: + continue + seen_pairs.add(pair) + + result = score_pair(active_memories[a], active_memories[b]) + if result: + candidates.append(result) + + # Sort by score descending + candidates.sort(key=lambda x: x["score"], reverse=True) + + print(f"\n{'='*100}") + print(f"TOP EDGE CANDIDATES ({len(candidates)} total, showing top 80)") + print(f"{'='*100}\n") + + for i, c in enumerate(candidates[:80], 1): + print(f"#{i:3d} | Score: {c['score']:.3f} | {c['rel_type']}") + print(f" FROM [{c['from_type']}] {c['from_title']}") + print(f" TO [{c['to_type']}] {c['to_title']}") + print( + f" Tags: {', '.join(c['shared_tags'])} | " + f"tag={c['tag_score']:.2f} content={c['content_score']:.2f} type={c['type_score']:.2f}" + ) + print(f" IDs: {c['from_id']} -> {c['to_id']}") + print() + + # Also output as JSON for programmatic use + json_path = MEMORY_DIR.parent / "tmp" / "edge-candidates.json" + json_path.parent.mkdir(parents=True, exist_ok=True) + json_path.write_text(json.dumps(candidates[:80], indent=2)) + print(f"Full candidates saved to: {json_path}") + + +if __name__ == "__main__": + main() diff --git a/scripts/memory-git-sync.sh b/scripts/memory-git-sync.sh new file mode 100755 index 0000000..a4ce16e --- /dev/null +++ b/scripts/memory-git-sync.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Commit and push cognitive memory changes to Gitea (cal/claude-memory) +# +# Called daily by cognitive-memory-daily.service after decay/core/symlinks. +# Only commits if there are actual changes. Safe to run multiple times. +# +# Location: ~/.claude/skills/cognitive-memory/scripts/memory-git-sync.sh +# Repo: cognitive-memory data dir -> https://git.manticorum.com/cal/claude-memory.git + +set -euo pipefail + +# Resolve data directory: COGNITIVE_MEMORY_DIR > XDG_DATA_HOME > default +MEMORY_DIR="${COGNITIVE_MEMORY_DIR:-${XDG_DATA_HOME:-$HOME/.local/share}/cognitive-memory}" + +cd "$MEMORY_DIR" + +# Check if there are any changes to commit +if git diff --quiet && git diff --cached --quiet && [ -z "$(git ls-files --others --exclude-standard)" ]; then + echo "memory-git-sync: no changes to commit" + exit 0 +fi + +# Stage all changes (.gitignore handles exclusions) +git add -A + +# Build a commit message from what changed +ADDED=$(git diff --cached --name-only --diff-filter=A | wc -l) +MODIFIED=$(git diff --cached --name-only --diff-filter=M | wc -l) +DELETED=$(git diff --cached --name-only --diff-filter=D | wc -l) +EDGES=$(git diff --cached --name-only --diff-filter=ACM | grep -c '^graph/edges/' || true) + +MSG="daily sync: ${ADDED} added, ${MODIFIED} modified, ${DELETED} deleted" +if [ "$EDGES" -gt 0 ]; then + MSG="$MSG (${EDGES} edges)" +fi + +git commit -m "$MSG" --no-gpg-sign 2>/dev/null || { + echo "memory-git-sync: commit failed (pre-commit hook?)" + exit 1 +} + +git push origin main 2>/dev/null || { + echo "memory-git-sync: push failed" + exit 1 +} + +echo "memory-git-sync: pushed to origin/main" diff --git a/scripts/session_memory.py b/scripts/session_memory.py new file mode 100755 index 0000000..32cf86e --- /dev/null +++ b/scripts/session_memory.py @@ -0,0 +1,587 @@ +#!/usr/bin/env python3 +""" +Session-end memory hook for Claude Code. + +Reads the session transcript, extracts significant events (commits, bug fixes, +architecture decisions, new patterns, configurations), and stores them as +cognitive memories via claude-memory CLI. +""" + +import json +import re +import subprocess +import sys +from datetime import datetime +from pathlib import Path + +LOG_FILE = Path("/tmp/session-memory-hook.log") + + +def log(msg: str): + """Append a timestamped message to the hook log file.""" + with open(LOG_FILE, "a") as f: + f.write(f"{datetime.now().isoformat(timespec='seconds')} {msg}\n") + + +def log_separator(): + """Write a visual separator to the log for readability between sessions.""" + with open(LOG_FILE, "a") as f: + f.write(f"\n{'='*72}\n") + f.write( + f" SESSION MEMORY HOOK — {datetime.now().isoformat(timespec='seconds')}\n" + ) + f.write(f"{'='*72}\n") + + +def read_stdin(): + """Read the hook input JSON from stdin.""" + try: + raw = sys.stdin.read() + log(f"[stdin] Raw input length: {len(raw)} chars") + data = json.loads(raw) + log(f"[stdin] Parsed keys: {list(data.keys())}") + return data + except (json.JSONDecodeError, EOFError) as e: + log(f"[stdin] ERROR: Failed to parse input: {e}") + return {} + + +def read_transcript(transcript_path: str) -> list[dict]: + """Read JSONL transcript file into a list of normalized message dicts. + + Claude Code transcripts use a wrapper format where each line is: + {"type": "user"|"assistant"|..., "message": {"role": ..., "content": ...}, ...} + This function unwraps them into the inner {"role": ..., "content": ...} dicts + that the rest of the code expects. Non-message entries (like file-history-snapshot) + are filtered out. + """ + messages = [] + path = Path(transcript_path) + if not path.exists(): + log(f"[transcript] ERROR: File does not exist: {transcript_path}") + return messages + file_size = path.stat().st_size + log(f"[transcript] Reading {transcript_path} ({file_size} bytes)") + parse_errors = 0 + skipped_types = {} + line_num = 0 + with open(path) as f: + for line_num, line in enumerate(f, 1): + line = line.strip() + if not line: + continue + try: + raw = json.loads(line) + except json.JSONDecodeError: + parse_errors += 1 + continue + + # Claude Code transcript format: wrapper with "type" and "message" keys + # Unwrap to get the inner message dict with "role" and "content" + if "message" in raw and isinstance(raw["message"], dict): + inner = raw["message"] + # Carry over the wrapper type for logging + wrapper_type = raw.get("type", "unknown") + if "role" not in inner: + inner["role"] = wrapper_type + messages.append(inner) + elif "role" in raw: + # Already in the expected format (future-proofing) + messages.append(raw) + else: + # Non-message entry (file-history-snapshot, etc.) + entry_type = raw.get("type", "unknown") + skipped_types[entry_type] = skipped_types.get(entry_type, 0) + 1 + + if parse_errors: + log(f"[transcript] WARNING: {parse_errors} lines failed to parse") + if skipped_types: + log(f"[transcript] Skipped non-message entries: {skipped_types}") + log(f"[transcript] Loaded {len(messages)} messages from {line_num} lines") + + # Log role breakdown + role_counts = {} + for msg in messages: + role = msg.get("role", "unknown") + role_counts[role] = role_counts.get(role, 0) + 1 + log(f"[transcript] Role breakdown: {role_counts}") + + return messages + + +def _is_memory_tool_use(block: dict) -> str | None: + """Check if a tool_use block is a memory operation. + + Detects both CLI calls (Bash with 'claude-memory') and MCP tool calls + (mcp__cognitive-memory__memory_*). Returns a short description of the + match or None. + """ + name = block.get("name", "") + + # MCP tool calls: mcp__cognitive-memory__memory_store, memory_recall, etc. + if name.startswith("mcp__cognitive-memory__memory_"): + return f"MCP:{name}" + + # Legacy/CLI: Bash commands containing 'claude-memory' + if name == "Bash": + cmd = block.get("input", {}).get("command", "") + if "claude-memory" in cmd: + return f"CLI:{cmd[:100]}" + + return None + + +def find_last_memory_command_index(messages: list[dict]) -> int: + """Find the index of the last message containing a memory operation. + + Scans for both MCP tool calls (mcp__cognitive-memory__memory_*) and + Bash tool_use blocks where the command contains 'claude-memory'. + Returns the index of that message so we can slice the transcript to + only process messages after the last memory operation, avoiding + duplicate storage. + + Returns -1 if no memory operations were found. + """ + last_index = -1 + found_commands = [] + for i, msg in enumerate(messages): + if msg.get("role") != "assistant": + continue + content = msg.get("content", []) + if not isinstance(content, list): + continue + for block in content: + if not isinstance(block, dict): + continue + if block.get("type") != "tool_use": + continue + match = _is_memory_tool_use(block) + if match: + last_index = i + found_commands.append(f"msg[{i}]: {match}") + if found_commands: + log(f"[cutoff] Found {len(found_commands)} memory operations:") + for fc in found_commands: + log(f"[cutoff] {fc}") + log(f"[cutoff] Will slice after message index {last_index}") + else: + log("[cutoff] No memory operations found — processing full transcript") + return last_index + + +def extract_text_content(message: dict) -> str: + """Extract plain text from a message's content blocks.""" + content = message.get("content", "") + if isinstance(content, str): + return content + if isinstance(content, list): + parts = [] + for block in content: + if isinstance(block, dict): + if block.get("type") == "text": + parts.append(block.get("text", "")) + elif block.get("type") == "tool_result": + # Recurse into tool result content + sub = block.get("content", "") + if isinstance(sub, str): + parts.append(sub) + elif isinstance(sub, list): + for sb in sub: + if isinstance(sb, dict) and sb.get("type") == "text": + parts.append(sb.get("text", "")) + elif isinstance(block, str): + parts.append(block) + return "\n".join(parts) + return "" + + +def extract_tool_uses(messages: list[dict]) -> list[dict]: + """Extract all tool_use blocks from assistant messages.""" + tool_uses = [] + for msg in messages: + if msg.get("role") != "assistant": + continue + content = msg.get("content", []) + if not isinstance(content, list): + continue + for block in content: + if isinstance(block, dict) and block.get("type") == "tool_use": + tool_uses.append(block) + + # Log tool use breakdown + tool_counts = {} + for tu in tool_uses: + name = tu.get("name", "unknown") + tool_counts[name] = tool_counts.get(name, 0) + 1 + log(f"[tools] Extracted {len(tool_uses)} tool uses: {tool_counts}") + + return tool_uses + + +def find_git_commits(tool_uses: list[dict]) -> list[str]: + """Find git commit commands from Bash tool uses.""" + commits = [] + for tu in tool_uses: + if tu.get("name") != "Bash": + continue + cmd = tu.get("input", {}).get("command", "") + if "git commit" in cmd: + commits.append(cmd) + log(f"[commits] Found {len(commits)} git commit commands") + return commits + + +def find_files_edited(tool_uses: list[dict]) -> set[str]: + """Find unique files edited via Edit/Write tools.""" + files = set() + for tu in tool_uses: + name = tu.get("name", "") + if name in ("Edit", "Write", "MultiEdit"): + fp = tu.get("input", {}).get("file_path", "") + if fp: + files.add(fp) + log(f"[files] Found {len(files)} edited files:") + for f in sorted(files): + log(f"[files] {f}") + return files + + +def find_errors_encountered(messages: list[dict]) -> list[str]: + """Find error messages from tool results.""" + errors = [] + for msg in messages: + if msg.get("role") != "user": + continue + content = msg.get("content", []) + if not isinstance(content, list): + continue + for block in content: + if not isinstance(block, dict): + continue + if block.get("type") == "tool_result" and block.get("is_error"): + error_text = extract_text_content({"content": block.get("content", "")}) + if error_text and len(error_text) > 10: + errors.append(error_text[:500]) + log(f"[errors] Found {len(errors)} error tool results") + return errors + + +def detect_project(cwd: str, files_edited: set[str]) -> str: + """Detect project name from cwd and edited files.""" + all_paths = [cwd] + list(files_edited) + project_indicators = { + "major-domo": "major-domo", + "paper-dynasty": "paper-dynasty", + "claude-home": "homelab", + "homelab": "homelab", + ".claude": "claude-config", + "openclaw": "openclaw", + "tdarr": "tdarr", + } + for path in all_paths: + for indicator, project in project_indicators.items(): + if indicator in path.lower(): + log( + f"[project] Detected '{project}' from path containing '{indicator}': {path}" + ) + return project + # Fall back to last directory component of cwd + fallback = Path(cwd).name + log(f"[project] No indicator matched, falling back to cwd name: {fallback}") + return fallback + + +def build_session_summary(messages: list[dict], cwd: str) -> dict | None: + """Analyze the transcript and build a summary of storable events.""" + log(f"[summary] Building summary from {len(messages)} messages, cwd={cwd}") + + if len(messages) < 4: + log(f"[summary] SKIP: only {len(messages)} messages, need at least 4") + return "too_short" + + tool_uses = extract_tool_uses(messages) + commits = find_git_commits(tool_uses) + files_edited = find_files_edited(tool_uses) + errors = find_errors_encountered(messages) + project = detect_project(cwd, files_edited) + + # Collect assistant text for topic extraction + assistant_texts = [] + for msg in messages: + if msg.get("role") == "assistant": + text = extract_text_content(msg) + if text: + assistant_texts.append(text) + + full_assistant_text = "\n".join(assistant_texts) + log( + f"[summary] Assistant text: {len(full_assistant_text)} chars from {len(assistant_texts)} messages" + ) + + # Detect what kind of work was done + work_types = set() + keyword_checks = { + "commit": lambda: bool(commits), + "debugging": lambda: bool(errors), + "testing": lambda: any("test" in f.lower() for f in files_edited), + "fix": lambda: any( + kw in full_assistant_text.lower() for kw in ["bug", "fix", "error", "issue"] + ), + "refactoring": lambda: any( + kw in full_assistant_text.lower() + for kw in ["refactor", "restructure", "reorganize"] + ), + "feature": lambda: any( + kw in full_assistant_text.lower() + for kw in ["new feature", "implement", "add support"] + ), + "deployment": lambda: any( + kw in full_assistant_text.lower() + for kw in ["deploy", "production", "release"] + ), + "configuration": lambda: any( + kw in full_assistant_text.lower() + for kw in ["config", "setup", "install", "configure"] + ), + "automation": lambda: any( + kw in full_assistant_text.lower() for kw in ["hook", "script", "automat"] + ), + "tooling": lambda: any( + kw in full_assistant_text.lower() + for kw in [ + "skill", + "command", + "slash command", + "commit-push", + "claude code command", + ] + ), + "creation": lambda: any( + kw in full_assistant_text.lower() + for kw in ["create a ", "created", "new file", "wrote a"] + ), + } + + for work_type, check_fn in keyword_checks.items(): + matched = check_fn() + if matched: + work_types.add(work_type) + log(f"[work_type] MATCH: {work_type}") + else: + log(f"[work_type] no match: {work_type}") + + if not work_types and not files_edited: + log("[summary] SKIP: no work types detected and no files edited") + # Log a snippet of assistant text to help debug missed keywords + snippet = full_assistant_text[:500].replace("\n", " ") + log(f"[summary] Assistant text preview: {snippet}") + return "no_work" + + log( + f"[summary] Result: project={project}, work_types={sorted(work_types)}, " + f"commits={len(commits)}, files={len(files_edited)}, errors={len(errors)}" + ) + + return { + "project": project, + "work_types": work_types, + "commits": commits, + "files_edited": sorted(files_edited), + "errors": errors[:5], # Cap at 5 + "assistant_text_snippet": full_assistant_text[:3000], + "message_count": len(messages), + "tool_use_count": len(tool_uses), + } + + +def build_memory_content(summary: dict) -> str: + """Build a concise memory content string from the summary.""" + parts = [] + + if summary["commits"]: + parts.append(f"Commits made: {len(summary['commits'])}") + for c in summary["commits"][:3]: + msg = extract_commit_message(c) + if msg: + parts.append(f" - {msg}") + + if summary["files_edited"]: + parts.append(f"Files edited ({len(summary['files_edited'])}):") + for f in summary["files_edited"][:10]: + parts.append(f" - {f}") + + if summary["errors"]: + parts.append(f"Errors encountered ({len(summary['errors'])}):") + for e in summary["errors"][:3]: + parts.append(f" - {e[:150]}") + + work_desc = ", ".join(sorted(summary["work_types"])) + parts.append(f"Work types: {work_desc}") + parts.append( + f"Session size: {summary['message_count']} messages, {summary['tool_use_count']} tool calls" + ) + + return "\n".join(parts) + + +def determine_memory_type(summary: dict) -> str: + """Pick the best memory type based on work done.""" + wt = summary["work_types"] + if "fix" in wt or "debugging" in wt: + return "fix" + if "configuration" in wt: + return "configuration" + if "feature" in wt: + return "workflow" + if "refactoring" in wt: + return "code_pattern" + if "deployment" in wt: + return "workflow" + if "automation" in wt or "tooling" in wt: + return "workflow" + if "creation" in wt: + return "workflow" + return "general" + + +def extract_commit_message(commit_cmd: str) -> str | None: + """Extract the commit message from a git commit command string. + + Handles both simple quoted (-m "msg") and heredoc (-m "$(cat <<'EOF'...EOF)") + formats. Tries heredoc first since that's the standard Claude Code format. + """ + # Try heredoc format first (standard Claude Code format) + match = re.search(r"<<'?EOF'?\n(.+?)(?:\nEOF|\n\s*EOF)", commit_cmd, re.DOTALL) + if match: + # Get first non-empty line as the message + for line in match.group(1).strip().split("\n"): + line = line.strip() + if line and not line.startswith("Co-Authored-By:"): + return line[:200] + + # Fall back to simple quoted message (matching same quote type) + match = re.search(r'-m\s+"([^"]+)"', commit_cmd) + if not match: + match = re.search(r"-m\s+'([^']+)'", commit_cmd) + if match: + return match.group(1).split("\n")[0][:200] + + return None + + +def build_title(summary: dict) -> str: + """Generate a descriptive title for the memory.""" + project = summary["project"] + work = ", ".join(sorted(summary["work_types"])) + if summary["commits"]: + msg = extract_commit_message(summary["commits"][0]) + if msg: + return f"[{project}] {msg}" + return f"[{project}] Session: {work}" + + +def store_memory(summary: dict): + """Store the session memory via claude-memory CLI.""" + title = build_title(summary) + content = build_memory_content(summary) + mem_type = determine_memory_type(summary) + importance = "0.4" + + # Boost importance for commits or significant work + if summary["commits"]: + importance = "0.6" + if len(summary["files_edited"]) > 5: + importance = "0.6" + if "deployment" in summary["work_types"]: + importance = "0.7" + + # Build tags + tags = [summary["project"]] + tags.extend(sorted(summary["work_types"])) + tags.append("session-log") + tag_str = ",".join(tags) + + cmd = [ + "claude-memory", + "store", + "--type", + mem_type, + "--title", + title, + "--content", + content, + "--tags", + tag_str, + "--importance", + importance, + "--episode", + ] + + log(f"[store] Memory type: {mem_type}, importance: {importance}") + log(f"[store] Title: {title}") + log(f"[store] Tags: {tag_str}") + log(f"[store] Content length: {len(content)} chars") + log(f"[store] Command: {' '.join(cmd)}") + + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=10) + if result.returncode == 0: + log(f"[store] SUCCESS: {title}") + if result.stdout.strip(): + log(f"[store] stdout: {result.stdout.strip()[:200]}") + else: + log(f"[store] FAILED (rc={result.returncode}): {result.stderr.strip()}") + if result.stdout.strip(): + log(f"[store] stdout: {result.stdout.strip()[:200]}") + except subprocess.TimeoutExpired: + log("[store] FAILED: claude-memory timed out after 10s") + except FileNotFoundError: + log("[store] FAILED: claude-memory command not found in PATH") + except Exception as e: + log(f"[store] FAILED: {type(e).__name__}: {e}") + + +def main(): + log_separator() + + hook_input = read_stdin() + transcript_path = hook_input.get("transcript_path", "") + cwd = hook_input.get("cwd", "") + + log(f"[main] cwd: {cwd}") + log(f"[main] transcript_path: {transcript_path}") + + if not transcript_path: + log("[main] ABORT: no transcript path provided") + sys.exit(0) + + messages = read_transcript(transcript_path) + if not messages: + log("[main] ABORT: empty transcript") + sys.exit(0) + + total_messages = len(messages) + + # Only process messages after the last claude-memory command to avoid + # duplicating memories that were already stored during the session. + cutoff = find_last_memory_command_index(messages) + if cutoff >= 0: + messages = messages[cutoff + 1 :] + log(f"[main] After cutoff: {len(messages)} of {total_messages} messages remain") + if not messages: + log("[main] ABORT: no new messages after last claude-memory command") + sys.exit(0) + else: + log(f"[main] Processing all {total_messages} messages (no cutoff)") + + summary = build_session_summary(messages, cwd) + if not isinstance(summary, dict): + log(f"[main] ABORT: build_session_summary returned '{summary}'") + sys.exit(0) + + store_memory(summary) + log("[main] Done") + + +if __name__ == "__main__": + main() diff --git a/systemd/README.md b/systemd/README.md new file mode 100644 index 0000000..349f642 --- /dev/null +++ b/systemd/README.md @@ -0,0 +1,34 @@ +# Cognitive Memory Systemd Timers + +Reference copies of the systemd user units that automate memory maintenance. + +## Services + +| Unit | Schedule | What it does | +|------|----------|-------------| +| `cognitive-memory-daily` | daily | Decay scores, regenerate CORE.md, refresh MEMORY.md symlinks | +| `cognitive-memory-embed` | hourly | Refresh embeddings (skips if unchanged) | +| `cognitive-memory-weekly` | weekly | Run reflection cycle | + +## Install / Update + +```bash +# Copy units into place +cp ~/.claude/skills/cognitive-memory/systemd/*.service \ + ~/.claude/skills/cognitive-memory/systemd/*.timer \ + ~/.config/systemd/user/ + +# Reload and enable +systemctl --user daemon-reload +systemctl --user enable --now cognitive-memory-daily.timer +systemctl --user enable --now cognitive-memory-embed.timer +systemctl --user enable --now cognitive-memory-weekly.timer +``` + +## Verify + +```bash +systemctl --user list-timers 'cognitive-memory-*' +systemctl --user start cognitive-memory-daily.service # manual test run +journalctl --user -u cognitive-memory-daily.service --since today +``` diff --git a/systemd/cognitive-memory-daily.service b/systemd/cognitive-memory-daily.service new file mode 100644 index 0000000..ce1d403 --- /dev/null +++ b/systemd/cognitive-memory-daily.service @@ -0,0 +1,6 @@ +[Unit] +Description=Cognitive Memory daily maintenance (decay, core, git sync) + +[Service] +Type=oneshot +ExecStart=/bin/bash -c 'export PATH="/home/cal/.local/bin:$PATH" && /home/cal/.local/bin/claude-memory decay && /home/cal/.local/bin/claude-memory core && /mnt/NV2/Development/cognitive-memory/scripts/memory-git-sync.sh' diff --git a/systemd/cognitive-memory-daily.timer b/systemd/cognitive-memory-daily.timer new file mode 100644 index 0000000..21d12e6 --- /dev/null +++ b/systemd/cognitive-memory-daily.timer @@ -0,0 +1,9 @@ +[Unit] +Description=Run cognitive memory daily maintenance + +[Timer] +OnCalendar=daily +Persistent=true + +[Install] +WantedBy=timers.target diff --git a/systemd/cognitive-memory-embed.service b/systemd/cognitive-memory-embed.service new file mode 100644 index 0000000..a2d8fb1 --- /dev/null +++ b/systemd/cognitive-memory-embed.service @@ -0,0 +1,6 @@ +[Unit] +Description=Cognitive Memory hourly embedding refresh (skips if unchanged) + +[Service] +Type=oneshot +ExecStart=/bin/bash -c 'export PATH="/home/cal/.local/bin:$PATH" && /home/cal/.local/bin/claude-memory embed --if-changed' diff --git a/systemd/cognitive-memory-embed.timer b/systemd/cognitive-memory-embed.timer new file mode 100644 index 0000000..c6bf5b8 --- /dev/null +++ b/systemd/cognitive-memory-embed.timer @@ -0,0 +1,9 @@ +[Unit] +Description=Run cognitive memory embedding refresh hourly + +[Timer] +OnCalendar=hourly +Persistent=true + +[Install] +WantedBy=timers.target diff --git a/systemd/cognitive-memory-weekly.service b/systemd/cognitive-memory-weekly.service new file mode 100644 index 0000000..eecd737 --- /dev/null +++ b/systemd/cognitive-memory-weekly.service @@ -0,0 +1,6 @@ +[Unit] +Description=Cognitive Memory weekly reflection + +[Service] +Type=oneshot +ExecStart=/home/cal/.local/bin/claude-memory reflect diff --git a/systemd/cognitive-memory-weekly.timer b/systemd/cognitive-memory-weekly.timer new file mode 100644 index 0000000..eb4d221 --- /dev/null +++ b/systemd/cognitive-memory-weekly.timer @@ -0,0 +1,9 @@ +[Unit] +Description=Run cognitive memory weekly reflection + +[Timer] +OnCalendar=weekly +Persistent=true + +[Install] +WantedBy=timers.target