Moved application code from ~/.claude/skills/cognitive-memory/ to its own project directory. The skill layer (SKILL.md, SCHEMA.md) remains in the skill directory for Claude Code to read. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
428 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""Analyze cognitive memories and propose high-quality edges.
|
|
|
|
Reads all active/fading memories (decay >= 0.2), groups by shared tags,
|
|
and scores candidate relationships based on three signals:
|
|
- Type heuristics (40%): e.g. fix+problem → SOLVES, decision+solution → BUILDS_ON
|
|
- Tag overlap (30%): Jaccard similarity of tag sets
|
|
- Content similarity (30%): Keyword overlap in memory body text
|
|
|
|
Outputs ranked proposals to stdout and saves top 80 as JSON for
|
|
programmatic use by Claude Code sessions.
|
|
|
|
Usage:
|
|
python3 edge-proposer.py
|
|
|
|
# Then review stdout output, pick good candidates, and create edges via:
|
|
# MCP: memory_relate(from_id, to_id, rel_type, description, strength)
|
|
# CLI: claude-memory relate <from_id> <to_id> <REL_TYPE> --description "..."
|
|
|
|
Output:
|
|
- Ranked candidates printed to stdout (score, type, titles, shared tags)
|
|
- JSON file saved to ~/.claude/tmp/edge-candidates.json
|
|
|
|
Scoring:
|
|
- Minimum threshold: 0.15 (below this, candidates are discarded)
|
|
- Importance boost: 1.2x multiplier when avg importance >= 0.7
|
|
- Session-log memories tend to produce noise — review FOLLOWS edges carefully
|
|
|
|
Relation types available:
|
|
SOLVES, CAUSES, BUILDS_ON, ALTERNATIVE_TO, REQUIRES, FOLLOWS, RELATED_TO
|
|
|
|
First run: 2026-02-19 — produced 5186 candidates from 473 memories,
|
|
20 high-quality edges were manually selected and created.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
from itertools import combinations
|
|
|
|
# Data directory resolution order: COGNITIVE_MEMORY_DIR > XDG_DATA_HOME > default.
_override = os.environ.get("COGNITIVE_MEMORY_DIR", "")
if _override:
    MEMORY_DIR = Path(_override).expanduser()
else:
    _data_home = os.environ.get("XDG_DATA_HOME", "") or str(
        Path.home() / ".local" / "share"
    )
    MEMORY_DIR = Path(_data_home) / "cognitive-memory"

STATE_FILE = MEMORY_DIR / "_state.json"
GRAPH_DIR = MEMORY_DIR / "graph"
EDGES_DIR = GRAPH_DIR / "edges"

# Type-based heuristics: (type_a, type_b) -> (suggested_rel, direction, base_score).
# direction "ab" means a->b, "ba" means b->a, and None means the orientation
# is decided later (by relative importance in score_pair).
TYPE_HEURISTICS = {
    ("fix", "problem"): ("SOLVES", "ab", 0.6),
    ("solution", "problem"): ("SOLVES", "ab", 0.7),
    ("solution", "error"): ("SOLVES", "ab", 0.6),
    ("fix", "error"): ("SOLVES", "ab", 0.6),
    ("decision", "solution"): ("BUILDS_ON", "ab", 0.3),
    ("decision", "decision"): ("ALTERNATIVE_TO", None, 0.2),
    ("solution", "solution"): ("BUILDS_ON", None, 0.2),
    ("configuration", "solution"): ("REQUIRES", "ab", 0.3),
    ("workflow", "configuration"): ("REQUIRES", "ab", 0.3),
    ("insight", "solution"): ("BUILDS_ON", "ab", 0.4),
    ("insight", "decision"): ("BUILDS_ON", "ab", 0.4),
    ("fix", "fix"): ("FOLLOWS", None, 0.15),
    ("fix", "solution"): ("BUILDS_ON", "ab", 0.2),
    ("code_pattern", "solution"): ("BUILDS_ON", "ab", 0.3),
    ("procedure", "workflow"): ("BUILDS_ON", "ab", 0.3),
    ("configuration", "configuration"): ("RELATED_TO", None, 0.1),
}
|
|
|
|
|
|
def parse_frontmatter(filepath: Path) -> dict | None:
|
|
"""Parse YAML frontmatter from a markdown file."""
|
|
try:
|
|
text = filepath.read_text(encoding="utf-8")
|
|
except Exception:
|
|
return None
|
|
|
|
if not text.startswith("---"):
|
|
return None
|
|
|
|
end = text.find("---", 3)
|
|
if end == -1:
|
|
return None
|
|
|
|
fm = {}
|
|
body = text[end + 3 :].strip()
|
|
fm["_body"] = body[:500] # first 500 chars of content for matching
|
|
fm["_filepath"] = str(filepath)
|
|
|
|
for line in text[3:end].strip().splitlines():
|
|
if ":" not in line:
|
|
continue
|
|
key, _, val = line.partition(":")
|
|
key = key.strip()
|
|
val = val.strip().strip('"').strip("'")
|
|
|
|
if key == "tags":
|
|
# Handle both [a, b] and "a, b" formats
|
|
val = val.strip("[]")
|
|
fm["tags"] = [
|
|
t.strip().strip('"').strip("'") for t in val.split(",") if t.strip()
|
|
]
|
|
elif key == "importance":
|
|
try:
|
|
fm["importance"] = float(val)
|
|
except ValueError:
|
|
pass
|
|
else:
|
|
fm[key] = val
|
|
|
|
return fm
|
|
|
|
|
|
def load_memories() -> dict[str, dict]:
    """Scan every per-type graph subdirectory and load memory frontmatter.

    Returns a mapping of memory id -> parsed frontmatter dict. Files whose
    frontmatter cannot be parsed or that lack an ``id`` field are skipped.
    """
    type_dirs = (
        "solutions",
        "fixes",
        "decisions",
        "configurations",
        "problems",
        "workflows",
        "code-patterns",
        "errors",
        "general",
        "procedures",
        "insights",
    )

    memories: dict[str, dict] = {}
    for subdir in type_dirs:
        folder = GRAPH_DIR / subdir
        if not folder.exists():
            continue
        for md_file in folder.glob("*.md"):
            meta = parse_frontmatter(md_file)
            if meta and "id" in meta:
                memories[meta["id"]] = meta

    return memories
|
|
|
|
|
|
def load_existing_edges() -> set[tuple[str, str]]:
    """Return the set of already-linked memory id pairs, in both orientations.

    Each stored edge contributes (from, to) AND (to, from), so duplicate
    detection later is a single set-membership test regardless of order.
    """
    pairs: set[tuple[str, str]] = set()

    if not EDGES_DIR.exists():
        return pairs

    for edge_file in EDGES_DIR.glob("*.md"):
        meta = parse_frontmatter(edge_file)
        if not (meta and "from_id" in meta and "to_id" in meta):
            continue
        src, dst = meta["from_id"], meta["to_id"]
        pairs.add((src, dst))
        pairs.add((dst, src))  # bidirectional check

    return pairs
|
|
|
|
|
|
def load_decay_state() -> dict[str, float]:
    """Read per-memory decay scores from the state file.

    Returns an empty dict when the file is missing, unreadable, or not in
    the expected shape — absence of decay data is non-fatal by design.
    """
    if not STATE_FILE.exists():
        return {}
    try:
        raw = json.loads(STATE_FILE.read_text())
        return {mem_id: entry.get("decay_score", 0) for mem_id, entry in raw.items()}
    except Exception:
        # Corrupt or unexpectedly-shaped state file: proceed without decay info.
        return {}
|
|
|
|
|
|
def tag_overlap_score(tags_a: list[str], tags_b: list[str]) -> float:
    """Return the Jaccard similarity of two tag lists (0.0 if either is empty)."""
    if not tags_a or not tags_b:
        return 0.0
    first, second = set(tags_a), set(tags_b)
    union_size = len(first | second)
    if not union_size:
        return 0.0
    return len(first & second) / union_size
|
|
|
|
|
|
# Identifier-like tokens of 4+ letters/underscores; compiled once at import
# instead of re-scanned through the re-cache on every call.
_WORD_RE = re.compile(r"[a-zA-Z_]{4,}")

# Common English and domain filler words excluded from keyword matching.
# Hoisted to module level so the set is built once, not on every invocation
# of content_keyword_overlap (which runs for every candidate pair).
_STOPWORDS = frozenset({
    "that", "this", "with", "from", "have", "been", "were", "they", "their",
    "will", "would", "could", "should", "which", "where", "when", "what",
    "about", "into", "also", "more", "some", "then", "than", "each", "only",
    "used", "using", "after", "before", "because", "between", "through",
    "during", "added", "updated", "fixed", "error", "issue", "problem",
    "solution", "memory", "memories", "configuration", "successfully",
    "working", "works",
})


def content_keyword_overlap(body_a: str, body_b: str) -> float:
    """Jaccard similarity of the keyword sets of two content bodies.

    Keywords are lowercased runs of 4+ letters/underscores with common
    filler words removed. Returns 0.0 when either body is empty or yields
    no keywords.
    """
    if not body_a or not body_b:
        return 0.0

    def extract_keywords(text: str) -> set[str]:
        return {w for w in _WORD_RE.findall(text.lower()) if w not in _STOPWORDS}

    kw_a = extract_keywords(body_a)
    kw_b = extract_keywords(body_b)

    if not kw_a or not kw_b:
        return 0.0

    intersection = kw_a & kw_b
    union = kw_a | kw_b
    return len(intersection) / len(union) if union else 0.0
|
|
|
|
|
|
def get_type_heuristic(
    type_a: str, type_b: str
) -> tuple[str, str | None, float] | None:
    """Look up the (rel_type, direction, base_score) heuristic for a type pair.

    Tries (type_a, type_b) first; if only the reversed ordering is present
    in TYPE_HEURISTICS, returns it with the direction flipped so the result
    still reads relative to (a, b). Returns None when no heuristic exists.
    """
    hit = TYPE_HEURISTICS.get((type_a, type_b))
    if hit is not None:
        return hit

    hit = TYPE_HEURISTICS.get((type_b, type_a))
    if hit is None:
        return None

    rel, direction, score = hit
    # Flip orientation; a None (undirected) direction stays None.
    flipped = {"ab": "ba", "ba": "ab"}.get(direction)
    return rel, flipped, score
|
|
|
|
|
|
def _coerce_importance(value, default: float = 0.5) -> float:
    """Coerce a frontmatter importance value to float, falling back on bad input."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def score_pair(mem_a: dict, mem_b: dict) -> dict | None:
    """Score a candidate edge between two memories.

    Returns a proposal dict (score, rel_type, from/to ids and titles,
    shared tags, per-signal sub-scores) or None when the pair shares no
    tag or scores below the 0.15 threshold.
    """
    tags_a = mem_a.get("tags", [])
    tags_b = mem_b.get("tags", [])

    # Hard gate: pairs with no shared tag are never proposed.
    shared_tags = set(tags_a) & set(tags_b)
    if not shared_tags:
        return None

    tag_score = tag_overlap_score(tags_a, tags_b)
    content_score = content_keyword_overlap(
        mem_a.get("_body", ""), mem_b.get("_body", "")
    )

    type_a = mem_a.get("type", "general")
    type_b = mem_b.get("type", "general")

    heuristic = get_type_heuristic(type_a, type_b)
    if heuristic:
        suggested_rel, direction, type_score = heuristic
    else:
        # Unknown type pairing: weak, undirected generic relation.
        suggested_rel = "RELATED_TO"
        direction = None
        type_score = 0.05

    # Composite score.
    # NOTE(review): the module docstring advertises type=40%/tag=30%/content=30%,
    # but the weights here are tag=40%/content=30%/type=30% — confirm which is
    # intended and reconcile the docstring or these weights.
    total = (tag_score * 0.4) + (content_score * 0.3) + (type_score * 0.3)

    # Boost pairs where both memories matter. Importance may arrive as a
    # string (frontmatter parsing is lenient), so coerce defensively instead
    # of letting one malformed value crash the entire scan.
    imp_a = _coerce_importance(mem_a.get("importance", 0.5))
    imp_b = _coerce_importance(mem_b.get("importance", 0.5))
    avg_importance = (imp_a + imp_b) / 2
    if avg_importance >= 0.7:
        total *= 1.2

    if total < 0.15:
        return None

    # Orient the edge: an explicit heuristic direction wins; otherwise the
    # more important memory becomes the source.
    if direction == "ab":
        from_mem, to_mem = mem_a, mem_b
    elif direction == "ba":
        from_mem, to_mem = mem_b, mem_a
    elif imp_a >= imp_b:
        from_mem, to_mem = mem_a, mem_b
    else:
        from_mem, to_mem = mem_b, mem_a

    return {
        "score": round(total, 3),
        "rel_type": suggested_rel,
        "from_id": from_mem["id"],
        "from_title": from_mem.get("title", "?"),
        "from_type": from_mem.get("type", "?"),
        "to_id": to_mem["id"],
        "to_title": to_mem.get("title", "?"),
        "to_type": to_mem.get("type", "?"),
        "shared_tags": sorted(shared_tags),
        "tag_score": round(tag_score, 3),
        "content_score": round(content_score, 3),
        "type_score": round(type_score, 3),
    }
|
|
|
|
|
|
def main():
    """Run the full proposal pipeline: load, filter, score, rank, report."""
    print("Loading memories...")
    memories = load_memories()
    print(f" Found {len(memories)} memories")

    print("Loading decay state...")
    decay_scores = load_decay_state()

    # Filter to active + fading only (decay >= 0.2)
    active_ids = {
        mid for mid, score in decay_scores.items() if score >= 0.2 and mid in memories
    }
    # Also include memories without decay state (new)
    for mid in memories:
        if mid not in decay_scores:
            active_ids.add(mid)

    active_memories = {mid: memories[mid] for mid in active_ids}
    print(f" {len(active_memories)} active/fading memories to analyze")

    print("Loading existing edges...")
    existing = load_existing_edges()
    # Each edge is stored in both orientations, so halve for the true count.
    print(f" {len(existing) // 2} existing edges")

    print("Scoring candidate pairs...")
    candidates = []

    # Group by shared tags first to reduce pair space
    tag_groups = defaultdict(set)
    for mid, mem in active_memories.items():
        for tag in mem.get("tags", []):
            tag_groups[tag].add(mid)

    # Collect unique pairs that share at least one tag
    seen_pairs = set()
    for tag, mids in tag_groups.items():
        if len(mids) < 2 or len(mids) > 50:  # skip too-common tags
            continue
        for a, b in combinations(mids, 2):
            # Canonical (sorted) pair so the same duo seen via a second
            # shared tag is not scored twice.
            pair = tuple(sorted([a, b]))
            if pair in seen_pairs:
                continue
            if (a, b) in existing or (b, a) in existing:
                continue
            seen_pairs.add(pair)

            result = score_pair(active_memories[a], active_memories[b])
            if result:
                candidates.append(result)

    # Sort by score descending
    candidates.sort(key=lambda x: x["score"], reverse=True)

    print(f"\n{'='*100}")
    print(f"TOP EDGE CANDIDATES ({len(candidates)} total, showing top 80)")
    print(f"{'='*100}\n")

    for i, c in enumerate(candidates[:80], 1):
        print(f"#{i:3d} | Score: {c['score']:.3f} | {c['rel_type']}")
        print(f" FROM [{c['from_type']}] {c['from_title']}")
        print(f" TO [{c['to_type']}] {c['to_title']}")
        print(
            f" Tags: {', '.join(c['shared_tags'])} | "
            f"tag={c['tag_score']:.2f} content={c['content_score']:.2f} type={c['type_score']:.2f}"
        )
        print(f" IDs: {c['from_id']} -> {c['to_id']}")
        print()

    # Also output as JSON for programmatic use
    # NOTE(review): MEMORY_DIR.parent / "tmp" matches the documented
    # ~/.claude/tmp/ path only when COGNITIVE_MEMORY_DIR points inside
    # ~/.claude/; under the XDG default this resolves to ~/.local/share/tmp
    # instead — confirm which location is intended.
    json_path = MEMORY_DIR.parent / "tmp" / "edge-candidates.json"
    json_path.parent.mkdir(parents=True, exist_ok=True)
    json_path.write_text(json.dumps(candidates[:80], indent=2))
    print(f"Full candidates saved to: {json_path}")


if __name__ == "__main__":
    main()
|