diff --git a/skills/cognitive-memory/client.py b/skills/cognitive-memory/client.py index 8d0b513..b563cf9 100644 --- a/skills/cognitive-memory/client.py +++ b/skills/cognitive-memory/client.py @@ -410,14 +410,16 @@ def calculate_decay_score( def _ollama_embed( - texts: List[str], timeout: int = EMBEDDING_TIMEOUT + texts: List[str], + model: str = EMBEDDING_MODEL, + timeout: int = EMBEDDING_TIMEOUT, ) -> Optional[List[List[float]]]: """Get embeddings from Ollama for a list of texts. Returns list of embedding vectors, or None if Ollama is unavailable. """ try: - payload = json.dumps({"model": EMBEDDING_MODEL, "input": texts}).encode("utf-8") + payload = json.dumps({"model": model, "input": texts}).encode("utf-8") req = urllib.request.Request( f"{OLLAMA_URL}/api/embed", data=payload, @@ -2050,13 +2052,13 @@ class CognitiveMemoryClient: return vectors, "openai", model # Fallback to ollama ollama_model = config.get("ollama_model", EMBEDDING_MODEL) - vectors = _ollama_embed(texts, timeout=timeout) + vectors = _ollama_embed(texts, model=ollama_model, timeout=timeout) if vectors is not None: return vectors, "ollama", ollama_model else: # ollama first ollama_model = config.get("ollama_model", EMBEDDING_MODEL) - vectors = _ollama_embed(texts, timeout=timeout) + vectors = _ollama_embed(texts, model=ollama_model, timeout=timeout) if vectors is not None: return vectors, "ollama", ollama_model # Fallback to openai @@ -2069,11 +2071,15 @@ class CognitiveMemoryClient: return None, "", "" - def embed(self) -> Dict[str, Any]: + def embed(self, if_changed: bool = False) -> Dict[str, Any]: """Generate embeddings for all memories using configured provider. Detects provider changes and re-embeds everything (dimension mismatch safety). Stores vectors in _embeddings.json (not git-tracked). + + Args: + if_changed: If True, skip embedding if the set of memory IDs hasn't + changed since last run (no new/deleted memories). """ index = self._load_index() entries = index.get("entries", {}) @@ -2097,12 +2103,29 @@ class CognitiveMemoryClient: config = self._get_embedding_provider() new_provider = config.get("embedding_provider", "ollama") - if old_provider and old_provider != new_provider: + provider_changed = old_provider and old_provider != new_provider + if provider_changed: print( f"Provider changed ({old_provider} -> {new_provider}), re-embedding all memories...", file=sys.stderr, ) + # Skip if nothing changed (unless provider switched) + if if_changed and not provider_changed and embeddings_path.exists(): + try: + old_data = json.loads(embeddings_path.read_text()) + embedded_ids = set(old_data.get("entries", {}).keys()) + index_ids = set(entries.keys()) + if embedded_ids == index_ids: + return { + "embedded": 0, + "skipped": True, + "reason": "no new or deleted memories", + "path": str(embeddings_path), + } + except (json.JSONDecodeError, OSError): + pass # Can't read old data, re-embed + # Build texts to embed memory_ids = list(entries.keys()) texts = [] @@ -2180,7 +2203,8 @@ class CognitiveMemoryClient: if vecs: query_vec = vecs[0] if query_vec is None and stored_provider == "ollama": - vecs = _ollama_embed([query]) + stored_model = emb_data.get("model", EMBEDDING_MODEL) + vecs = _ollama_embed([query], model=stored_model) if vecs: query_vec = vecs[0] # Last resort: try any available provider @@ -2896,9 +2920,14 @@ def main(): subparsers.add_parser("reindex", help="Rebuild index from files") # embed - subparsers.add_parser( + embed_parser = subparsers.add_parser( "embed", help="Generate embeddings for all memories via Ollama" ) + embed_parser.add_argument( + "--if-changed", + action="store_true", + help="Skip if no memories were added or deleted since last embed", + ) # pin sp = subparsers.add_parser("pin", help="Move memory to vault (never decays)") @@ -2958,6 +2987,9 @@ def main(): "--provider", choices=["ollama", "openai"], help="Set embedding provider" ) sp.add_argument("--openai-key", help="Set OpenAI API key") + sp.add_argument( + "--ollama-model", help="Set Ollama model name (e.g. qwen3-embedding:8b)" + ) args = parser.parse_args() @@ -3119,11 +3151,13 @@ def main(): result = {"success": True, "indexed": count} elif args.command == "embed": - print( - "Generating embeddings (this may take a while if model needs to be pulled)...", - file=sys.stderr, - ) - result = client.embed() + if_changed = getattr(args, "if_changed", False) + if not if_changed: + print( + "Generating embeddings (this may take a while if model needs to be pulled)...", + file=sys.stderr, + ) + result = client.embed(if_changed=if_changed) elif args.command == "pin": success = client.pin(args.memory_id) @@ -3204,6 +3238,9 @@ def main(): if args.openai_key: config["openai_api_key"] = args.openai_key changed = True + if args.ollama_model: + config["ollama_model"] = args.ollama_model + changed = True if changed: config_path.write_text(json.dumps(config, indent=2))