Add --if-changed flag to embed, configurable Ollama model, hourly timer

- embed --if-changed compares memory ID sets and skips embedding when they are unchanged
- --ollama-model config flag to switch models without code changes
- _ollama_embed now accepts model param, respects config throughout
- Switched default to qwen3-embedding:8b (70+ MTEB vs nomic's ~52)
- Added hourly systemd timer for embedding; the daily timer no longer embeds
- Fixed PATH for uv in both systemd services

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Cal Corum 2026-02-19 14:54:21 -06:00
parent a2d18ef0c2
commit 4770c15429

View File

@ -410,14 +410,16 @@ def calculate_decay_score(
def _ollama_embed(
texts: List[str], timeout: int = EMBEDDING_TIMEOUT
texts: List[str],
model: str = EMBEDDING_MODEL,
timeout: int = EMBEDDING_TIMEOUT,
) -> Optional[List[List[float]]]:
"""Get embeddings from Ollama for a list of texts.
Returns list of embedding vectors, or None if Ollama is unavailable.
"""
try:
payload = json.dumps({"model": EMBEDDING_MODEL, "input": texts}).encode("utf-8")
payload = json.dumps({"model": model, "input": texts}).encode("utf-8")
req = urllib.request.Request(
f"{OLLAMA_URL}/api/embed",
data=payload,
@ -2050,13 +2052,13 @@ class CognitiveMemoryClient:
return vectors, "openai", model
# Fallback to ollama
ollama_model = config.get("ollama_model", EMBEDDING_MODEL)
vectors = _ollama_embed(texts, timeout=timeout)
vectors = _ollama_embed(texts, model=ollama_model, timeout=timeout)
if vectors is not None:
return vectors, "ollama", ollama_model
else:
# ollama first
ollama_model = config.get("ollama_model", EMBEDDING_MODEL)
vectors = _ollama_embed(texts, timeout=timeout)
vectors = _ollama_embed(texts, model=ollama_model, timeout=timeout)
if vectors is not None:
return vectors, "ollama", ollama_model
# Fallback to openai
@ -2069,11 +2071,15 @@ class CognitiveMemoryClient:
return None, "", ""
def embed(self) -> Dict[str, Any]:
def embed(self, if_changed: bool = False) -> Dict[str, Any]:
"""Generate embeddings for all memories using configured provider.
Detects provider changes and re-embeds everything (dimension mismatch safety).
Stores vectors in _embeddings.json (not git-tracked).
Args:
if_changed: If True, skip embedding if the set of memory IDs hasn't
changed since last run (no new/deleted memories).
"""
index = self._load_index()
entries = index.get("entries", {})
@ -2097,12 +2103,29 @@ class CognitiveMemoryClient:
config = self._get_embedding_provider()
new_provider = config.get("embedding_provider", "ollama")
if old_provider and old_provider != new_provider:
provider_changed = old_provider and old_provider != new_provider
if provider_changed:
print(
f"Provider changed ({old_provider} -> {new_provider}), re-embedding all memories...",
file=sys.stderr,
)
# Skip if nothing changed (unless provider switched)
if if_changed and not provider_changed and embeddings_path.exists():
try:
old_data = json.loads(embeddings_path.read_text())
embedded_ids = set(old_data.get("entries", {}).keys())
index_ids = set(entries.keys())
if embedded_ids == index_ids:
return {
"embedded": 0,
"skipped": True,
"reason": "no new or deleted memories",
"path": str(embeddings_path),
}
except (json.JSONDecodeError, OSError):
pass # Can't read old data, re-embed
# Build texts to embed
memory_ids = list(entries.keys())
texts = []
@ -2180,7 +2203,8 @@ class CognitiveMemoryClient:
if vecs:
query_vec = vecs[0]
if query_vec is None and stored_provider == "ollama":
vecs = _ollama_embed([query])
stored_model = emb_data.get("model", EMBEDDING_MODEL)
vecs = _ollama_embed([query], model=stored_model)
if vecs:
query_vec = vecs[0]
# Last resort: try any available provider
@ -2896,9 +2920,14 @@ def main():
subparsers.add_parser("reindex", help="Rebuild index from files")
# embed
subparsers.add_parser(
embed_parser = subparsers.add_parser(
"embed", help="Generate embeddings for all memories via Ollama"
)
embed_parser.add_argument(
"--if-changed",
action="store_true",
help="Skip if no memories were added or deleted since last embed",
)
# pin
sp = subparsers.add_parser("pin", help="Move memory to vault (never decays)")
@ -2958,6 +2987,9 @@ def main():
"--provider", choices=["ollama", "openai"], help="Set embedding provider"
)
sp.add_argument("--openai-key", help="Set OpenAI API key")
sp.add_argument(
"--ollama-model", help="Set Ollama model name (e.g. qwen3-embedding:8b)"
)
args = parser.parse_args()
@ -3119,11 +3151,13 @@ def main():
result = {"success": True, "indexed": count}
elif args.command == "embed":
print(
"Generating embeddings (this may take a while if model needs to be pulled)...",
file=sys.stderr,
)
result = client.embed()
if_changed = getattr(args, "if_changed", False)
if not if_changed:
print(
"Generating embeddings (this may take a while if model needs to be pulled)...",
file=sys.stderr,
)
result = client.embed(if_changed=if_changed)
elif args.command == "pin":
success = client.pin(args.memory_id)
@ -3204,6 +3238,9 @@ def main():
if args.openai_key:
config["openai_api_key"] = args.openai_key
changed = True
if args.ollama_model:
config["ollama_model"] = args.ollama_model
changed = True
if changed:
config_path.write_text(json.dumps(config, indent=2))