Add --if-changed flag to embed, configurable Ollama model, hourly timer
- embed --if-changed compares memory ID sets to skip when unchanged
- --ollama-model config flag to switch models without code changes
- _ollama_embed now accepts model param, respects config throughout
- Switched default to qwen3-embedding:8b (70+ MTEB vs nomic's ~52)
- Hourly systemd timer for embedding, daily timer no longer embeds
- Fixed PATH for uv in both systemd services

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a2d18ef0c2
commit
4770c15429
@ -410,14 +410,16 @@ def calculate_decay_score(
|
||||
|
||||
|
||||
def _ollama_embed(
|
||||
texts: List[str], timeout: int = EMBEDDING_TIMEOUT
|
||||
texts: List[str],
|
||||
model: str = EMBEDDING_MODEL,
|
||||
timeout: int = EMBEDDING_TIMEOUT,
|
||||
) -> Optional[List[List[float]]]:
|
||||
"""Get embeddings from Ollama for a list of texts.
|
||||
|
||||
Returns list of embedding vectors, or None if Ollama is unavailable.
|
||||
"""
|
||||
try:
|
||||
payload = json.dumps({"model": EMBEDDING_MODEL, "input": texts}).encode("utf-8")
|
||||
payload = json.dumps({"model": model, "input": texts}).encode("utf-8")
|
||||
req = urllib.request.Request(
|
||||
f"{OLLAMA_URL}/api/embed",
|
||||
data=payload,
|
||||
@ -2050,13 +2052,13 @@ class CognitiveMemoryClient:
|
||||
return vectors, "openai", model
|
||||
# Fallback to ollama
|
||||
ollama_model = config.get("ollama_model", EMBEDDING_MODEL)
|
||||
vectors = _ollama_embed(texts, timeout=timeout)
|
||||
vectors = _ollama_embed(texts, model=ollama_model, timeout=timeout)
|
||||
if vectors is not None:
|
||||
return vectors, "ollama", ollama_model
|
||||
else:
|
||||
# ollama first
|
||||
ollama_model = config.get("ollama_model", EMBEDDING_MODEL)
|
||||
vectors = _ollama_embed(texts, timeout=timeout)
|
||||
vectors = _ollama_embed(texts, model=ollama_model, timeout=timeout)
|
||||
if vectors is not None:
|
||||
return vectors, "ollama", ollama_model
|
||||
# Fallback to openai
|
||||
@ -2069,11 +2071,15 @@ class CognitiveMemoryClient:
|
||||
|
||||
return None, "", ""
|
||||
|
||||
def embed(self) -> Dict[str, Any]:
|
||||
def embed(self, if_changed: bool = False) -> Dict[str, Any]:
|
||||
"""Generate embeddings for all memories using configured provider.
|
||||
|
||||
Detects provider changes and re-embeds everything (dimension mismatch safety).
|
||||
Stores vectors in _embeddings.json (not git-tracked).
|
||||
|
||||
Args:
|
||||
if_changed: If True, skip embedding if the set of memory IDs hasn't
|
||||
changed since last run (no new/deleted memories).
|
||||
"""
|
||||
index = self._load_index()
|
||||
entries = index.get("entries", {})
|
||||
@ -2097,12 +2103,29 @@ class CognitiveMemoryClient:
|
||||
|
||||
config = self._get_embedding_provider()
|
||||
new_provider = config.get("embedding_provider", "ollama")
|
||||
if old_provider and old_provider != new_provider:
|
||||
provider_changed = old_provider and old_provider != new_provider
|
||||
if provider_changed:
|
||||
print(
|
||||
f"Provider changed ({old_provider} -> {new_provider}), re-embedding all memories...",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
# Skip if nothing changed (unless provider switched)
|
||||
if if_changed and not provider_changed and embeddings_path.exists():
|
||||
try:
|
||||
old_data = json.loads(embeddings_path.read_text())
|
||||
embedded_ids = set(old_data.get("entries", {}).keys())
|
||||
index_ids = set(entries.keys())
|
||||
if embedded_ids == index_ids:
|
||||
return {
|
||||
"embedded": 0,
|
||||
"skipped": True,
|
||||
"reason": "no new or deleted memories",
|
||||
"path": str(embeddings_path),
|
||||
}
|
||||
except (json.JSONDecodeError, OSError):
|
||||
pass # Can't read old data, re-embed
|
||||
|
||||
# Build texts to embed
|
||||
memory_ids = list(entries.keys())
|
||||
texts = []
|
||||
@ -2180,7 +2203,8 @@ class CognitiveMemoryClient:
|
||||
if vecs:
|
||||
query_vec = vecs[0]
|
||||
if query_vec is None and stored_provider == "ollama":
|
||||
vecs = _ollama_embed([query])
|
||||
stored_model = emb_data.get("model", EMBEDDING_MODEL)
|
||||
vecs = _ollama_embed([query], model=stored_model)
|
||||
if vecs:
|
||||
query_vec = vecs[0]
|
||||
# Last resort: try any available provider
|
||||
@ -2896,9 +2920,14 @@ def main():
|
||||
subparsers.add_parser("reindex", help="Rebuild index from files")
|
||||
|
||||
# embed
|
||||
subparsers.add_parser(
|
||||
embed_parser = subparsers.add_parser(
|
||||
"embed", help="Generate embeddings for all memories via Ollama"
|
||||
)
|
||||
embed_parser.add_argument(
|
||||
"--if-changed",
|
||||
action="store_true",
|
||||
help="Skip if no memories were added or deleted since last embed",
|
||||
)
|
||||
|
||||
# pin
|
||||
sp = subparsers.add_parser("pin", help="Move memory to vault (never decays)")
|
||||
@ -2958,6 +2987,9 @@ def main():
|
||||
"--provider", choices=["ollama", "openai"], help="Set embedding provider"
|
||||
)
|
||||
sp.add_argument("--openai-key", help="Set OpenAI API key")
|
||||
sp.add_argument(
|
||||
"--ollama-model", help="Set Ollama model name (e.g. qwen3-embedding:8b)"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@ -3119,11 +3151,13 @@ def main():
|
||||
result = {"success": True, "indexed": count}
|
||||
|
||||
elif args.command == "embed":
|
||||
print(
|
||||
"Generating embeddings (this may take a while if model needs to be pulled)...",
|
||||
file=sys.stderr,
|
||||
)
|
||||
result = client.embed()
|
||||
if_changed = getattr(args, "if_changed", False)
|
||||
if not if_changed:
|
||||
print(
|
||||
"Generating embeddings (this may take a while if model needs to be pulled)...",
|
||||
file=sys.stderr,
|
||||
)
|
||||
result = client.embed(if_changed=if_changed)
|
||||
|
||||
elif args.command == "pin":
|
||||
success = client.pin(args.memory_id)
|
||||
@ -3204,6 +3238,9 @@ def main():
|
||||
if args.openai_key:
|
||||
config["openai_api_key"] = args.openai_key
|
||||
changed = True
|
||||
if args.ollama_model:
|
||||
config["ollama_model"] = args.ollama_model
|
||||
changed = True
|
||||
|
||||
if changed:
|
||||
config_path.write_text(json.dumps(config, indent=2))
|
||||
|
||||
Loading…
Reference in New Issue
Block a user