Rebalance semantic/keyword merge weights to 60/40

Normalize both signals to the 0-1 range so semantic similarity scores
aren't drowned out by keyword position scores. The Jellyfin DB recovery
result now ranks #1 for the query "media server database broken sqlite
error" instead of being buried behind noisy keyword matches on "error".

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Cal Corum 2026-02-19 15:07:43 -06:00
parent 4770c15429
commit aed98a3cc8

View File

@ -942,31 +942,35 @@ class CognitiveMemoryClient:
keyword_results = results[:limit]
# Merge with semantic results if requested
# Weights: semantic 60%, keyword 40% (--semantic signals intent for
# conceptual matching; keyword acts as precision boost for exact terms)
if semantic:
embeddings_path = self.memory_dir / "_embeddings.json"
if embeddings_path.exists():
sem_results = self.semantic_recall(query, limit=limit)
if sem_results:
# Build merged score map: keyword_score + similarity * 5
score_map: Dict[str, float] = {}
result_map: Dict[str, Dict] = {}
# Add keyword results with their position-based score
# Keyword: normalize rank to 0-1 (rank 1 = 1.0, rank `limit` = 1/limit, e.g. 0.1 when limit=10)
kw_weight = 0.4
for i, r in enumerate(keyword_results):
mid = r["id"]
score_map[mid] = float(limit - i) # higher rank = higher score
normalized = (limit - i) / limit
score_map[mid] = normalized * kw_weight
result_map[mid] = r
# Add semantic results
# Semantic: similarity is already 0-1
sem_weight = 0.6
for r in sem_results:
mid = r["id"]
sim_score = r.get("similarity", 0.0) * 5
sim = r.get("similarity", 0.0)
sem_score = sim * sem_weight
if mid in score_map:
score_map[mid] += sim_score
result_map[mid]["similarity"] = r.get("similarity", 0.0)
score_map[mid] += sem_score
result_map[mid]["similarity"] = sim
else:
score_map[mid] = sim_score
# Enrich with index data for consistent return format
score_map[mid] = sem_score
idx_entry = index.get("entries", {}).get(mid, {})
s = state.get("entries", {}).get(mid, {})
result_map[mid] = {
@ -976,7 +980,7 @@ class CognitiveMemoryClient:
"tags": r.get("tags", []),
"importance": idx_entry.get("importance"),
"decay_score": round(s.get("decay_score", 0.5), 3),
"similarity": r.get("similarity", 0.0),
"similarity": sim,
"path": r.get("path"),
"created": idx_entry.get("created"),
}