Rebalance semantic/keyword merge weights to 60/40
Normalize both signals to the 0-1 range so semantic similarity scores aren't drowned out by keyword position scores. Jellyfin DB recovery now ranks #1 for "media server database broken sqlite error" instead of being buried behind keyword noise matches on "error".

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
4770c15429
commit
aed98a3cc8
@ -942,31 +942,35 @@ class CognitiveMemoryClient:
|
||||
keyword_results = results[:limit]
|
||||
|
||||
# Merge with semantic results if requested
|
||||
# Weights: semantic 60%, keyword 40% (--semantic signals intent for
|
||||
# conceptual matching; keyword acts as precision boost for exact terms)
|
||||
if semantic:
|
||||
embeddings_path = self.memory_dir / "_embeddings.json"
|
||||
if embeddings_path.exists():
|
||||
sem_results = self.semantic_recall(query, limit=limit)
|
||||
if sem_results:
|
||||
# Build merged score map: normalized keyword rank * 0.4 + similarity * 0.6
|
||||
score_map: Dict[str, float] = {}
|
||||
result_map: Dict[str, Dict] = {}
|
||||
|
||||
# Add keyword results with their position-based score
|
||||
# Keyword: normalize rank to 0-1 (rank 1 = 1.0, rank `limit` = 1/limit, e.g. 0.1 when limit is 10)
|
||||
kw_weight = 0.4
|
||||
for i, r in enumerate(keyword_results):
|
||||
mid = r["id"]
|
||||
score_map[mid] = float(limit - i) # higher rank = higher score
|
||||
normalized = (limit - i) / limit
|
||||
score_map[mid] = normalized * kw_weight
|
||||
result_map[mid] = r
|
||||
|
||||
# Add semantic results
|
||||
# Semantic: similarity is already 0-1
|
||||
sem_weight = 0.6
|
||||
for r in sem_results:
|
||||
mid = r["id"]
|
||||
sim_score = r.get("similarity", 0.0) * 5
|
||||
sim = r.get("similarity", 0.0)
|
||||
sem_score = sim * sem_weight
|
||||
if mid in score_map:
|
||||
score_map[mid] += sim_score
|
||||
result_map[mid]["similarity"] = r.get("similarity", 0.0)
|
||||
score_map[mid] += sem_score
|
||||
result_map[mid]["similarity"] = sim
|
||||
else:
|
||||
score_map[mid] = sim_score
|
||||
# Enrich with index data for consistent return format
|
||||
score_map[mid] = sem_score
|
||||
idx_entry = index.get("entries", {}).get(mid, {})
|
||||
s = state.get("entries", {}).get(mid, {})
|
||||
result_map[mid] = {
|
||||
@ -976,7 +980,7 @@ class CognitiveMemoryClient:
|
||||
"tags": r.get("tags", []),
|
||||
"importance": idx_entry.get("importance"),
|
||||
"decay_score": round(s.get("decay_score", 0.5), 3),
|
||||
"similarity": r.get("similarity", 0.0),
|
||||
"similarity": sim,
|
||||
"path": r.get("path"),
|
||||
"created": idx_entry.get("created"),
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user