Rebalance semantic/keyword merge weights to 60/40

Normalize both signals to a 0-1 range so semantic similarity scores aren't drowned out by keyword position scores. Jellyfin DB recovery now ranks #1 for "media server database broken sqlite error" instead of being buried behind keyword noise matches on "error".

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 15:07:43 -06:00 · 2026-02-19 15:07:43 -06:00 · aed98a3cc8
commit aed98a3cc8
parent 4770c15429
1 changed file with 14 additions and 10 deletions
--- a/skills/cognitive-memory/client.py
+++ b/skills/cognitive-memory/client.py
@@ -942,31 +942,35 @@ class CognitiveMemoryClient:
        keyword_results = results[:limit]

        # Merge with semantic results if requested
+        # Weights: semantic 60%, keyword 40% (--semantic signals intent for
+        # conceptual matching; keyword acts as precision boost for exact terms)
        if semantic:
            embeddings_path = self.memory_dir / "_embeddings.json"
            if embeddings_path.exists():
                sem_results = self.semantic_recall(query, limit=limit)
                if sem_results:
-                    # Build merged score map: keyword_score + similarity * 5
                    score_map: Dict[str, float] = {}
                    result_map: Dict[str, Dict] = {}

-                    # Add keyword results with their position-based score
+                    # Keyword: normalize rank to 0-1 (rank 1 = 1.0, last = ~0.1)
+                    kw_weight = 0.4
                    for i, r in enumerate(keyword_results):
                        mid = r["id"]
-                        score_map[mid] = float(limit - i)  # higher rank = higher score
+                        normalized = (limit - i) / limit
+                        score_map[mid] = normalized * kw_weight
                        result_map[mid] = r

-                    # Add semantic results
+                    # Semantic: similarity is already 0-1
+                    sem_weight = 0.6
                    for r in sem_results:
                        mid = r["id"]
-                        sim_score = r.get("similarity", 0.0) * 5
+                        sim = r.get("similarity", 0.0)
+                        sem_score = sim * sem_weight
                        if mid in score_map:
-                            score_map[mid] += sim_score
-                            result_map[mid]["similarity"] = r.get("similarity", 0.0)
+                            score_map[mid] += sem_score
+                            result_map[mid]["similarity"] = sim
                        else:
-                            score_map[mid] = sim_score
-                            # Enrich with index data for consistent return format
+                            score_map[mid] = sem_score
                            idx_entry = index.get("entries", {}).get(mid, {})
                            s = state.get("entries", {}).get(mid, {})
                            result_map[mid] = {
@@ -976,7 +980,7 @@ class CognitiveMemoryClient:
                                "tags": r.get("tags", []),
                                "importance": idx_entry.get("importance"),
                                "decay_score": round(s.get("decay_score", 0.5), 3),
-                                "similarity": r.get("similarity", 0.0),
+                                "similarity": sim,
                                "path": r.get("path"),
                                "created": idx_entry.get("created"),
                            }