Skip to content

Commit 270800b

Browse files
committed
Fix 4 product-quality issues: garbage rejection, ISO-8601, dedup, CLI fallback
1. Reject [object Object] garbage: lower the detection threshold to 1, add the same check to the no-LLM path of extract, serialize non-string payloads defensively in the Claude Code hooks, and harden _asText in cursor-hooks for the {text: "..."} wrapper pattern.
2. Accept ISO-8601 datetimes in clickmem_list: replace the T separator with a space and drop a trailing Z or timezone offset before building SQL.
3. Deduplicate exact-text semantic memories: add _dedup_exact_text() to ContinualRefinement.run(), and dedup proactively in the extractor before insert.
4. CLI local fallback: add a --local flag, and auto-fall back to LocalTransport only when no remote config exists (respects CLICKMEM_REMOTE and CLICKMEM_SERVER_HOST).
1 parent 8ffec7f commit 270800b

File tree

12 files changed

+252
-9
lines changed

12 files changed

+252
-9
lines changed

cursor-hooks/lib/handlers.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ function _log(msg) {
2424
function _asText(val) {
2525
if (val === null || val === undefined) return "";
2626
if (typeof val === "string") return val;
27+
if (typeof val === "object" && !Array.isArray(val) && typeof val.text === "string") return val.text;
2728
if (Array.isArray(val)) return val.map(_asText).join("\n");
2829
return JSON.stringify(val);
2930
}

src/memory_core/cli.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535

3636
_remote_url: str | None = None
3737
_remote_api_key: str | None = None
38+
_force_local: bool = False
3839

3940

4041
@app.callback()
@@ -47,10 +48,15 @@ def _global_options(
4748
None, "--api-key", envvar="CLICKMEM_API_KEY",
4849
help="API key for remote server authentication.",
4950
),
51+
local: bool = typer.Option(
52+
False, "--local",
53+
help="Use embedded database directly (no server needed).",
54+
),
5055
):
51-
global _remote_url, _remote_api_key
56+
global _remote_url, _remote_api_key, _force_local
5257
_remote_url = remote
5358
_remote_api_key = api_key
59+
_force_local = local
5460

5561

5662
# ---------------------------------------------------------------------------
@@ -60,11 +66,31 @@ def _global_options(
6066
_transport_instance = None
6167

6268

69+
def _has_remote_config() -> bool:
70+
"""Check if the user has explicitly configured a remote server."""
71+
return bool(
72+
_remote_url
73+
or os.environ.get("CLICKMEM_REMOTE")
74+
or os.environ.get("CLICKMEM_SERVER_HOST")
75+
)
76+
77+
6378
def _get_transport():
6479
global _transport_instance
6580
if _transport_instance is None:
66-
from memory_core.transport import get_transport
67-
_transport_instance = get_transport(remote=_remote_url, api_key=_remote_api_key)
81+
if _force_local:
82+
from memory_core.transport import LocalTransport
83+
_transport_instance = LocalTransport()
84+
else:
85+
from memory_core.transport import get_transport
86+
try:
87+
_transport_instance = get_transport(remote=_remote_url, api_key=_remote_api_key)
88+
except RuntimeError:
89+
if _has_remote_config():
90+
raise
91+
print("[clickmem] No server found, using local database.", file=sys.stderr)
92+
from memory_core.transport import LocalTransport
93+
_transport_instance = LocalTransport()
6894
return _transport_instance
6995

7096

src/memory_core/db.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,19 @@ def optimize(self) -> None:
295295

296296
# -- Queries -----------------------------------------------------------
297297

298+
@staticmethod
299+
def _normalize_datetime(dt_str: str) -> str:
300+
"""Normalize ISO-8601 datetime to ClickHouse-compatible format.
301+
302+
Handles 'T' separator, trailing 'Z', and timezone offsets like +00:00.
303+
"""
304+
import re
305+
s = dt_str.replace("T", " ")
306+
if s.endswith("Z"):
307+
s = s[:-1]
308+
s = re.sub(r'[+-]\d{2}:?\d{2}$', '', s)
309+
return s.strip()
310+
298311
def _time_conditions(self, since: str | None, until: str | None) -> list[str]:
299312
"""Build SQL WHERE clauses for time filtering.
300313
@@ -304,9 +317,11 @@ def _time_conditions(self, since: str | None, until: str | None) -> list[str]:
304317
_dt_re = re.compile(r"^\d{4}-\d{2}-\d{2}[\sT]?\d{0,2}:?\d{0,2}:?\d{0,2}")
305318
conds: list[str] = []
306319
if since and _dt_re.match(since):
307-
conds.append(f"created_at >= '{self._escape(since)}'")
320+
normalized = self._normalize_datetime(since)
321+
conds.append(f"created_at >= '{self._escape(normalized)}'")
308322
if until and _dt_re.match(until):
309-
conds.append(f"created_at <= '{self._escape(until)}'")
323+
normalized = self._normalize_datetime(until)
324+
conds.append(f"created_at <= '{self._escape(normalized)}'")
310325
return conds
311326

312327
def list_by_layer(self, layer: str, *, limit: int = 100,

src/memory_core/extractor.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def extract(
6666
conversation = "\n".join(
6767
f"{m.get('role', 'user')}: {m.get('content', '')}" for m in messages
6868
)
69-
if conversation.count("[object Object]") >= 2:
69+
if "[object Object]" in conversation:
7070
return []
7171
prompt = _EXTRACT_PROMPT.format(conversation=conversation)
7272
raw_response = llm_complete(prompt)
@@ -82,13 +82,21 @@ def extract(
8282
ids.append(rows[0].id)
8383
continue
8484

85+
content = mem_data.get("content", "")
86+
# Proactive dedup: skip if identical content already exists in this layer
87+
existing = self._db.list_by_layer(layer, limit=100)
88+
dup = next((e for e in existing if e.content.strip().lower() == content.strip().lower()), None)
89+
if dup:
90+
ids.append(dup.id)
91+
continue
92+
8593
m = Memory(
86-
content=mem_data.get("content", ""),
94+
content=content,
8795
layer=layer,
8896
category=mem_data.get("category", "event"),
8997
tags=mem_data.get("tags", []),
9098
entities=mem_data.get("entities", []),
91-
embedding=self._emb.encode_document(mem_data.get("content", "")),
99+
embedding=self._emb.encode_document(content),
92100
session_id=session_id,
93101
source="agent",
94102
raw_id=raw_id or None,

src/memory_core/refinement.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import logging
1010
import math
11+
from datetime import datetime
1112
from typing import TYPE_CHECKING, Callable
1213

1314
from memory_core.json_utils import extract_json_or
@@ -82,11 +83,13 @@ def run(
8283
llm_complete: Callable[[str], str],
8384
) -> dict:
8485
_log.info("Starting continual refinement")
86+
exact_deduped = ContinualRefinement._dedup_exact_text(db)
8587
reextracted = ContinualRefinement._reextract_unprocessed(db, emb, llm_complete)
8688
clusters = ContinualRefinement._cluster_semantic(db, emb)
8789
merged = ContinualRefinement._refine_clusters(db, emb, llm_complete, clusters)
8890
pruned = ContinualRefinement._prune_low_quality(db, llm_complete)
8991
result = {
92+
"exact_deduped": exact_deduped,
9093
"reextracted": reextracted,
9194
"clusters_found": len(clusters),
9295
"merged": merged,
@@ -95,6 +98,27 @@ def run(
9598
_log.info("Refinement complete: %s", result)
9699
return result
97100

101+
@staticmethod
102+
def _dedup_exact_text(db: "MemoryDB") -> int:
103+
"""Remove exact-text duplicates in the semantic layer.
104+
105+
Keeps the oldest memory (by created_at) and deactivates newer copies.
106+
"""
107+
memories = db.list_by_layer("semantic", limit=500)
108+
content_groups: dict[str, list[Memory]] = {}
109+
for m in memories:
110+
key = m.content.strip().lower()
111+
content_groups.setdefault(key, []).append(m)
112+
deduped = 0
113+
for group in content_groups.values():
114+
if len(group) < 2:
115+
continue
116+
group.sort(key=lambda m: m.created_at or datetime.min)
117+
for dup in group[1:]:
118+
db.deactivate(dup.id)
119+
deduped += 1
120+
return deduped
121+
98122
@staticmethod
99123
def _reextract_unprocessed(
100124
db: "MemoryDB", emb, llm_complete: Callable[[str], str],

src/memory_core/server.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,9 @@ async def _cc_user_prompt_submit(payload: dict) -> dict:
366366
"""Buffer the user prompt so the Stop handler can build the full turn."""
367367
session_id = payload.get("session_id", "")
368368
prompt = payload.get("prompt", "")
369+
if not isinstance(prompt, str):
370+
import json as _json
371+
prompt = _json.dumps(prompt, ensure_ascii=False) if prompt else ""
369372
if session_id and prompt:
370373
_cc_prompt_buffers[session_id] = prompt
371374
return {}
@@ -375,6 +378,9 @@ async def _cc_stop(payload: dict) -> dict:
375378
"""Extract memories from the completed turn (buffered prompt + assistant response)."""
376379
session_id = payload.get("session_id", "")
377380
assistant_msg = payload.get("last_assistant_message", "")
381+
if not isinstance(assistant_msg, str):
382+
import json as _json
383+
assistant_msg = _json.dumps(assistant_msg, ensure_ascii=False) if assistant_msg else ""
378384

379385
if not assistant_msg or len(assistant_msg) < 20:
380386
return {}

src/memory_core/transport.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ def extract(self, text: str, session_id: str = "") -> list[str]:
143143
llm_complete = get_llm_complete()
144144

145145
if llm_complete is None:
146+
if "[object Object]" in text:
147+
return []
146148
m = Memory(
147149
content=text, layer="episodic", category="event",
148150
embedding=emb.encode_document(text),
@@ -158,7 +160,7 @@ def extract(self, text: str, session_id: str = "") -> list[str]:
158160
llm_complete, session_id=session_id,
159161
)
160162

161-
_GARBAGE_PATTERN_THRESHOLD = 2
163+
_GARBAGE_PATTERN_THRESHOLD = 1
162164

163165
def ingest(self, text: str, session_id: str = "",
164166
source: str = "cursor") -> dict:

tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ def _reset_factory():
144144
cli_mod._transport_instance = shared_t
145145
cli_mod._remote_url = None
146146
cli_mod._remote_api_key = None
147+
cli_mod._force_local = False
147148
import memory_core.server as server_mod
148149
server_mod._transport = None
149150
server_mod._api_key_env = None

tests/test_cli.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,33 @@ def test_sql_invalid_query(self):
253253
assert result.exit_code != 0 or "error" in result.stdout.lower()
254254

255255

256+
class TestLocalFallback:
257+
"""Test --local flag and auto-fallback behavior."""
258+
259+
def test_local_flag_status(self):
260+
"""memory --local status works without a running server."""
261+
result = runner.invoke(app, ["--local", "status"])
262+
assert result.exit_code == 0
263+
output = result.stdout.lower()
264+
assert "working" in output or "l0" in output
265+
266+
def test_local_flag_status_json(self):
267+
"""memory --local status --json returns valid JSON."""
268+
result = runner.invoke(app, ["--local", "status", "--json"])
269+
assert result.exit_code == 0
270+
data = json.loads(result.stdout)
271+
assert isinstance(data, dict)
272+
assert "counts" in data
273+
274+
def test_auto_fallback_when_no_server(self):
275+
"""Without --remote and without a server, CLI auto-falls back to local."""
276+
import memory_core.cli as cli_mod
277+
cli_mod._transport_instance = None
278+
cli_mod._force_local = False
279+
result = runner.invoke(app, ["status"])
280+
assert result.exit_code == 0
281+
282+
256283
class TestMaintainCommand:
257284
"""Test `memory maintain` command."""
258285

tests/test_db.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,56 @@ def test_skips_empty_embeddings(self, db, mock_emb):
463463
assert all(len(r.embedding) > 0 for r in results)
464464

465465

466+
class TestTimeConditionsISO8601:
467+
"""Test ISO-8601 datetime parsing in _time_conditions."""
468+
469+
def test_iso8601_with_t_and_z(self, db):
470+
"""since='2026-03-13T00:00:00Z' should be accepted and normalized."""
471+
m = make_memory(
472+
layer="semantic",
473+
created_at=datetime(2026, 3, 14, tzinfo=timezone.utc),
474+
)
475+
db.insert(m)
476+
results = db.list_memories(since="2026-03-13T00:00:00Z")
477+
assert len(results) == 1
478+
479+
def test_iso8601_with_timezone_offset(self, db):
480+
"""since='2026-03-13T00:00:00+00:00' should be accepted."""
481+
m = make_memory(
482+
layer="semantic",
483+
created_at=datetime(2026, 3, 14, tzinfo=timezone.utc),
484+
)
485+
db.insert(m)
486+
results = db.list_memories(since="2026-03-13T00:00:00+00:00")
487+
assert len(results) == 1
488+
489+
def test_iso8601_until_with_t_and_z(self, db):
490+
"""until='2026-03-15T00:00:00Z' should be accepted."""
491+
m = make_memory(
492+
layer="semantic",
493+
created_at=datetime(2026, 3, 14, tzinfo=timezone.utc),
494+
)
495+
db.insert(m)
496+
results = db.list_memories(until="2026-03-15T00:00:00Z")
497+
assert len(results) == 1
498+
499+
def test_plain_datetime_still_works(self, db):
500+
"""Plain 'YYYY-MM-DD HH:MM:SS' format should still work."""
501+
m = make_memory(
502+
layer="semantic",
503+
created_at=datetime(2026, 3, 14, tzinfo=timezone.utc),
504+
)
505+
db.insert(m)
506+
results = db.list_memories(since="2026-03-13 00:00:00")
507+
assert len(results) == 1
508+
509+
def test_normalize_datetime_static(self, db):
510+
"""_normalize_datetime properly converts ISO-8601 variants."""
511+
assert db._normalize_datetime("2026-03-13T00:00:00Z") == "2026-03-13 00:00:00"
512+
assert db._normalize_datetime("2026-03-13T10:30:00+05:30") == "2026-03-13 10:30:00"
513+
assert db._normalize_datetime("2026-03-13 12:00:00") == "2026-03-13 12:00:00"
514+
515+
466516
class TestRawQuery:
467517
"""Test raw SQL query execution."""
468518

0 commit comments

Comments
 (0)