Skip to content

Commit 84df236

Browse files
author
yuan.wang
committed
Merge remote-tracking branch 'upstream/dev-20260302-v2.0.8' into feat/transfer_pref_to_polar_db
2 parents 82db08c + f886bbc commit 84df236

File tree

3 files changed

+53
-28
lines changed

3 files changed

+53
-28
lines changed

src/memos/configs/mem_scheduler.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,12 @@ def validate_partial_initialization(self) -> "AuthConfig":
250250
"All configuration components are None. This may indicate missing environment variables or configuration files."
251251
)
252252
elif failed_components:
253-
logger.warning(
254-
f"Failed to initialize components: {', '.join(failed_components)}. Successfully initialized: {', '.join(initialized_components)}"
253+
# Use info level: individual from_local_env() methods already log
254+
# warnings for actual initialization failures. Components that are
255+
# simply not configured (no env vars) are not errors.
256+
logger.info(
257+
f"Components not configured: {', '.join(failed_components)}. "
258+
f"Successfully initialized: {', '.join(initialized_components)}"
255259
)
256260

257261
return self

src/memos/embedders/universal_api.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,21 @@
1414
logger = get_logger(__name__)
1515

1616

17+
def _sanitize_unicode(text: str) -> str:
18+
"""
19+
Remove Unicode surrogates and other problematic characters.
20+
Surrogates (U+D800-U+DFFF) cause UnicodeEncodeError with some APIs.
21+
"""
22+
try:
23+
# Encode with 'surrogatepass' then decode, replacing invalid chars
24+
cleaned = text.encode("utf-8", errors="surrogatepass").decode("utf-8", errors="replace")
25+
# Replace replacement char with empty string for cleaner output
26+
return cleaned.replace("\ufffd", "")
27+
except Exception:
28+
# Fallback: remove all non-BMP characters
29+
return "".join(c for c in text if ord(c) < 0x10000)
30+
31+
1732
class UniversalAPIEmbedder(BaseEmbedder):
1833
def __init__(self, config: UniversalAPIEmbedderConfig):
1934
self.provider = config.provider
@@ -54,6 +69,8 @@ def __init__(self, config: UniversalAPIEmbedderConfig):
5469
def embed(self, texts: list[str]) -> list[list[float]]:
5570
if isinstance(texts, str):
5671
texts = [texts]
72+
# Strip unpaired surrogates (e.g. from malformed emoji) that would fail UTF-8 encoding
73+
texts = [_sanitize_unicode(t) for t in texts]
5774
# Truncate texts if max_tokens is configured
5875
texts = self._truncate_texts(texts)
5976
logger.info(f"Embeddings request with input: {texts}")

src/memos/mem_os/utils/default_config.py

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,15 @@
33
Provides simplified configuration generation for users.
44
"""
55

6+
import logging
67
from typing import Literal
78

89
from memos.configs.mem_cube import GeneralMemCubeConfig
910
from memos.configs.mem_os import MOSConfig
1011
from memos.mem_cube.general import GeneralMemCube
1112

13+
logger = logging.getLogger(__name__)
14+
1215

1316
def get_default_config(
1417
openai_api_key: str,
@@ -116,20 +119,9 @@ def get_default_config(
116119
},
117120
}
118121

119-
# Add activation memory if enabled
120-
if config_dict.get("enable_activation_memory", False):
121-
config_dict["act_mem"] = {
122-
"backend": "kv_cache",
123-
"config": {
124-
"memory_filename": kwargs.get(
125-
"activation_memory_filename", "activation_memory.pickle"
126-
),
127-
"extractor_llm": {
128-
"backend": "openai",
129-
"config": openai_config,
130-
},
131-
},
132-
}
122+
# Note: act_mem configuration belongs in MemCube config (get_default_cube_config),
123+
# not in MOSConfig which doesn't have an act_mem field (extra="forbid").
124+
# The enable_activation_memory flag above is sufficient for MOSConfig.
133125

134126
return MOSConfig(**config_dict)
135127

@@ -237,21 +229,33 @@ def get_default_cube_config(
237229
},
238230
}
239231

240-
# Configure activation memory if enabled
232+
# Configure activation memory if enabled.
233+
# KV cache activation memory requires a local HuggingFace/vLLM model (it
234+
# extracts internal attention KV tensors via build_kv_cache), so it cannot
235+
# work with remote API backends like OpenAI.
236+
# Only create act_mem when activation_memory_backend is explicitly provided.
241237
act_mem_config = {}
242238
if kwargs.get("enable_activation_memory", False):
243-
act_mem_config = {
244-
"backend": "kv_cache",
245-
"config": {
246-
"memory_filename": kwargs.get(
247-
"activation_memory_filename", "activation_memory.pickle"
248-
),
249-
"extractor_llm": {
250-
"backend": "openai",
251-
"config": openai_config,
239+
extractor_backend = kwargs.get("activation_memory_backend")
240+
if extractor_backend in ("huggingface", "huggingface_singleton", "vllm"):
241+
act_mem_config = {
242+
"backend": "kv_cache",
243+
"config": {
244+
"memory_filename": kwargs.get(
245+
"activation_memory_filename", "activation_memory.pickle"
246+
),
247+
"extractor_llm": {
248+
"backend": extractor_backend,
249+
"config": kwargs.get("activation_memory_llm_config", {}),
250+
},
252251
},
253-
},
254-
}
252+
}
253+
else:
254+
logger.info(
255+
"Activation memory (kv_cache) requires a local model backend "
256+
"(huggingface/vllm) via activation_memory_backend kwarg. "
257+
"Skipping act_mem in MemCube config."
258+
)
255259

256260
# Create MemCube configuration
257261
cube_config_dict = {

0 commit comments

Comments
 (0)