Skip to content

Commit 3680687

Browse files
RIO ISHII and claude
authored and committed
Phase 4: openai/litellm/pydantic-ai を optional extra に隔離、dump/load を制御可能に
- requirements.txt から openai, litellm, pydantic-ai-slim を削除
- requirements/llm.txt に隔離(pip install rdagent[llm] で復元)
- pyproject.toml に llm optional extra 追加
- rdagent/oai/backend/__init__.py: トップレベル import 削除(動的ロードに委譲)
- rdagent/oai/utils/embedding.py: litellm を try/except ガード
- rdagent/scenarios/finetune/scen/utils.py: litellm を try/except ガード
- rdagent/log/ui/ds_trace.py: litellm を try/except ガード
- rdagent/app/utils/health_check.py: litellm を try/except ガード
- rdagent/utils/workflow/loop.py: use_pickle_session フラグ追加

Claude Code = LLM 自身なので Python レベルの SDK は不要。
Claudex factor シナリオは SDK なしで動作する。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 52f3a5e commit 3680687

File tree

9 files changed

+70
-26
lines changed

9 files changed

+70
-26
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ dependencies = {file = ["requirements.txt"]}
110110
[tool.setuptools.dynamic.optional-dependencies]
111111
docs = {file = ["requirements/docs.txt"]}
112112
lint = {file = ["requirements/lint.txt"]}
113+
llm = {file = ["requirements/llm.txt"]} # legacy LLM SDK deps (litellm/openai/pydantic-ai); not needed for Claudex
113114
package = {file = ["requirements/package.txt"]}
114115
test = {file = ["requirements/test.txt"]}
115116
torch = {file = ["requirements/torch.txt"]} # some agent algorithms need torch. pip install rdagent[torch]

rdagent/app/utils/health_check.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,16 @@
33

44
import docker
55
import fire
6-
import litellm
76
import typer
8-
from litellm import completion, embedding
9-
from litellm.utils import ModelResponse
7+
8+
try:
9+
import litellm
10+
from litellm import completion, embedding
11+
from litellm.utils import ModelResponse
12+
13+
_litellm_available = True
14+
except ImportError:
15+
_litellm_available = False
1016
from typing_extensions import Annotated
1117

1218
from rdagent.log import rdagent_logger as logger

rdagent/log/ui/ds_trace.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010
import pandas as pd
1111
import plotly.express as px
1212
import streamlit as st
13-
from litellm import get_valid_models
13+
try:
14+
from litellm import get_valid_models
15+
except ImportError:
16+
get_valid_models = lambda: [] # noqa: E731
1417
from streamlit import session_state as state
1518

1619
from rdagent.app.data_science.loop import DataScienceRDLoop

rdagent/oai/backend/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
from .litellm import LiteLLMAPIBackend
1+
# Backend is resolved dynamically via import_class(LLM_SETTINGS.backend).
2+
# No top-level imports here — litellm/openai are optional (pip install rdagent[llm]).

rdagent/oai/utils/embedding.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,14 @@
44

55
from typing import Optional
66

7-
from litellm import decode, encode, get_max_tokens, token_counter
8-
97
from rdagent.log import rdagent_logger as logger
8+
9+
try:
10+
from litellm import decode, encode, get_max_tokens, token_counter
11+
12+
_litellm_available = True
13+
except ImportError:
14+
_litellm_available = False
1015
from rdagent.oai.llm_conf import LLM_SETTINGS
1116

1217
# Common embedding model token limits
@@ -46,12 +51,13 @@ def get_embedding_max_tokens(model: str) -> int:
4651
model_name = model.split("/")[-1] if "/" in model else model
4752

4853
# Level 1: Try litellm
49-
try:
50-
max_tokens = get_max_tokens(model_name)
51-
if max_tokens and max_tokens > 0:
52-
return max_tokens
53-
except Exception as e:
54-
logger.warning(f"Failed to get max tokens for {model_name}: {e}")
54+
if _litellm_available:
55+
try:
56+
max_tokens = get_max_tokens(model_name)
57+
if max_tokens and max_tokens > 0:
58+
return max_tokens
59+
except Exception as e:
60+
logger.warning(f"Failed to get max tokens for {model_name}: {e}")
5561

5662
# Level 2: Query mapping table
5763
if model_name in EMBEDDING_MODEL_LIMITS:
@@ -86,6 +92,14 @@ def trim_text_for_embedding(text: str, model: str, max_tokens: Optional[int] = N
8692
# Apply safety margin
8793
safe_max_tokens = int(max_tokens * 0.9)
8894

95+
if not _litellm_available:
96+
# Without litellm, use a rough character-based truncation
97+
char_limit = safe_max_tokens * 4 # ~4 chars per token heuristic
98+
if len(text) > char_limit:
99+
logger.warning(f"litellm not available; truncating text by character limit ({char_limit} chars)")
100+
return text[:char_limit]
101+
return text
102+
89103
# Calculate current token count
90104
current_tokens = token_counter(model=model, text=text)
91105

rdagent/scenarios/finetune/scen/utils.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@
66

77
import numpy as np
88
import pandas as pd
9-
import litellm
9+
10+
try:
11+
import litellm
12+
13+
_litellm_available = True
14+
except ImportError:
15+
_litellm_available = False
1016

1117
from rdagent.app.finetune.llm.conf import FT_RD_SETTING
1218
from rdagent.core.utils import cache_with_pickle
@@ -101,13 +107,16 @@ def _compute_column_stats(data: list[dict]) -> dict[str, dict]:
101107
texts.append(val)
102108

103109
if texts:
104-
try:
105-
token_counts = [
106-
litellm.token_counter(model=_TOKENIZER_MODEL, text=t)
107-
for t in texts
108-
]
109-
except Exception as e:
110-
logger.warning(f"Token counting failed for column '{col}': {e}, falling back to char/4")
110+
if _litellm_available:
111+
try:
112+
token_counts = [
113+
litellm.token_counter(model=_TOKENIZER_MODEL, text=t)
114+
for t in texts
115+
]
116+
except Exception as e:
117+
logger.warning(f"Token counting failed for column '{col}': {e}, falling back to char/4")
118+
token_counts = [len(t) // 4 for t in texts]
119+
else:
111120
token_counts = [len(t) // 4 for t in texts]
112121

113122
column_stats[col] = {

rdagent/utils/workflow/loop.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ class LoopTerminationError(Exception):
112112
class LoopResumeError(Exception):
113113
"""Exception raised when loop conditions indicate the loop should stop all coroutines and resume"""
114114

115-
def __init__(self) -> None:
115+
def __init__(self, *, use_pickle_session: bool = True) -> None:
116116
# progress control
117117
self.loop_idx: int = 0 # current loop index / next loop index to kickoff
118118
self.step_idx: defaultdict[int, int] = defaultdict(int) # dict from loop index to next step index
@@ -133,6 +133,9 @@ def __init__(self) -> None:
133133

134134
self.semaphores: dict[str, asyncio.Semaphore] = {}
135135

136+
# When False, skip pickle-based session dump/load (e.g. Claudex adapters use artifact JSON as SSOT)
137+
self.use_pickle_session: bool = use_pickle_session
138+
136139
def get_unfinished_loop_cnt(self, next_loop: int) -> int:
137140
n = 0
138141
for li in range(next_loop):
@@ -300,7 +303,7 @@ async def _run_step(self, li: int, force_subproc: bool = False) -> None:
300303
# Save snapshot after completing the step;
301304
# 1) It has to be after the step_idx is updated, so loading the snapshot will be on the right step.
302305
# 2) Only save it when the step forward, withdraw does not worth saving.
303-
if name in self.loop_prev_out[li]:
306+
if self.use_pickle_session and name in self.loop_prev_out[li]:
304307
# 3) Only dump the step if (so we don't have to redo the step when we load the session again)
305308
# it has been executed successfully
306309
self.dump(self.session_folder / f"{li}" / f"{si}_{name}")
@@ -403,6 +406,9 @@ async def run(self, step_n: int | None = None, loop_n: int | None = None, all_du
403406
self.close_pbar()
404407

405408
def withdraw_loop(self, loop_idx: int) -> None:
409+
if not self.use_pickle_session:
410+
logger.warning(f"Pickle session disabled; cannot withdraw loop {loop_idx}. Skipping.")
411+
return
406412
prev_session_dir = self.session_folder / str(loop_idx - 1)
407413
prev_path = min(
408414
(p for p in prev_session_dir.glob("*_*") if p.is_file()),

requirements.txt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ filelock
77
loguru
88
fire
99
fuzzywuzzy
10-
openai
11-
litellm>=1.73 # to support `from litellm import get_valid_models`
1210
azure.identity
1311
pyarrow
1412
rich
@@ -70,7 +68,6 @@ azureml-mlflow
7068
types-pytz
7169

7270
# Agent
73-
pydantic-ai-slim[mcp,openai,prefect]
7471
nest-asyncio
7572

7673
# visualize SFT train

requirements/llm.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Legacy LLM SDK dependencies — only needed for non-Claudex scenarios
2+
# (kaggle, data_science, finetune) that still use Python-level LLM calls.
3+
# Claudex factor scenario does not require these.
4+
# Install with: pip install rdagent[llm]
5+
openai
6+
litellm>=1.73
7+
pydantic-ai-slim[mcp,openai,prefect]

0 commit comments

Comments (0)