Skip to content

Commit 55f41cb

Browse files
committed
fix: Bump max tokens limits
1 parent 28121ee commit 55f41cb

File tree

6 files changed

+47
-31
lines changed

6 files changed

+47
-31
lines changed

.env.example

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,37 +17,53 @@ LITELLM_BASE_URL=http://localhost:11434/api
1717
# Can also use OPENAI_API_KEY
1818
LITELLM_API_KEY=sk-your-key-here
1919

20-
# Model to use for summarization (default: arc:apex)
20+
# Model to use for summarization (default: qwen3.5:27b)
2121
# Must be a valid model for your LITELLM_BASE_URL
22-
SUMMARY_MODEL=arc:apex
22+
SUMMARY_MODEL=qwen3.5:27b
2323

2424
# Deep research settings (static - configured via env, not per-request)
25-
DEEP_RESEARCH_STAGES=2 # Number of outline sections
26-
DEEP_RESEARCH_PASSES=1 # Research passes - each refines queries
27-
DEEP_RESEARCH_SUBQUERIES=5 # Queries per section per pass
28-
DEEP_RESEARCH_RESULTS_PER_QUERY=10 # Search results per subquery
29-
DEEP_RESEARCH_MAX_TOKENS=8000 # Max tokens for final essay
25+
# Number of outline sections
26+
DEEP_RESEARCH_STAGES=2
27+
# Research passes - each refines queries
28+
DEEP_RESEARCH_PASSES=1
29+
# Queries per section per pass
30+
DEEP_RESEARCH_SUBQUERIES=5
31+
# Search results per subquery
32+
DEEP_RESEARCH_RESULTS_PER_QUERY=10
33+
# Max tokens for final essay
34+
DEEP_RESEARCH_MAX_TOKENS=8000
3035

3136
# Content extraction - fetches full page text for richer synthesis
32-
DEEP_RESEARCH_CONTENT_EXTRACTION=true # Toggle page fetching on/off
33-
DEEP_RESEARCH_PAGES_PER_SECTION=3 # Top pages to extract per section
34-
DEEP_RESEARCH_CONTENT_MAX_CHARS=3000 # Max chars of extracted text per page
37+
# Toggle page fetching on/off
38+
DEEP_RESEARCH_CONTENT_EXTRACTION=true
39+
# Top pages to extract per section
40+
DEEP_RESEARCH_PAGES_PER_SECTION=3
41+
# Max chars of extracted text per page
42+
DEEP_RESEARCH_CONTENT_MAX_CHARS=3000
3543

3644
# Shallow research settings (static - configured via env, not per-request)
37-
SHALLOW_RESEARCH_STAGES=1 # Number of outline sections
38-
SHALLOW_RESEARCH_PASSES=1 # Research passes
39-
SHALLOW_RESEARCH_SUBQUERIES=3 # Queries per section per pass
40-
SHALLOW_RESEARCH_RESULTS_PER_QUERY=5 # Search results per subquery
41-
SHALLOW_RESEARCH_MAX_TOKENS=4000 # Max tokens for final essay
45+
# Number of outline sections
46+
SHALLOW_RESEARCH_STAGES=1
47+
# Research passes
48+
SHALLOW_RESEARCH_PASSES=1
49+
# Queries per section per pass
50+
SHALLOW_RESEARCH_SUBQUERIES=3
51+
# Search results per subquery
52+
SHALLOW_RESEARCH_RESULTS_PER_QUERY=5
53+
# Max tokens for final essay
54+
SHALLOW_RESEARCH_MAX_TOKENS=4000
4255

4356
# Shallow content extraction - typically disabled for faster responses
44-
SHALLOW_RESEARCH_CONTENT_EXTRACTION=false # Toggle page fetching on/off
45-
SHALLOW_RESEARCH_PAGES_PER_SECTION=2 # Top pages to extract per section
46-
SHALLOW_RESEARCH_CONTENT_MAX_CHARS=2000 # Max chars of extracted text per page
57+
# Toggle page fetching on/off
58+
SHALLOW_RESEARCH_CONTENT_EXTRACTION=false
59+
# Top pages to extract per section
60+
SHALLOW_RESEARCH_PAGES_PER_SECTION=2
61+
# Max chars of extracted text per page
62+
SHALLOW_RESEARCH_CONTENT_MAX_CHARS=2000
4763

4864
# Optional runtime settings
4965
ENABLE_SUMMARY=true
50-
SUMMARY_MAX_TOKENS=1024
51-
SEARXNG_TIMEOUT_SECONDS=30
52-
LLM_TIMEOUT_SECONDS=60
66+
SUMMARY_MAX_TOKENS=4000
67+
SEARXNG_TIMEOUT_SECONDS=60
68+
LLM_TIMEOUT_SECONDS=120
5369
LOG_LEVEL=INFO

artemis/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ def get_settings() -> Settings:
349349
os.getenv("SEARXNG_API_BASE", "http://localhost:8888"),
350350
),
351351
searxng_timeout_seconds=_parse_float(
352-
"SEARXNG_TIMEOUT_SECONDS", 30.0, minimum=0.5, maximum=300.0
352+
"SEARXNG_TIMEOUT_SECONDS", 60.0, minimum=0.5, maximum=300.0
353353
),
354354
litellm_base_url=_validate_url(
355355
"LITELLM_BASE_URL",
@@ -360,9 +360,9 @@ def get_settings() -> Settings:
360360
llm_timeout_seconds=_parse_float(
361361
"LLM_TIMEOUT_SECONDS", 120.0, minimum=1.0, maximum=600.0
362362
),
363-
summary_model=os.getenv("SUMMARY_MODEL", "arc:apex").strip() or "arc:apex",
363+
summary_model=os.getenv("SUMMARY_MODEL", "qwen3.5:27b").strip() or "qwen3.5:27b",
364364
summary_max_tokens=_parse_int(
365-
"SUMMARY_MAX_TOKENS", 2000, minimum=512, maximum=16384
365+
"SUMMARY_MAX_TOKENS", 4000, minimum=512, maximum=16384
366366
),
367367
enable_summary=_parse_bool("ENABLE_SUMMARY", True),
368368
deep_research_stages=_parse_int(

artemis/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,7 @@ async def responses(
632632

633633
return ResponsesAPIResponse(
634634
id=str(uuid.uuid4()),
635-
created=_created_timestamp(),
635+
created_at=_created_timestamp(),
636636
model=preset_config.model_name,
637637
output=[
638638
_message_output(research_run.essay),
@@ -651,7 +651,7 @@ async def responses(
651651

652652
return ResponsesAPIResponse(
653653
id=str(uuid.uuid4()),
654-
created=_created_timestamp(),
654+
created_at=_created_timestamp(),
655655
model="artemis-search",
656656
output=[
657657
_message_output(summary or _fallback_text(results)),

artemis/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ class ResponsesAPIResponse(BaseModel):
204204
205205
Attributes:
206206
id: Unique response identifier
207-
created: Unix timestamp of creation
207+
created_at: Unix timestamp of creation
208208
model: Model identifier used
209209
status: Always "completed" for successful responses
210210
output: List of message and/or search results blocks
@@ -214,7 +214,7 @@ class ResponsesAPIResponse(BaseModel):
214214

215215
id: str
216216
object: Literal["response"] = "response"
217-
created: int
217+
created_at: int
218218
model: str
219219
status: Literal["completed"] = "completed"
220220
output: list[AssistantMessage | SearchResultsBlock]

docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ services:
2424
- ALLOWED_ORIGINS=http://localhost:3000
2525
- ARTEMIS_API_KEY=${ARTEMIS_API_KEY:-}
2626
- ENABLE_SUMMARY=true
27-
- SUMMARY_MODEL=openai/gpt-4o-mini
27+
- SUMMARY_MODEL=qwen3.5:27b
2828
# Add your API key:
2929
# - OPENAI_API_KEY=sk-...
3030
depends_on:

tests/test_models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ class ResponsesAPIResponseTestCase(unittest.TestCase):
107107
def test_defaults(self) -> None:
108108
resp = ResponsesAPIResponse(
109109
id="test-id",
110-
created=1000,
110+
created_at=1000,
111111
model="artemis-search",
112112
output=[],
113113
)
@@ -119,7 +119,7 @@ def test_defaults(self) -> None:
119119
def test_full_response_serialization(self) -> None:
120120
resp = ResponsesAPIResponse(
121121
id="id-1",
122-
created=1234567890,
122+
created_at=1234567890,
123123
model="artemis-search",
124124
output=[
125125
AssistantMessage(

0 commit comments

Comments
 (0)