Skip to content

Commit 6926175

Browse files
committed
fix context length detection
1 parent 9ad3ae8 commit 6926175

12 files changed

+760
-336
lines changed

DEVELOPMENT_INSTRUCTIONS.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@
44
- Make sure there are concise and up to date docstrings that document usage.
55
- Debug information belongs into the command line logs, not in the app UI/UX.
66
- Always develop a generic solution, do not use content from specific examples in the code
7-
- Never include content from example documents in the source code. Never leak content from provided examples into test code!
7+
- Never include content from example documents in the source code. Never leak content from provided examples into test code!
8+
- If you create new .py files for testing or debugging, place them in the experiments folder. Delete them after they are no longer useful. If they yield meaningful unit tests, integrate them into the test suite.

app.py

Lines changed: 114 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,12 @@ def get_available_models():
135135
# Direct execution - use default ollama client
136136
models_info = ollama.list()
137137

138-
if 'models' in models_info:
138+
# Handle both dict and ListResponse object
139+
if hasattr(models_info, 'models'):
140+
models_list = models_info.models
141+
return [model.model if hasattr(model, 'model') else model.get('model', model.get('name', ''))
142+
for model in models_list]
143+
elif isinstance(models_info, dict) and 'models' in models_info:
139144
return [model.get('model', model.get('name', ''))
140145
for model in models_info['models']]
141146
return []
@@ -170,52 +175,57 @@ def get_model_info(model_name):
170175

171176
@staticmethod
172177
def get_context_length(model_name):
173-
"""Get the context length for a specific model"""
178+
"""Get the context length for a specific model - returns None if cannot be determined"""
174179
model_info = ModelManager.get_model_info(model_name)
175180

176181
# Try to get context length from model parameters first
177182
if model_info:
178183
try:
179-
# Check in parameters first
180-
if 'parameters' in model_info:
181-
params = model_info['parameters']
182-
if 'num_ctx' in params:
183-
return int(params['num_ctx'])
184+
# Check in parameters first (handle both dict and object)
185+
parameters = None
186+
if hasattr(model_info, 'parameters'):
187+
parameters = model_info.parameters
188+
elif isinstance(model_info, dict) and 'parameters' in model_info:
189+
parameters = model_info['parameters']
184190

185-
# Check in model details/template
186-
if 'details' in model_info:
187-
details = model_info['details']
188-
if 'parameter_size' in details:
189-
# Some models have context info in details
190-
pass
191+
if parameters:
192+
num_ctx = None
193+
if hasattr(parameters, 'get') and parameters.get('num_ctx'):
194+
num_ctx = parameters['num_ctx']
195+
elif hasattr(parameters, 'num_ctx'):
196+
num_ctx = parameters.num_ctx
197+
elif isinstance(parameters, dict) and 'num_ctx' in parameters:
198+
num_ctx = parameters['num_ctx']
199+
200+
if num_ctx:
201+
return int(num_ctx)
202+
203+
# Check in modelfile for PARAMETER num_ctx
204+
modelfile = None
205+
if hasattr(model_info, 'modelfile'):
206+
modelfile = model_info.modelfile
207+
elif isinstance(model_info, dict) and 'modelfile' in model_info:
208+
modelfile = model_info['modelfile']
209+
210+
if modelfile:
211+
import re
212+
ctx_match = re.search(r'PARAMETER\s+num_ctx\s+(\d+)', modelfile, re.IGNORECASE)
213+
if ctx_match:
214+
return int(ctx_match.group(1))
215+
216+
# Get model family for fallback detection
217+
family = None
218+
if hasattr(model_info, 'details') and hasattr(model_info.details, 'family'):
219+
family = model_info.details.family
220+
elif isinstance(model_info, dict) and 'details' in model_info and 'family' in model_info['details']:
221+
family = model_info['details']['family']
191222

192223
except Exception as e:
193224
logger.warning(f"Error parsing model info for {model_name}: {e}")
194225

195-
# Fallback to default context lengths for common model families
196-
try:
197-
model_lower = model_name.lower()
198-
if 'llama3.1' in model_lower or 'llama-3.1' in model_lower:
199-
return 131072 # 128k context
200-
elif 'llama3' in model_lower or 'llama-3' in model_lower:
201-
return 8192 # 8k context
202-
elif 'llama2' in model_lower or 'llama-2' in model_lower:
203-
return 4096 # 4k context
204-
elif 'mistral' in model_lower:
205-
return 32768 # 32k context for most Mistral models
206-
elif 'codellama' in model_lower:
207-
return 16384 # 16k context
208-
elif 'deepseek' in model_lower:
209-
return 32768 # 32k context
210-
elif 'qwen' in model_lower:
211-
return 32768 # 32k context
212-
else:
213-
# Default fallback
214-
return 2048
215-
216-
except Exception as e:
217-
logger.warning(f"Error parsing context length for {model_name}: {e}")
218-
return 2048 # Conservative default
226+
# Return None if we truly cannot determine it
227+
logger.warning(f"Could not determine context length for {model_name} - no explicit parameter found and unknown model family")
228+
return None
219229

220230
class ContextChecker:
221231
"""Utility class for checking context window compatibility"""
@@ -247,24 +257,76 @@ def estimate_token_count(text):
247257
return max(char_based, word_based)
248258

249259
@staticmethod
250-
def check_document_fits_context(document_text, model_name, system_prompt_length=2000):
260+
def check_document_fits_context(document_text, model_name, user_prompt=""):
251261
"""Check if document + system prompt fits in model's context window"""
252262
if not document_text or not model_name:
253263
return True, None, None
254264

255265
context_length = ModelManager.get_context_length(model_name)
256266
if context_length is None:
257-
return True, None, "Could not determine model context length"
267+
return True, None, f"Cannot determine context length for model '{model_name}'. Context checking skipped."
258268

259-
doc_tokens = ContextChecker.estimate_token_count(document_text)
260-
total_tokens = doc_tokens + system_prompt_length # Reserve space for system prompt and user query
269+
# Generate the actual system prompt to measure its real size
270+
system_prompt = f"""You are a document analysis assistant. Answer questions ONLY using information from this document:
271+
272+
DOCUMENT CONTENT:
273+
{document_text}
274+
275+
INITIAL CHECK:
276+
First, verify you have received document content above. If the document is empty or missing, respond: "Error: No document content received, cannot proceed."
277+
278+
RESPONSE RULES:
279+
Choose ONE approach based on whether the document contains relevant information:
280+
281+
1. **IF ANSWERABLE**: Provide a complete answer with citations
282+
- Every factual claim must have a citation [1], [2], etc.
283+
- List citations at the end using this exact format:
284+
[1] "exact quote from document"
285+
[2] "another exact quote"
286+
287+
2. **IF NOT ANSWERABLE**: Decline to answer
288+
- State: "I cannot answer this based on the document"
289+
- Do NOT include any citations when declining
290+
- Do not attempt to answer the question with your own knowledge.
291+
292+
CITATION GUIDELINES:
293+
- Use verbatim quotes in their original language (never translate)
294+
- Quote meaningful phrases (3-8 words) that provide context
295+
- Include descriptive context around numbers/measurements
296+
- Each citation on its own line
297+
298+
LANGUAGE RULES:
299+
- Respond in the user's language
300+
- Keep citations in the document's original language
301+
302+
EXAMPLE - Answerable:
303+
Q: Does he have medical experience?
304+
A: Yes, he has experience in medical applications. [1]
305+
306+
[1] "project development for AI applications: medical data mining & AI"
307+
308+
EXAMPLE - Not answerable:
309+
Q: What's his favorite language?
310+
A: I cannot answer this based on the document.
311+
"""
312+
313+
# Measure actual token counts
314+
system_tokens = ContextChecker.estimate_token_count(system_prompt)
315+
user_tokens = ContextChecker.estimate_token_count(user_prompt)
316+
317+
# Reserve space for response (conservative estimate)
318+
response_reserve = 1000
319+
320+
total_tokens = system_tokens + user_tokens + response_reserve
261321

262322
fits = total_tokens <= context_length
263323
usage_percent = (total_tokens / context_length) * 100
264324

265325
return fits, {
266326
'context_length': context_length,
267-
'document_tokens': doc_tokens,
327+
'system_tokens': system_tokens,
328+
'user_tokens': user_tokens,
329+
'response_reserve': response_reserve,
268330
'total_estimated_tokens': total_tokens,
269331
'usage_percent': usage_percent,
270332
'available_tokens': context_length - total_tokens
@@ -450,6 +512,13 @@ def render_document_upload(chat_manager):
450512
else:
451513
st.success(f"✅ **Good fit** - Uses {usage_percent:.0f}% of {context_info['context_length']:,} tokens")
452514
st.caption(f"~{context_info['available_tokens']:,} tokens remaining for conversation")
515+
516+
# Show breakdown in expander
517+
with st.expander("📊 Token Breakdown", expanded=False):
518+
st.write(f"**System prompt:** ~{context_info['system_tokens']:,} tokens")
519+
st.write(f"**Response reserve:** ~{context_info['response_reserve']:,} tokens")
520+
st.write(f"**Total estimated:** ~{context_info['total_estimated_tokens']:,} tokens")
521+
st.write(f"**Context limit:** {context_info['context_length']:,} tokens")
453522
else:
454523
st.info("💡 Select a model to check context window compatibility")
455524

@@ -507,7 +576,7 @@ def render_chat_interface(chat_manager):
507576
st.progress(progress_value, text=f"Context Usage: {usage_percent:.1f}%")
508577

509578
# Show brief summary
510-
st.caption(f"~{context_info['document_tokens']:,} tokens / {context_info['context_length']:,} limit")
579+
st.caption(f"~{context_info['system_tokens']:,} tokens / {context_info['context_length']:,} limit")
511580

512581
# Show PDF and extracted text side by side
513582
if chat.get("document_content") and chat.get("document_text"):
@@ -754,7 +823,8 @@ def main():
754823

755824
if error:
756825
st.markdown("---")
757-
st.warning(f"⚠️ Could not check context compatibility: {error}")
826+
st.info(f"ℹ️ {error}")
827+
st.caption("Context checking requires model configuration that includes context window size.")
758828
elif context_info:
759829
usage_percent = context_info['usage_percent']
760830

experiments/README.md

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Experiments and Development Tools
2+
3+
This folder contains development tools, debugging scripts, and experimental code that are useful for development but not part of the main application or test suite.
4+
5+
## Files
6+
7+
### `debug_context.py`
8+
**Purpose**: Debug and analyze context window calculations for Ollama models.
9+
10+
**Usage**:
11+
```bash
12+
# Debug context detection for all available models
13+
python experiments/debug_context.py
14+
15+
# Test context calculation with sample text
16+
python experiments/debug_context.py test <model_name> <sample_text>
17+
```
18+
19+
**Features**:
20+
- Detects context lengths for available Ollama models
21+
- Compares actual vs detected context lengths
22+
- Tests document fitting within context windows
23+
- Shows token usage statistics and warnings
24+
25+
### `simplified_extraction_demo.py`
26+
**Purpose**: Demonstrates the streamlined PDF extraction approach using PyMuPDF4LLM.
27+
28+
**Usage**:
29+
```bash
30+
python experiments/simplified_extraction_demo.py
31+
```
32+
33+
**Features**:
34+
- Shows automatic structure detection
35+
- Demonstrates section extraction
36+
- Highlights benefits of the simplified approach
37+
- Automatically finds PDF files in current directory
38+
39+
### `test_improved_extraction.py`
40+
**Purpose**: Comprehensive testing tool for PDF extraction quality and methods.
41+
42+
**Usage**:
43+
```bash
44+
python experiments/test_improved_extraction.py <pdf_file>
45+
```
46+
47+
**Features**:
48+
- Tests multiple extraction methods
49+
- Analyzes text quality and structure preservation
50+
- Compares different approaches
51+
- Saves extracted text for inspection
52+
- Shows document statistics and metadata
53+
54+
## When to Use These Tools
55+
56+
- **During development**: When working on extraction or context handling features
57+
- **For debugging**: When troubleshooting issues with specific models or documents
58+
- **For analysis**: When evaluating extraction quality or performance
59+
- **For demonstrations**: When showing capabilities to stakeholders
60+
61+
## Integration with Main Codebase
62+
63+
These tools import from the main `ragnarok` package and `app.py`, so they test the actual production code. They're kept separate to avoid cluttering the main application while remaining useful for development.

0 commit comments

Comments
 (0)