diff --git a/MANIFEST.in b/MANIFEST.in index e50b514b..e3a43fcb 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ +include optillm.py include optillm/plugins/*.py include optillm/cepo/*.py include optillm/cepo/configs/*.yaml diff --git a/optillm/__init__.py b/optillm/__init__.py index cbb5521b..8c831666 100644 --- a/optillm/__init__.py +++ b/optillm/__init__.py @@ -2,7 +2,7 @@ import os # Version information -__version__ = "0.2.0" +__version__ = "0.2.1" # Get the path to the root optillm.py spec = util.spec_from_file_location( diff --git a/optillm/plugins/longcepo/mapreduce.py b/optillm/plugins/longcepo/mapreduce.py index 4afec2b5..66497afb 100644 --- a/optillm/plugins/longcepo/mapreduce.py +++ b/optillm/plugins/longcepo/mapreduce.py @@ -158,8 +158,8 @@ def fetch_map_response(client, model, chunk, query, system_prompt, summary): tokenizer, cb_log, longcepo_config, + irrelevance_tags, ) - result = remove_chunks(result, irrelevance_tags) if not result: return "No information", cb_log @@ -200,6 +200,7 @@ def collapse_chunks( tokenizer, cb_log: CBLog, longcepo_config: LongCepoConfig, + irrelevance_tags: Tuple[str] = ("[NO INFORMATION]",), ) -> Tuple[List[str], CBLog]: """ Collapses context chunk pairs in sliding window until the total token count fits within the context window. @@ -221,7 +222,7 @@ def collapse_chunks( num_tokens = get_prompt_length(format_chunk_list(context_chunks), tokenizer) token_budget = ( longcepo_config.max_context_window - - get_prompt_length(longcepo_config.collapse_prompt, tokenizer) + - get_prompt_length(longcepo_config.reduce_prompt, tokenizer) - longcepo_config.max_output_tokens ) logger.info(f"Pre-collapse length of chunks {num_tokens}, allowed {token_budget}") @@ -269,6 +270,7 @@ def fetch_collapse_response(client, model, docs, query, system_prompt): system_prompt, cb_log, ) + context_chunks = remove_chunks(context_chunks, irrelevance_tags) merge_pair_idx = (merge_pair_idx + 1) % max(len(context_chunks) - 1, 1) num_tokens = get_prompt_length(format_chunk_list(context_chunks), tokenizer) collapse_step += 1 diff --git a/pyproject.toml b/pyproject.toml index 4d68a74f..0a5cffe5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "optillm" -version = "0.2.0" +version = "0.2.1" description = "An optimizing inference proxy for LLMs." readme = "README.md" license = "Apache-2.0" @@ -81,4 +81,7 @@ optillm = [ "plugins/*.py", "cepo/*.py", "cepo/configs/*.yaml", -] \ No newline at end of file +] + +[tool.setuptools.data-files] +"" = ["optillm.py"] \ No newline at end of file