Skip to content

Commit bbe495a

Browse files
committed
Merge branch '3.5.x'
2 parents cb5f9dc + 4be7382 commit bbe495a

File tree

7 files changed

+259
-462
lines changed

7 files changed

+259
-462
lines changed

framework/fel/python/plugins/fel_llama_splitter_tools/callable_registers.py

Lines changed: 0 additions & 29 deletions
This file was deleted.

framework/fel/python/plugins/fel_llama_splitter_tools/llama_splitter_tool.py

Lines changed: 14 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
from typing import Tuple, List, Any, Callable
88

99
from fitframework import fit_logger
10+
from fitframework.api.decorators import fitable
1011
from llama_index.core.node_parser import (
1112
SentenceSplitter,
1213
TokenTextSplitter,
@@ -17,11 +18,11 @@
1718
from llama_index.core.schema import Document as LDocument
1819
from llama_index.embeddings.openai import OpenAIEmbedding
1920

20-
from .callable_registers import register_callable_tool
2121
from .node_utils import to_llama_index_document
22+
from .types.semantic_splitter_options import SemanticSplitterOptions
2223

23-
24-
def sentence_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int, **kwargs) -> List[str]:
24+
@fitable("llama.tools.sentence_splitter", "default")
25+
def sentence_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int) -> List[str]:
2526
"""Parse text with a preference for complete sentences."""
2627
if len(text) == 0:
2728
return []
@@ -38,7 +39,8 @@ def sentence_splitter(text: str, separator: str, chunk_size: int, chunk_overlap:
3839
return []
3940

4041

41-
def token_text_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int, **kwargs) -> List[str]:
42+
@fitable("llama.tools.token_text_splitter", "default")
43+
def token_text_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int) -> List[str]:
4244
"""Splitting text that looks at word tokens."""
4345
if len(text) == 0:
4446
return []
@@ -55,14 +57,15 @@ def token_text_splitter(text: str, separator: str, chunk_size: int, chunk_overla
5557
return []
5658

5759

58-
def semantic_splitter(buffer_size: int, breakpoint_percentile_threshold: int, docs: List[LDocument], **kwargs) \
60+
# @fitable("llama.tools.semantic_splitter", "default")
61+
def semantic_splitter(buffer_size: int, breakpoint_percentile_threshold: int, docs: List[LDocument], options: SemanticSplitterOptions) \
5962
-> List[BaseNode]:
6063
"""Splitting text that looks at word tokens."""
6164
if len(docs) == 0:
6265
return []
63-
api_key = kwargs.get("api_key")
64-
model_name = kwargs.get("model_name")
65-
api_base = kwargs.get("api_base")
66+
api_key = options.api_key
67+
model_name = options.model_name
68+
api_base = options.api_base
6669

6770
embed_model = OpenAIEmbedding(model_name=model_name, api_base=api_base, api_key=api_key, max_tokens=4096)
6871

@@ -80,8 +83,9 @@ def semantic_splitter(buffer_size: int, breakpoint_percentile_threshold: int, do
8083
return []
8184

8285

86+
# @fitable("llama.tools.sentence_window_node_parser", "default")
8387
def sentence_window_node_parser(window_size: int, window_metadata_key: str, original_text_metadata_key: str,
84-
docs: List[LDocument], **kwargs) -> List[BaseNode]:
88+
docs: List[LDocument]) -> List[BaseNode]:
8589
"""Splitting text that looks at word tokens."""
8690
if len(docs) == 0:
8791
return []
@@ -96,26 +100,4 @@ def sentence_window_node_parser(window_size: int, window_metadata_key: str, orig
96100
except BaseException:
97101
fit_logger.error("Invoke semantic splitter failed.")
98102
traceback.print_exc()
99-
return []
100-
101-
102-
# Tuple 结构: (tool_func, config_args, return_description)
103-
splitter_basic_toolkit: List[Tuple[Callable[..., Any], List[str], str]] = [
104-
(sentence_splitter, ["text", "separator", "chunk_size", "chunk_overlap"], "Split sentences by sentence."),
105-
(token_text_splitter, ["text", "separator", "chunk_size", "chunk_overlap"], "Split sentences by token."),
106-
(semantic_splitter,
107-
["docs", "buffer_size", "breakpoint_percentile_threshold", "chunk_overlap", "model_name", "api_key", "api_base"],
108-
"Split sentences by semantic."),
109-
(sentence_window_node_parser, ["docs", "window_size", "window_metadata_key", "original_text_metadata_key"],
110-
"Splits all documents into individual sentences")
111-
]
112-
113-
for tool in splitter_basic_toolkit:
114-
register_callable_tool(tool, sentence_splitter.__module__, "llama_index.rag.toolkit")
115-
116-
if __name__ == '__main__':
117-
import time
118-
from .llama_schema_helper import dump_llama_schema
119-
120-
current_timestamp = time.strftime('%Y%m%d%H%M%S')
121-
dump_llama_schema(splitter_basic_toolkit, f"./llama_tool_schema-{str(current_timestamp)}.json")
103+
return []

0 commit comments

Comments
 (0)