|
1 | 1 | import copy |
2 | 2 | import re |
3 | 3 | from abc import ABC, abstractmethod |
4 | | -from dataclasses import dataclass |
5 | 4 | from typing import Callable, Iterable, List, Literal, Optional, Union |
6 | 5 |
|
7 | 6 | from graphgen.bases.datatypes import Chunk |
8 | 7 | from graphgen.utils import logger |
9 | 8 |
|
10 | 9 |
|
11 | | -@dataclass |
12 | 10 | class BaseSplitter(ABC): |
13 | 11 | """ |
14 | 12 | Abstract base class for splitting text into smaller chunks. |
15 | 13 | """ |
16 | 14 |
|
17 | | - chunk_size: int = 1024 |
18 | | - chunk_overlap: int = 100 |
19 | | - length_function: Callable[[str], int] = len |
20 | | - keep_separator: bool = False |
21 | | - add_start_index: bool = False |
22 | | - strip_whitespace: bool = True |
| 15 | + def __init__( |
| 16 | + self, |
| 17 | + chunk_size: int = 1024, |
| 18 | + chunk_overlap: int = 100, |
| 19 | + length_function: Callable[[str], int] = len, |
| 20 | + keep_separator: bool = False, |
| 21 | + add_start_index: bool = False, |
| 22 | + strip_whitespace: bool = True, |
| 23 | + ): |
| 24 | + self.chunk_size = chunk_size |
| 25 | + self.chunk_overlap = chunk_overlap |
| 26 | + self.length_function = length_function |
| 27 | + self.keep_separator = keep_separator |
| 28 | + self.add_start_index = add_start_index |
| 29 | + self.strip_whitespace = strip_whitespace |
23 | 30 |
|
24 | 31 | @abstractmethod |
25 | 32 | def split_text(self, text: str) -> List[str]: |
|
0 commit comments