1- from typing import Iterator
2- from abc import ABC , abstractmethod
1+ from abc import ABC
2+ from typing import Optional , Iterator
33from graphrag_sdk .document import Document
44from graphrag_sdk .document_loaders import (
55 PDFLoader ,
88 HTMLLoader ,
99 CSVLoader ,
1010 JSONLLoader ,
11+ StringLoader ,
1112)
1213
1314
14- def Source (path : str , instruction : str | None = None ) -> "AbstractSource" :
15+ def Source (path : str , instruction : Optional [ str ] = None ) -> "AbstractSource" :
1516 """
1617 Creates a source object
1718
18- Parameters :
19+ Args :
1920 path (str): path to source
2021 instruction (str): source specific instruction for the LLM
2122
2223 Returns:
23- AbstractSource: source
24+ AbstractSource: A source object corresponding to the input path format.
2425 """
2526
2627 if not isinstance (path , str ) or path == "" :
2728 raise Exception ("Invalid argument, path should be a none empty string." )
2829
29- s = None
30-
3130 if ".pdf" in path .lower ():
3231 s = PDF (path )
3332 elif ".html" in path .lower ():
@@ -38,33 +37,54 @@ def Source(path: str, instruction: str | None = None) -> "AbstractSource":
3837 s = CSV (path )
3938 elif ".jsonl" in path .lower ():
4039 s = JSONL (path )
41- else :
40+ elif ".txt" in path . lower () :
4241 s = TEXT (path )
42+ else :
43+ raise Exception ("Unsupported file format." )
4344
4445 # Set source instructions
4546 s .instruction = instruction
4647
4748 return s
4849
def Source_FromRawText(text: str, instruction: Optional[str] = None) -> "AbstractSource":
    """
    Creates a source object from raw text

    Args:
        text (str): raw text; must be a non-empty string
        instruction (str): source specific instruction for the LLM

    Returns:
        AbstractSource: A string source object.

    Raises:
        Exception: if `text` is not a string or is the empty string.
    """
    # Validate before constructing anything so bad input never reaches the loader.
    if not isinstance(text, str) or text == "":
        raise Exception("Invalid argument, text should be a none empty string.")

    s = STRING(text)

    # Set source instructions
    s.instruction = instruction

    return s
68+
4969
5070class AbstractSource (ABC ):
5171 """
5272 Abstract class representing a source file
5373 """
5474
55- def __init__ (self , path : str ):
75+ def __init__ (self , data_source : str ):
5676 """
5777 Initializes a new instance of the Source class.
5878
5979 Args:
60- path (str): The path to the source file .
80+ data_source (str): Either a file path or a string .
6181
6282 Attributes:
63- path (str): The path to the source file .
64- loader: The loader object associated with the source file .
83+ data_source (str): The source path for the data or the data as a string .
84+ loader: The loader object associated with the source.
6585 instruction (str): The instruction for the source file.
6686 """
67- self .path = path
87+ self .data_source = data_source
6888 self .loader = None
6989 self .instruction = ""
7090
@@ -90,73 +110,82 @@ def __eq__(self, other) -> bool:
90110 if not isinstance (other , AbstractSource ):
91111 return False
92112
93- return self .path == other .path
113+ return self .data_source == other .data_source
94114
95115 def __hash__ (self ):
96116 """
97- Calculates the hash value of the Source object based on its path .
117+ Calculates the hash value of the Source object based on its data_source .
98118
99119 Returns:
100120 int: The hash value of the Source object.
101121 """
102- return hash (self .path )
122+ return hash (self .data_source )
103123
104124
class PDF(AbstractSource):
    """
    PDF resource
    """

    def __init__(self, data_source):
        """
        Initializes a PDF source.

        Args:
            data_source: Value stored on the instance by the base-class
                initializer and handed to PDFLoader
                (presumably the path to a PDF file -- confirm against PDFLoader).
        """
        super().__init__(data_source)
        # The base initializer leaves `loader` as None; attach the PDF loader here.
        self.loader = PDFLoader(self.data_source)
113133
114134
class TEXT(AbstractSource):
    """
    TEXT resource
    """

    def __init__(self, data_source):
        """
        Initializes a TEXT source.

        Args:
            data_source: Value stored on the instance by the base-class
                initializer and handed to TextLoader
                (presumably the path to a text file -- confirm against TextLoader).
        """
        super().__init__(data_source)
        # The base initializer leaves `loader` as None; attach the text loader here.
        self.loader = TextLoader(self.data_source)
123143
124144
class URL(AbstractSource):
    """
    URL resource
    """

    def __init__(self, data_source):
        """
        Initializes a URL source.

        Args:
            data_source: Value stored on the instance by the base-class
                initializer and handed to URLLoader
                (presumably the URL to fetch -- confirm against URLLoader).
        """
        super().__init__(data_source)
        # The base initializer leaves `loader` as None; attach the URL loader here.
        self.loader = URLLoader(self.data_source)
133153
134154
class HTML(AbstractSource):
    """
    HTML resource
    """

    def __init__(self, data_source):
        """
        Initializes an HTML source.

        Args:
            data_source: Value stored on the instance by the base-class
                initializer and handed to HTMLLoader
                (presumably the path to an HTML file -- confirm against HTMLLoader).
        """
        super().__init__(data_source)
        # The base initializer leaves `loader` as None; attach the HTML loader here.
        self.loader = HTMLLoader(self.data_source)
143163
144164
class CSV(AbstractSource):
    """
    CSV resource
    """

    def __init__(self, data_source, rows_per_document: int = 50):
        """
        Initializes a CSV source.

        Args:
            data_source: Value stored on the instance by the base-class
                initializer and handed to CSVLoader
                (presumably the path to a CSV file -- confirm against CSVLoader).
            rows_per_document (int): Forwarded unchanged to CSVLoader; defaults to 50.
                Presumably the number of rows grouped into one document -- confirm
                against CSVLoader.
        """
        super().__init__(data_source)
        # The base initializer leaves `loader` as None; attach the CSV loader here.
        self.loader = CSVLoader(self.data_source, rows_per_document)
153173
154174
class JSONL(AbstractSource):
    """
    JSONL resource
    """

    def __init__(self, data_source, rows_per_document: int = 50):
        """
        Initializes a JSONL source.

        Args:
            data_source: Value stored on the instance by the base-class
                initializer and handed to JSONLLoader
                (presumably the path to a JSONL file -- confirm against JSONLLoader).
            rows_per_document (int): Forwarded unchanged to JSONLLoader; defaults to 50.
                Presumably the number of rows grouped into one document -- confirm
                against JSONLLoader.
        """
        super().__init__(data_source)
        # The base initializer leaves `loader` as None; attach the JSONL loader here.
        self.loader = JSONLLoader(self.data_source, rows_per_document)
183+
class STRING(AbstractSource):
    """
    String resource
    """

    def __init__(self, data_source: str):
        """
        Initializes a String source.

        Args:
            data_source (str): Value stored on the instance by the base-class
                initializer and handed to StringLoader
                (presumably the raw text itself rather than a path -- confirm
                against StringLoader).
        """
        super().__init__(data_source)
        # The base initializer leaves `loader` as None; attach the string loader here.
        self.loader = StringLoader(self.data_source)
0 commit comments