-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
59 lines (52 loc) · 1.65 KB
/
main.py
File metadata and controls
59 lines (52 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import shutil
import signal
from knbase import KnowledgeBasesHub
from knbase_file_scanner import FileScannerModule
from knbase_pdf_parser import PDFParserModule
from knbase_index import IndexDatabase
def main(
    knbase_path: str = "/Users/taozeyu/Downloads/TestPaper2/",
    query: str = "一带一路",
    limit: int = 5,
) -> None:
    """Index a directory into a fresh knowledge base and print query matches.

    The function recreates an empty ``temp`` directory next to this script,
    wires a PDF parser, an index database and a file scanner into a
    ``KnowledgeBasesHub``, registers ``knbase_path`` as a knowledge base,
    runs a scan, and finally prints every row (and each row's segments)
    returned by ``index_db.query(query, limit)``.

    Args:
        knbase_path: Directory handed to the file scanner as the ``"path"``
            resource parameter. Defaults to the path the script originally
            hard-coded.
        query: Text passed as the first argument to ``index_db.query``.
        limit: Second argument passed to ``index_db.query`` (presumably the
            maximum number of results -- confirm against ``IndexDatabase``).
    """
    # Scratch directory lives beside this script, independent of the CWD.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    temp_path = os.path.join(script_dir, "temp")

    # Always start from an empty scratch directory so that databases left
    # over from a previous run are discarded.
    if os.path.exists(temp_path):
        shutil.rmtree(temp_path)
    os.makedirs(temp_path)

    pdf_parser_module = PDFParserModule()
    index_db = IndexDatabase(base_path=temp_path)
    file_scanner_module = FileScannerModule(
        db_path=os.path.join(temp_path, "file-scanner.sqlite3"),
        preprocess_modules_map={"*": pdf_parser_module},
        index_modules=[*index_db.modules],
    )
    knbases_hub = KnowledgeBasesHub(
        db_path=os.path.join(temp_path, "main.sqlite3"),
        preprocess_path=os.path.join(temp_path, "preprocess"),
        scan_workers=2,
        process_workers=2,
        modules=(
            file_scanner_module,
            pdf_parser_module,
            *index_db.modules,
        ),
    )
    # The index database is created before the hub, so the hub reference
    # is handed to it afterwards.
    index_db.set_hub(knbases_hub)

    # On SIGINT (Ctrl+C) ask the hub to interrupt instead of letting the
    # default handler raise KeyboardInterrupt.
    signal.signal(
        signal.SIGINT,
        lambda _1, _2: knbases_hub.interrupt(),
    )

    knbases_hub.create_knowledge_base(
        resource_module=file_scanner_module,
        resource_param={
            "path": knbase_path,
        },
    )
    knbases_hub.scan()

    for row in index_db.query(query, limit):
        print(row.matching.name, row.metadata)
        for segment in row.segments:
            content = " ".join(segment.matched_tokens)
            print(f" [{segment.start}-{segment.end}] fts5={segment.fts5_rank} vector={segment.vector_distance}")
            print(" ", content)
# Run the demo only when this file is executed directly as a script;
# importing the module must not trigger a scan.
if __name__ == "__main__":
    main()