Skip to content

Commit f4ced61

Browse files
authored
v8.2
1 parent 9e8c475 commit f4ced61

12 files changed

+756
-522
lines changed

src/database_interactions.py

Lines changed: 186 additions & 121 deletions
Large diffs are not rendered by default.

src/document_processor.py

Lines changed: 10 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -29,20 +29,19 @@
2929

3030
from typing import Optional, Any, Iterator, Union
3131
from langchain_community.document_loaders.blob_loaders import Blob
32-
3332
from langchain_community.document_loaders.parsers import PyMuPDFParser
3433
import pymupdf
3534

3635
from constants import DOCUMENT_LOADERS
3736
from extract_metadata import extract_document_metadata, add_pymupdf_page_metadata, compute_content_hash
3837

39-
logging.basicConfig(
40-
level=logging.ERROR,
41-
format='%(asctime)s - %(levelname)s - %(message)s',
42-
handlers=[
43-
logging.FileHandler('document_processor.log', mode='w')
44-
]
45-
)
38+
# logging.basicConfig(
39+
# level=logging.ERROR,
40+
# format='%(asctime)s - %(levelname)s - %(message)s',
41+
# handlers=[
42+
# logging.FileHandler('document_processor.log', mode='w')
43+
# ]
44+
# )
4645

4746
warnings.filterwarnings("ignore", category=FutureWarning)
4847
warnings.filterwarnings("ignore", category=UserWarning)
@@ -55,12 +54,12 @@
5554
from typing import List
5655

5756
class FixedSizeTextSplitter:
58-
"""Splits text into equally-sized character chunks without regex.
57+
"""Splits text into equally-sized character chunks.
5958
6059
Parameters
6160
----------
6261
chunk_size : int
63-
Maximum characters per chunk. Comes straight from config.yaml.
62+
Maximum characters per chunk. Taken from config.yaml.
6463
"""
6564

6665
def __init__(self, chunk_size: int):
@@ -104,7 +103,7 @@ def __init__(self, file_path: Union[str, PurePath], **kwargs: Any) -> None:
104103
extract_images=kwargs.get('extract_images', False)
105104
)
106105

107-
# ensure all loader class names map correctly
106+
# map loaders
108107
for ext, loader_name in DOCUMENT_LOADERS.items():
109108
DOCUMENT_LOADERS[ext] = globals()[loader_name]
110109

src/gui_tabs_settings.py

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,42 @@
11
import logging
22
from functools import partial
3-
from PySide6.QtWidgets import QVBoxLayout, QGroupBox, QPushButton, QHBoxLayout, QWidget, QMessageBox
3+
from PySide6.QtWidgets import (
4+
QVBoxLayout,
5+
QGroupBox,
6+
QPushButton,
7+
QHBoxLayout,
8+
QWidget,
9+
QMessageBox,
10+
)
411
from gui_tabs_settings_server import ServerSettingsTab
512
from gui_tabs_settings_database_create import ChunkSettingsTab
613
from gui_tabs_settings_database_query import DatabaseSettingsTab
714
from gui_tabs_settings_tts import TTSSettingsTab
815
from gui_tabs_settings_vision import VisionSettingsTab
916

17+
1018
def update_all_configs(configs):
1119
updated = False
1220
for config in configs.values():
1321
updated = config.update_config() or updated
1422
if updated:
1523
logging.info("config.yaml file updated")
16-
17-
message = 'Settings Updated' if updated else 'No Updates'
18-
details = 'One or more settings have been updated.' if updated else 'No new settings were entered.'
19-
24+
25+
message = "Settings Updated" if updated else "No Updates"
26+
details = (
27+
"One or more settings have been updated."
28+
if updated
29+
else "No new settings were entered."
30+
)
31+
2032
QMessageBox.information(None, message, details)
2133

34+
2235
def adjust_stretch(groups, layout):
2336
for group, factor in groups.items():
2437
layout.setStretchFactor(group, factor if group.isChecked() else 0)
2538

39+
2640
class GuiSettingsTab(QWidget):
2741
def __init__(self):
2842
super(GuiSettingsTab, self).__init__()
@@ -34,6 +48,7 @@ def __init__(self):
3448
}
3549
self.groups = {}
3650
self.configs = {}
51+
3752
for title, (TabClass, stretch) in classes.items():
3853
settings = TabClass()
3954
group = QGroupBox(title)
@@ -47,7 +62,6 @@ def __init__(self):
4762
self.layout.addWidget(group, stretch)
4863
group.toggled.connect(partial(self.toggle_group, group))
4964

50-
# TTS
5165
ttsSettings = TTSSettingsTab()
5266
ttsGroup = QGroupBox("Text to Speech")
5367
ttsLayout = QVBoxLayout()
@@ -56,10 +70,9 @@ def __init__(self):
5670
ttsGroup.setCheckable(True)
5771
ttsGroup.setChecked(True)
5872
self.layout.addWidget(ttsGroup, 3)
59-
self.groups[ttsGroup] = 2
73+
self.groups[ttsGroup] = 3
6074
ttsGroup.toggled.connect(partial(self.toggle_tts_group, ttsSettings))
6175

62-
# VisionSettingsTab - handled separately
6376
visionSettings = VisionSettingsTab()
6477
visionGroup = QGroupBox("Vision Models")
6578
visionLayout = QVBoxLayout()
@@ -68,7 +81,7 @@ def __init__(self):
6881
visionGroup.setCheckable(True)
6982
visionGroup.setChecked(True)
7083
self.layout.addWidget(visionGroup, 2)
71-
self.groups[visionGroup] = 1
84+
self.groups[visionGroup] = 2
7285
visionGroup.toggled.connect(partial(self.toggle_vision_group, visionSettings))
7386

7487
self.update_all_button = QPushButton("Update Settings")
@@ -79,6 +92,7 @@ def __init__(self):
7992
center_button_layout.addWidget(self.update_all_button)
8093
center_button_layout.addStretch(1)
8194
self.layout.addLayout(center_button_layout)
95+
8296
self.setLayout(self.layout)
8397
adjust_stretch(self.groups, self.layout)
8498

src/gui_tabs_settings_database_create.py

Lines changed: 61 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -4,98 +4,98 @@
44

55
from constants import TOOLTIPS
66

7+
78
class ChunkSettingsTab(QWidget):
89
def __init__(self):
910
super(ChunkSettingsTab, self).__init__()
10-
with open('config.yaml', 'r', encoding='utf-8') as f:
11+
with open("config.yaml", "r", encoding="utf-8") as f:
1112
config_data = yaml.safe_load(f)
12-
self.database_config = config_data['database']
13-
self.compute_device_options = config_data['Compute_Device']['available']
14-
self.database_creation_device = config_data['Compute_Device']['database_creation']
15-
13+
self.database_config = config_data["database"]
14+
self.compute_device_options = config_data["Compute_Device"]["available"]
15+
self.database_creation_device = config_data["Compute_Device"]["database_creation"]
16+
1617
grid_layout = QGridLayout()
17-
18+
1819
# Device selection and current setting
1920
self.device_label = QLabel("Device:")
2021
self.device_label.setToolTip(TOOLTIPS["CREATE_DEVICE_DB"])
2122
grid_layout.addWidget(self.device_label, 0, 0)
22-
23+
2324
self.device_combo = QComboBox()
2425
self.device_combo.addItems(self.compute_device_options)
2526
self.device_combo.setToolTip(TOOLTIPS["CREATE_DEVICE_DB"])
2627
if self.database_creation_device in self.compute_device_options:
27-
self.device_combo.setCurrentIndex(self.compute_device_options.index(self.database_creation_device))
28+
self.device_combo.setCurrentIndex(
29+
self.compute_device_options.index(self.database_creation_device)
30+
)
2831
self.device_combo.setMinimumWidth(100)
2932
grid_layout.addWidget(self.device_combo, 0, 2)
30-
33+
3134
self.current_device_label = QLabel(f"{self.database_creation_device}")
3235
self.current_device_label.setToolTip(TOOLTIPS["CREATE_DEVICE_DB"])
3336
grid_layout.addWidget(self.current_device_label, 0, 1)
34-
37+
3538
# Chunk size and current setting
3639
self.chunk_size_label = QLabel("Chunk Size (# characters):")
3740
self.chunk_size_label.setToolTip(TOOLTIPS["CHUNK_SIZE"])
3841
grid_layout.addWidget(self.chunk_size_label, 0, 3)
39-
42+
4043
self.chunk_size_edit = QLineEdit()
4144
self.chunk_size_edit.setPlaceholderText("Enter new chunk_size...")
4245
self.chunk_size_edit.setValidator(QIntValidator(1, 1000000))
4346
self.chunk_size_edit.setToolTip(TOOLTIPS["CHUNK_SIZE"])
4447
grid_layout.addWidget(self.chunk_size_edit, 0, 5)
45-
46-
current_size = self.database_config.get('chunk_size', '')
48+
49+
current_size = self.database_config.get("chunk_size", "")
4750
self.current_size_label = QLabel(f"{current_size}")
4851
self.current_size_label.setToolTip(TOOLTIPS["CHUNK_SIZE"])
4952
grid_layout.addWidget(self.current_size_label, 0, 4)
50-
53+
5154
# Chunk overlap and current setting
5255
self.chunk_overlap_label = QLabel("Overlap (# characters):")
5356
self.chunk_overlap_label.setToolTip(TOOLTIPS["CHUNK_OVERLAP"])
5457
grid_layout.addWidget(self.chunk_overlap_label, 0, 6)
55-
58+
5659
self.chunk_overlap_edit = QLineEdit()
5760
self.chunk_overlap_edit.setPlaceholderText("Enter new chunk_overlap...")
5861
self.chunk_overlap_edit.setValidator(QIntValidator(0, 1000000))
5962
self.chunk_overlap_edit.setToolTip(TOOLTIPS["CHUNK_OVERLAP"])
6063
grid_layout.addWidget(self.chunk_overlap_edit, 0, 8)
61-
62-
current_overlap = self.database_config.get('chunk_overlap', '')
64+
65+
current_overlap = self.database_config.get("chunk_overlap", "")
6366
self.current_overlap_label = QLabel(f"{current_overlap}")
6467
self.current_overlap_label.setToolTip(TOOLTIPS["CHUNK_OVERLAP"])
6568
grid_layout.addWidget(self.current_overlap_label, 0, 7)
66-
69+
6770
# "Half-Precision" checkbox
6871
self.half_precision_label = QLabel("Half-Precision (2x speedup - GPU only):")
6972
self.half_precision_label.setToolTip(TOOLTIPS["HALF_PRECISION"])
7073
grid_layout.addWidget(self.half_precision_label, 1, 0, 1, 3)
71-
74+
7275
self.half_precision_checkbox = QCheckBox()
73-
self.half_precision_checkbox.setChecked(self.database_config.get('half', False))
76+
self.half_precision_checkbox.setChecked(self.database_config.get("half", False))
7477
self.half_precision_checkbox.setToolTip(TOOLTIPS["HALF_PRECISION"])
7578
grid_layout.addWidget(self.half_precision_checkbox, 1, 3)
76-
79+
7780
self.setLayout(grid_layout)
7881

7982
def update_config(self):
8083
try:
81-
with open('config.yaml', 'r', encoding='utf-8') as f:
84+
with open("config.yaml", "r", encoding="utf-8") as f:
8285
config_data = yaml.safe_load(f)
8386
except Exception as e:
8487
QMessageBox.critical(
85-
self,
86-
"Error Loading Configuration",
87-
f"An error occurred while loading the configuration: {e}"
88+
self,
89+
"Error Loading Configuration",
90+
f"An error occurred while loading the configuration: {e}",
8891
)
8992
return False
90-
93+
9194
settings_changed = False
9295
errors = []
9396

9497
new_device = self.device_combo.currentText()
95-
if new_device != self.database_creation_device:
96-
device_changed = True
97-
else:
98-
device_changed = False
98+
device_changed = new_device != self.database_creation_device
9999

100100
new_chunk_size_text = self.chunk_size_edit.text().strip()
101101
if new_chunk_size_text:
@@ -106,7 +106,7 @@ def update_config(self):
106106
except ValueError as ve:
107107
errors.append(f"Chunk size must be a positive integer: {str(ve)}")
108108
else:
109-
new_chunk_size = self.database_config.get('chunk_size', 0)
109+
new_chunk_size = self.database_config.get("chunk_size", 0)
110110

111111
new_chunk_overlap_text = self.chunk_overlap_edit.text().strip()
112112
if new_chunk_overlap_text:
@@ -115,60 +115,70 @@ def update_config(self):
115115
if new_chunk_overlap < 0:
116116
raise ValueError("Chunk overlap cannot be negative.")
117117
except ValueError as ve:
118-
errors.append(f"Chunk overlap must be a non-negative integer: {str(ve)}")
118+
errors.append(
119+
f"Chunk overlap must be a non-negative integer: {str(ve)}"
120+
)
119121
else:
120-
new_chunk_overlap = self.database_config.get('chunk_overlap', 0)
122+
new_chunk_overlap = self.database_config.get("chunk_overlap", 0)
121123

122124
if new_chunk_size and new_chunk_overlap >= new_chunk_size:
123125
errors.append("Chunk overlap must be less than chunk size.")
124126

125127
if errors:
126128
error_message = "\n".join(errors)
127129
QMessageBox.warning(
128-
self,
129-
"Invalid Input",
130-
f"The following errors occurred:\n{error_message}"
130+
self, "Invalid Input", f"The following errors occurred:\n{error_message}"
131131
)
132132
return False
133133

134134
if device_changed:
135-
config_data['Compute_Device']['database_creation'] = new_device
135+
config_data["Compute_Device"]["database_creation"] = new_device
136136
self.database_creation_device = new_device
137137
self.current_device_label.setText(f"{new_device}")
138138
settings_changed = True
139139

140-
if new_chunk_size_text and new_chunk_size != self.database_config.get('chunk_size', 0):
141-
config_data['database']['chunk_size'] = new_chunk_size
140+
if new_chunk_size_text and new_chunk_size != self.database_config.get(
141+
"chunk_size", 0
142+
):
143+
config_data["database"]["chunk_size"] = new_chunk_size
142144
self.current_size_label.setText(f"{new_chunk_size}")
143145
settings_changed = True
144146

145-
if new_chunk_overlap_text and new_chunk_overlap != self.database_config.get('chunk_overlap', 0):
146-
config_data['database']['chunk_overlap'] = new_chunk_overlap
147+
if new_chunk_overlap_text and new_chunk_overlap != self.database_config.get(
148+
"chunk_overlap", 0
149+
):
150+
config_data["database"]["chunk_overlap"] = new_chunk_overlap
147151
self.current_overlap_label.setText(f"{new_chunk_overlap}")
148152
settings_changed = True
149153

150154
new_half_precision = self.half_precision_checkbox.isChecked()
151-
if new_half_precision != self.database_config.get('half', False):
152-
config_data['database']['half'] = new_half_precision
153-
self.database_config['half'] = new_half_precision
155+
if new_half_precision != self.database_config.get("half", False):
156+
config_data["database"]["half"] = new_half_precision
154157
settings_changed = True
155158

156159
if settings_changed:
157160
try:
158-
with open('config.yaml', 'w', encoding='utf-8') as f:
161+
with open("config.yaml", "w", encoding="utf-8") as f:
159162
yaml.safe_dump(config_data, f)
160-
163+
164+
self.database_config["chunk_size"] = config_data["database"]["chunk_size"]
165+
self.database_config["chunk_overlap"] = config_data["database"]["chunk_overlap"]
166+
self.database_config["half"] = config_data["database"]["half"]
167+
168+
self.database_creation_device = config_data["Compute_Device"][
169+
"database_creation"
170+
]
171+
161172
self.chunk_overlap_edit.clear()
162173
self.chunk_size_edit.clear()
163174
except Exception as e:
164175
QMessageBox.critical(
165-
self,
166-
"Error Saving Configuration",
167-
f"An error occurred while saving the configuration: {e}"
176+
self,
177+
"Error Saving Configuration",
178+
f"An error occurred while saving the configuration: {e}",
168179
)
169180
return False
170-
171181
else:
172182
return False
173183

174-
return settings_changed
184+
return settings_changed

0 commit comments

Comments
 (0)