Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/khoj/database/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,7 @@ class EntrySource(models.TextChoices):
COMPUTER = "computer"
NOTION = "notion"
GITHUB = "github"
OBSIDIAN = "obsidian"

user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True)
agent = models.ForeignKey(Agent, on_delete=models.CASCADE, default=None, null=True, blank=True)
Expand Down
6 changes: 4 additions & 2 deletions src/khoj/processor/content/docx/docx_to_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ def __init__(self):
super().__init__()

# Define Functions
def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
def process(
self, files: dict[str, str], user: KhojUser, regenerate: bool = False, file_source: str = None
) -> Tuple[int, int]:
# Extract required fields from config
deletion_file_names = set([file for file in files if files[file] == b""])
files_to_process = set(files) - deletion_file_names
Expand All @@ -38,7 +40,7 @@ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = Fals
user,
current_entries,
DbEntry.EntryType.DOCX,
DbEntry.EntrySource.COMPUTER,
file_source or DbEntry.EntrySource.COMPUTER,
"compiled",
logger,
deletion_file_names,
Expand Down
6 changes: 4 additions & 2 deletions src/khoj/processor/content/images/image_to_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ def __init__(self):
super().__init__()

# Define Functions
def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
def process(
self, files: dict[str, str], user: KhojUser, regenerate: bool = False, file_source: str = None
) -> Tuple[int, int]:
# Extract required fields from config
deletion_file_names = set([file for file in files if files[file] == b""])
files_to_process = set(files) - deletion_file_names
Expand All @@ -37,7 +39,7 @@ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = Fals
user,
current_entries,
DbEntry.EntryType.IMAGE,
DbEntry.EntrySource.COMPUTER,
file_source or DbEntry.EntrySource.COMPUTER,
"compiled",
logger,
deletion_file_names,
Expand Down
6 changes: 4 additions & 2 deletions src/khoj/processor/content/markdown/markdown_to_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ def __init__(self):
super().__init__()

# Define Functions
def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
def process(
self, files: dict[str, str], user: KhojUser, regenerate: bool = False, file_source: str = None
) -> Tuple[int, int]:
# Extract required fields from config
deletion_file_names = set([file for file in files if files[file] == ""])
files_to_process = set(files) - deletion_file_names
Expand All @@ -39,7 +41,7 @@ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = Fals
user,
current_entries,
DbEntry.EntryType.MARKDOWN,
DbEntry.EntrySource.COMPUTER,
file_source or DbEntry.EntrySource.COMPUTER,
"compiled",
logger,
deletion_file_names,
Expand Down
6 changes: 4 additions & 2 deletions src/khoj/processor/content/org_mode/org_to_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ def __init__(self):
super().__init__()

# Define Functions
def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
def process(
self, files: dict[str, str], user: KhojUser, regenerate: bool = False, file_source: str = None
) -> Tuple[int, int]:
deletion_file_names = set([file for file in files if files[file] == ""])
files_to_process = set(files) - deletion_file_names
files = {file: files[file] for file in files_to_process}
Expand All @@ -38,7 +40,7 @@ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = Fals
user,
current_entries,
DbEntry.EntryType.ORG,
DbEntry.EntrySource.COMPUTER,
file_source or DbEntry.EntrySource.COMPUTER,
"compiled",
logger,
deletion_file_names,
Expand Down
6 changes: 4 additions & 2 deletions src/khoj/processor/content/pdf/pdf_to_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ def __init__(self):
super().__init__()

# Define Functions
def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
def process(
self, files: dict[str, str], user: KhojUser, regenerate: bool = False, file_source: str = None
) -> Tuple[int, int]:
# Extract required fields from config
deletion_file_names = set([file for file in files if files[file] == b""])
files_to_process = set(files) - deletion_file_names
Expand All @@ -41,7 +43,7 @@ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = Fals
user,
current_entries,
DbEntry.EntryType.PDF,
DbEntry.EntrySource.COMPUTER,
file_source or DbEntry.EntrySource.COMPUTER,
"compiled",
logger,
deletion_file_names,
Expand Down
6 changes: 4 additions & 2 deletions src/khoj/processor/content/plaintext/plaintext_to_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ def __init__(self):
super().__init__()

# Define Functions
def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
def process(
self, files: dict[str, str], user: KhojUser, regenerate: bool = False, file_source: str = None
) -> Tuple[int, int]:
deletion_file_names = set([file for file in files if files[file] == ""])
files_to_process = set(files) - deletion_file_names
files = {file: files[file] for file in files_to_process}
Expand All @@ -38,7 +40,7 @@ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = Fals
user,
current_entries,
DbEntry.EntryType.PLAINTEXT,
DbEntry.EntrySource.COMPUTER,
file_source or DbEntry.EntrySource.COMPUTER,
key="compiled",
logger=logger,
deletion_filenames=deletion_file_names,
Expand Down
4 changes: 3 additions & 1 deletion src/khoj/processor/content/text_to_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ def __init__(self, config: Any = None):
self.date_filter = DateFilter()

@abstractmethod
def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]: ...
def process(
self, files: dict[str, str], user: KhojUser, regenerate: bool = False, file_source: str = None
) -> Tuple[int, int]: ...

@staticmethod
def hash_func(key: str) -> Callable:
Expand Down
6 changes: 6 additions & 0 deletions src/khoj/routers/api_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,11 @@ async def indexer(
docx=index_files["docx"],
)

# Determine file_source based on client type
file_source = DbEntry.EntrySource.COMPUTER
if client and client.lower() == "obsidian":
file_source = DbEntry.EntrySource.OBSIDIAN

loop = asyncio.get_event_loop()
success = await loop.run_in_executor(
None,
Expand All @@ -589,6 +594,7 @@ async def indexer(
indexer_input.model_dump(),
regenerate,
t,
file_source,
)
if not success:
raise RuntimeError(f"Failed to {method} {t} data sent by {client} client into content index")
Expand Down
7 changes: 7 additions & 0 deletions src/khoj/routers/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3003,6 +3003,7 @@ def configure_content(
files: Optional[dict[str, dict[str, str]]],
regenerate: bool = False,
t: Optional[state.SearchType] = state.SearchType.All,
file_source: Optional[str] = None,
) -> bool:
success = True
if t is None:
Expand Down Expand Up @@ -3036,6 +3037,7 @@ def configure_content(
files.get("org"),
regenerate=regenerate,
user=user,
file_source=file_source,
)
except Exception as e:
logger.error(f"🚨 Failed to setup org: {e}", exc_info=True)
Expand All @@ -3053,6 +3055,7 @@ def configure_content(
files.get("markdown"),
regenerate=regenerate,
user=user,
file_source=file_source,
)

except Exception as e:
Expand All @@ -3071,6 +3074,7 @@ def configure_content(
files.get("pdf"),
regenerate=regenerate,
user=user,
file_source=file_source,
)

except Exception as e:
Expand All @@ -3089,6 +3093,7 @@ def configure_content(
files.get("plaintext"),
regenerate=regenerate,
user=user,
file_source=file_source,
)

except Exception as e:
Expand Down Expand Up @@ -3149,6 +3154,7 @@ def configure_content(
files.get("image"),
regenerate=regenerate,
user=user,
file_source=file_source,
)
except Exception as e:
logger.error(f"🚨 Failed to setup images: {e}", exc_info=True)
Expand All @@ -3161,6 +3167,7 @@ def configure_content(
files.get("docx"),
regenerate=regenerate,
user=user,
file_source=file_source,
)
except Exception as e:
logger.error(f"🚨 Failed to setup docx: {e}", exc_info=True)
Expand Down
5 changes: 3 additions & 2 deletions src/khoj/search_type/text_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,14 +212,15 @@ def setup(
regenerate: bool,
user: KhojUser,
config=None,
file_source: str = None,
) -> Tuple[int, int]:
if config:
num_new_embeddings, num_deleted_embeddings = text_to_entries(config).process(
files=files, user=user, regenerate=regenerate
files=files, user=user, regenerate=regenerate, file_source=file_source
)
else:
num_new_embeddings, num_deleted_embeddings = text_to_entries().process(
files=files, user=user, regenerate=regenerate
files=files, user=user, regenerate=regenerate, file_source=file_source
)

if files:
Expand Down