Skip to content

Commit e8128b9

Browse files
Merge branch 'dev'
2 parents d9fd0df + 2e2bd0a commit e8128b9

File tree

19 files changed

+430
-287
lines changed

19 files changed

+430
-287
lines changed

DocToolsLLM/DocToolsLLM.py

Lines changed: 130 additions & 79 deletions
Large diffs are not rendered by default.

DocToolsLLM/__init__.py

Lines changed: 52 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8,53 +8,73 @@
88
from rich.markdown import Markdown
99
from rich.console import Console
1010

11-
from .DocToolsLLM import DocToolsLLM_class
11+
from .DocToolsLLM import DocToolsLLM_class as DocToolsLLM
12+
13+
__all__ = [
14+
"DocToolsLLM",
15+
"cli_launcher",
16+
"utils",
17+
]
18+
19+
__VERSION__ = DocToolsLLM.VERSION
1220

1321

1422
def fire_wrapper(
15-
h: bool = False,
16-
help: bool = False,
1723
*args,
1824
**kwargs,
19-
) -> Tuple[List, dict]:
25+
) -> dict:
2026
"used to catch --help arg to display it better then fire does on its own"
21-
assert "h" not in args and "h" not in kwargs
22-
assert "help" not in args and "help" not in kwargs
23-
24-
if (h in ["h", "help", True] or help in ["h", "help", True]): # --help or similar intentions
25-
return [], {"help": True}
2627

27-
if not (h or help or args or kwargs):
28-
return [], {"help": True}
28+
# --help or similar
29+
if ("help" in args and len(args) == 1) or ("help" in kwargs and kwargs["help"]):
30+
print("Showing help")
31+
md = Markdown(DocToolsLLM.__doc__)
32+
console = Console()
33+
console.print(md, style=None)
34+
raise SystemExit()
2935

30-
# parse args as if nothing happened
31-
args = list(args)
32-
if h:
33-
args.insert(0, h)
34-
if help:
35-
args.insert(1, help)
36+
# no args given
37+
if not any([args, kwargs]):
38+
print("Empty arguments, showing help")
39+
md = Markdown(DocToolsLLM.__doc__)
40+
console = Console()
41+
console.print(md, style=None)
42+
raise SystemExit()
3643

3744
# while we're at it, make it so that
3845
# "DocToolsLLM summary" is parsed like "DocToolsLLM --task=summary"
46+
args = list(args)
3947
if args and isinstance(args[0], str):
40-
args[0] = args[0].replace("summary", "summarize")
41-
if args[0] in ["query", "search", "summarize", "summarize_then_query"]:
42-
assert "task" not in kwargs, f"Tried to give task as arg and kwarg?\nargs: {args}\bnkwargs: {kwargs}"
43-
kwargs["task"] = args.pop(0)
44-
return args, kwargs
48+
if args[0].replace("summary", "summarize") in ["query", "search", "summarize", "summarize_then_query"]:
49+
assert "task" not in kwargs or not kwargs["task"], f"Tried to give task as arg and kwarg?\n- args: {args}\n- kwargs: {kwargs}"
50+
kwargs["task"] = args.pop(0).replace("summary", "summarize")
51+
52+
# prepare the parsing of --query
53+
if "query" not in kwargs:
54+
kwargs["query"] = None
55+
if kwargs["query"] in [True, None, False]:
56+
kwargs["query"] = ""
57+
else:
58+
kwargs["query"] = str(kwargs["query"])
59+
60+
# any remaining args is put in --query
61+
if args:
62+
if not kwargs["query"]:
63+
kwargs["query"] = " ".join(map(str, args))
64+
else:
65+
kwargs["query"] += " " + " ".join(map(str, args))
66+
args = []
67+
68+
kwargs["query"] = kwargs["query"].replace("summary", "summarize")
69+
70+
assert not args
71+
return kwargs
4572

4673

4774
def cli_launcher() -> None:
4875
sys_args = sys.argv
4976
if "--completion" in sys_args:
50-
return fire.Fire(DocToolsLLM_class)
77+
return fire.Fire(DocToolsLLM)
5178

52-
args, kwargs = fire.Fire(fire_wrapper)
53-
54-
if "help" in kwargs:
55-
md = Markdown(DocToolsLLM_class.__doc__)
56-
console = Console()
57-
console.print(md, style=None)
58-
raise SystemExit()
59-
else:
60-
instance = DocToolsLLM_class(*args, **kwargs)
79+
kwargs = fire.Fire(fire_wrapper)
80+
instance = DocToolsLLM(**kwargs)

DocToolsLLM/docs/USAGE.md

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99

1010
* `--filetype`: str, default `infer`
1111
* the type of input. Depending on the value, different other parameters
12-
are needed. If json_list is used, the line of the input file can contain
12+
are needed. If json_entries is used, each line of the input file can contain
1313
any of those parameters as long as they are as json. You can find
14-
an example of json_list file in `DocToolsLLM/docs/json_list_example.txt`
14+
an example of json_entries file in `DocToolsLLM/docs/json_entries_example.txt`
1515

1616
* Supported values:
1717
* `infer`: will guess the appropriate filetype based on `--path`.
@@ -28,10 +28,10 @@
2828
you must type or paste the string
2929
* `local_audio`: must be set: `--whisper_prompt`, `--whisper_lang`. The model used will be `whisper-1`
3030

31-
* `json_list`: `--path` is path to a txt file that contains a json
31+
* `json_entries`: `--path` is path to a txt file that contains a json
3232
for each line containing at least a filetype and a path key/value
3333
but can contain any parameters described here
34-
* `recursive`: `--path` is the starting path `--pattern` is the globbing
34+
* `recursive_paths`: `--path` is the starting path `--pattern` is the globbing
3535
patterns to append `--exclude` and `--include` can be a list of regex
3636
applying to found paths (include is run first then exclude, if the
3737
pattern is only lowercase it will be case insensitive) `--recursed_filetype`
@@ -47,7 +47,7 @@
4747

4848
* `--modelname`: str, default `"openai/gpt-4o"`
4949
* Keep in mind that given that the default backend used is litellm
50-
the part of modelname before the slash (/) is the server name.
50+
the part of modelname before the slash (/) is the backend name (also called provider).
5151
If the backend is 'testing/' then a fake LLM will be used
5252
for debugging purposes.
5353
If the value is not part of the model list of litellm, will use
@@ -94,7 +94,7 @@
9494
---
9595

9696
* `--query`: str, default `None`
97-
* if str, will be directly used for the first query if task in `["query", "search"]`
97+
* if str, will be directly used for the first query if task in `["query", "search", "summarize_then_query"]`
9898

9999
* `--query_retrievers`: str, default `"default"`
100100
* must be a string that specifies which retriever will be used for
@@ -164,6 +164,7 @@
164164
* `--debug`: bool, default `False`
165165
* if True will enable langchain tracing, increase verbosity,
166166
disable multithreading for summaries and loading files,
167+
crash if an error is encountered when loading a file,
167168
automatically trigger the debugger on exceptions.
168169

169170
* `--dollar_limit`: int, default `5`
@@ -182,7 +183,7 @@
182183
* if True, will remember the messages across a given chat exchange.
183184
Disabled if using a testing model.
184185

185-
* `--no_llm_cache`: bool, default `False`
186+
* `--disable_llm_cache`: bool, default `False`
186187
* WARNING: The cache is temporarily ignored in non openaillms
187188
generations because of an error with langchain's ChatLiteLLM.
188189
Basically if you don't use `--private` and use llm form openai,
@@ -227,7 +228,7 @@
227228

228229
# Loader specific arguments
229230
Those arguments can be set at cli time but can also be used
230-
when using recursive filetype combination to have arguments specific
231+
when using the recursive_paths filetype combination to have arguments specific
231232
to a loader. They apply depending on the value of `--filetype`.
232233
An unexpected argument for a given filetype will result in a crash.
233234

@@ -293,8 +294,8 @@
293294
the audio from the youtube link, and deepgram will be used to turn the audio into text. `--deepgram_kwargs` will be used if set.
294295

295296
* `--include`: str
296-
* Only active if `--filetype` is one of json_list, recursive,
297-
link_file, youtube_playlist.
297+
* Only active if `--filetype` is one of 'json_entries', 'recursive_paths',
298+
'link_file', 'youtube_playlist'.
298299
`--include` can be a list of regex that must be present in the
299300
document PATH (not content!)
300301
`--exclude` can be a list of regex that if present in the PATH
@@ -387,7 +388,7 @@
387388
* `--loading_failure`: str, default `crash`
388389
* either `crash` or `warn`. Determines what to do with
389390
exceptions happening when loading a document. This can be set
390-
per document if a recursive filetype is used.
391+
per document if a recursive_paths filetype is used.
391392

392393
# Runtime flags
393394

DocToolsLLM/docs/json_list_example.txt renamed to DocToolsLLM/docs/json_entries_example.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77

88
# this will fetch all the pdf recursively inside a dir according to some parameters
9-
{"path": "../some/path/to/parent/", "filetype": "recursive", "recursed_filetype": "pdf", "pattern": "*pdf", "exclude": ["regex_to_exclude"], "include": ["regex_that_need_to_be_present"]}
9+
{"path": "../some/path/to/parent/", "filetype": "recursive_paths", "recursed_filetype": "pdf", "pattern": "*pdf", "exclude": ["regex_to_exclude"], "include": ["regex_that_need_to_be_present"]}
1010

1111

1212
# anki deck example

DocToolsLLM/utils/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from DocToolsLLM.utils import *
2+
3+
__all__ = [
4+
'batch_file_loader',
5+
'loaders',
6+
'misc',
7+
'prompts',
8+
]

0 commit comments

Comments
 (0)