thiswillbeyourgithub
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 1 deletion b/‎.gitignore‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎DocToolsLLM/utils/typechecker.py‎
Lines changed: 0 additions & 25 deletions b/‎DocToolsLLM/utils/typechecker.py‎
Lines changed: 0 additions & 25 deletions
diff --git a/‎README.md‎
Lines changed: 59 additions & 52 deletions b/‎README.md‎
Lines changed: 59 additions & 52 deletions
diff --git a/‎DocToolsLLM/DocToolsLLM.py‎ renamed to ‎WinstonDoc/WinstonDoc.py‎
Lines changed: 133 additions & 167 deletions b/‎DocToolsLLM/DocToolsLLM.py‎ renamed to ‎WinstonDoc/WinstonDoc.py‎
Lines changed: 133 additions & 167 deletions
diff --git a/‎DocToolsLLM/__init__.py‎ renamed to ‎WinstonDoc/__init__.py‎
Lines changed: 12 additions & 10 deletions b/‎DocToolsLLM/__init__.py‎ renamed to ‎WinstonDoc/__init__.py‎
Lines changed: 12 additions & 10 deletions
diff --git a/‎DocToolsLLM/__main__.py‎ renamed to ‎WinstonDoc/__main__.py‎
Lines changed: 1 addition & 1 deletion b/‎DocToolsLLM/__main__.py‎ renamed to ‎WinstonDoc/__main__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎DocToolsLLM/docs/USAGE.md‎ renamed to ‎WinstonDoc/docs/USAGE.md‎
Lines changed: 19 additions & 31 deletions b/‎DocToolsLLM/docs/USAGE.md‎ renamed to ‎WinstonDoc/docs/USAGE.md‎
Lines changed: 19 additions & 31 deletions
diff --git a/‎DocToolsLLM/docs/json_entries_example.txt‎ renamed to ‎WinstonDoc/docs/json_entries_example.json‎ b/‎DocToolsLLM/docs/json_entries_example.txt‎ renamed to ‎WinstonDoc/docs/json_entries_example.json‎
diff --git a/‎WinstonDoc/docs/toml_entries_example.toml‎
Lines changed: 16 additions & 0 deletions b/‎WinstonDoc/docs/toml_entries_example.toml‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎DocToolsLLM/utils/__init__.py‎ renamed to ‎WinstonDoc/utils/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎DocToolsLLM/utils/__init__.py‎ renamed to ‎WinstonDoc/utils/__init__.py‎
Lines changed: 1 addition & 1 deletion
@@ -7,6 +7,6 @@ author_dir
 pages
 .env
 **/__pycache__
-DocToolsLLM.egg-info
+*.egg-info
 build
 .aider*
@@ -5,15 +5,15 @@
 import sys
 import fire
 
-from .DocToolsLLM import DocToolsLLM_class as DocToolsLLM
+from .WinstonDoc import WinstonDoc
 
 __all__ = [
-    "DocToolsLLM",
+    "WinstonDoc",
     "cli_launcher",
     "utils",
 ]
 
-__VERSION__ = DocToolsLLM.VERSION
+__VERSION__ = WinstonDoc.VERSION
 
 
 def fire_wrapper(
@@ -25,17 +25,17 @@ def fire_wrapper(
     # --help but not caught by sys.argv
     if "help" in kwargs and kwargs["help"]:
         print("Showing help")
-        DocToolsLLM.md_printer(DocToolsLLM.__doc__)
+        WinstonDoc.md_printer(WinstonDoc.__doc__)
         raise SystemExit()
 
     # no args given
     if not any([args, kwargs]):
         print("Empty arguments, showing help")
-        DocToolsLLM.md_printer(DocToolsLLM.__doc__)
+        WinstonDoc.md_printer(WinstonDoc.__doc__)
         raise SystemExit()
 
     # while we're at it, make it so that
-    # "DocToolsLLM summary" is parsed like "DocToolsLLM --task=summary"
+    # "WinstonDoc summary" is parsed like "WinstonDoc --task=summary"
     args = list(args)
     if args and isinstance(args[0], str):
         if args[0].replace("summary", "summarize") in ["query", "search", "summarize", "summarize_then_query"]:
@@ -67,12 +67,14 @@ def fire_wrapper(
 
 def cli_launcher() -> None:
     sys_args = sys.argv
+    if "--version" in sys_args:
+        return __VERSION__
     if "--help" in sys_args:
         print("Showing help")
-        DocToolsLLM.md_printer(DocToolsLLM.__doc__)
+        WinstonDoc.md_printer(WinstonDoc.__doc__)
         raise SystemExit()
-    if "--completion" in sys_args:
-        return fire.Fire(DocToolsLLM)
+    if "--" in sys_args and "--completion" in sys_args:
+        return fire.Fire(WinstonDoc)
 
     kwargs = fire.Fire(fire_wrapper)
-    instance = DocToolsLLM(**kwargs)
+    instance = WinstonDoc(**kwargs)
@@ -1,5 +1,5 @@
 """
-Entry point used when DocToolsLLM is imported or called by 'python -m DocToolsLLM'.
+Entry point used when WinstonDoc is imported or called by 'python -m WinstonDoc'.
 Does the same as __init__.py
 """
 
 
@@ -7,19 +7,19 @@
         * `summarize`: means the input will be passed through a summarization prompt.
         * `summarize_then_query`: summarize the text then open the prompt to allow querying directly the source document.
 
-* `--filetype`: str, default `infer`
+* `--filetype`: str, default `auto`
     * the type of input. Depending on the value, different other parameters
     are needed. If json_entries is used, each line of the input file can contain
     any of those parameters as long as they are formatted as json. You can find
-    an example of json_entries file in `DocToolsLLM/docs/json_entries_example.txt`
+    an example of json_entries file in `WinstonDoc/docs/json_entries_example.json`
 
     * Supported values:
-        * `infer`: will guess the appropriate filetype based on `--path`.
+        * `auto`: will guess the appropriate filetype based on `--path`.
             Irrelevant for some filetypes, eg if `--filetype`=anki
         * `youtube`: `--path` must link to a youtube video
         * `youtube_playlist`: `--path` must link to a youtube playlist
         * `pdf`: `--path` is path to pdf
-        * `txt`: `--path` is path to txt
+        * `text`: `--path` is path to a .txt file
         * `url`: `--path` must be a valid http(s) link
         * `anki`: must be set: `--anki_profile`. Optional: `--anki_deck`,
         `--anki_notetype`, `--anki_template`, `--anki_tag_filter`.
@@ -35,7 +35,7 @@
         be downloaded. Possible arguments are `--onlinemedia_url_regex`,
         `--onlinemedia_resourcetype_regex`. Then arguments of `local_audio`.
 
-        * `json_entries`: `--path` is path to a txt file that contains a json
+        * `json_entries`: `--path` is path to a text file that contains a json
         for each line containing at least a filetype and a path key/value
         but can contain any parameters described here
         * `recursive_paths`: `--path` is the starting path `--pattern` is the globbing
@@ -118,14 +118,14 @@
     if contains `hyde` but modelname contains `testing` then `hyde` will
     be removed.
 
-* `--query_eval_modelname`: str, default `"openrouter/anthropic/claude-3.5-sonnet:beta"`
+* `--query_eval_modelname`: str, default `"openai/gpt4o-mini"`
     * Cheaper and quicker model than modelname. Used for intermediate
     steps in the RAG, not used in other tasks.
     If the value is not part of the model list of litellm, will use
     fuzzy matching to find the best match.
     None to disable.
 
-* `--query_eval_check_number`: int, default `1`
+* `--query_eval_check_number`: int, default `4`
     * number of passes to do with the eval llm to check if the document
     is indeed relevant to the question. The document will not
     be processed if all answers from the eval llm are 0, and will
@@ -137,13 +137,6 @@
     * threshold under which a document cannot be considered relevant by
     embeddings alone.
 
-* `--query_condense_question`: bool, default `True`
-    * if True, will not use a special LLM call to reformulate the question
-    when task is `query`. Otherwise, the query will be reformulated as
-    a standalone question. Useful when you have multiple questions in
-    a row.
-    Disabled if using a testing model.
-
 ---
 
 * `--summary_n_recursion`: int, default `1`
@@ -187,15 +180,11 @@
     can be used for example to send notification on your phone
     using ntfy.sh to get summaries.
 
-* `--memoryless`: bool, default `False`
-    * if False, will remember the messages across a given chat exchange.
-    Disabled if using a testing model.
-
 * `--disable_llm_cache`: bool, default `False`
     * WARNING: The cache is temporarily ignored in non-OpenAI LLM
     generations because of an error with langchain's ChatLiteLLM.
     Basically if you don't use `--private` and use an LLM from OpenAI,
-    DocToolsLLM will use ChatOpenAI with regular caching, otherwise
+    WinstonDoc will use ChatOpenAI with regular caching, otherwise
     we use ChatLiteLLM with LLM caching disabled.
     More at https://github.com/langchain-ai/langchain/issues/22389
 
@@ -243,7 +232,7 @@
     to a loader. They apply depending on the value of `--filetype`.
     An unexpected argument for a given filetype will result in a crash.
 
-* `--path`: str
+* `--path`: str or PosixPath
     * Used by most loaders. For example for `--filetype=youtube` the path
     must point to a youtube video.
 
@@ -311,14 +300,13 @@
     Either 'youtube', 'whisper' or 'deepgram'.
     Default is 'youtube'.
     * If 'youtube': will take the youtube transcripts as text content.
-    * If 'whisper': DocToolsLLM will download
+    * If 'whisper': WinstonDoc will download
     the audio from the youtube link, and whisper will be used to turn the audio into text. whisper_prompt and whisper_lang will be used if set.
     * If 'deepgram' will download
     the audio from the youtube link, and deepgram will be used to turn the audio into text. `--deepgram_kwargs` will be used if set.
 
 * `--include`: str
-    * Only active if `--filetype` is one of 'json_entries', 'recursive_paths',
-    'link_file', 'youtube_playlist'.
+    * Only active if `--filetype` is 'recursive_paths'
     `--include` can be a list of regex that must be present in the
     document PATH (not content!)
     `--exclude` can be a list of regex that if present in the PATH
@@ -329,10 +317,10 @@
 
 # Other specific arguments
 
-* `--out_file`: str, default `None`
-    * If doctools must create a summary, if out_file given the summary will
+* `--out_file`: str or PosixPath, default `None`
+    * If WinstonDoc must create a summary, if out_file given the summary will
     be written to this file. Note that the file is not erased and
-    Doctools will simply append to it.
+    WinstonDoc will simply append to it.
     * If `--summary_n_recursion` is used, additional files will be
     created with the name `{out_file}.n.md` with n being the n-1th recursive
     summary.
@@ -379,10 +367,10 @@
     each document instead of the metadata.
     Syntax: `[+-]your_regex`
     Example:
-    * Keep only the document that contain `doctools`
-        `--filter_content=+.*doctools.*`
-    * Discard the document that contain `DOCTOOLS`
-        `--filter_content=-.*DOCTOOLS.*`
+    * Keep only the document that contain `winstondoc`
+        `--filter_content=+.*winstondoc.*`
+    * Discard the document that contain `winstondoc`
+        `--filter_content=-.*winstondoc.*`
 
 * `--embed_instruct`: bool, default `None`
     * when loading an embedding model using HuggingFace or
@@ -436,7 +424,7 @@
 
 # Runtime flags
 
-* `DOCTOOLS_TYPECHECKING`
+* `WINSTONDOC_TYPECHECKING`
     * Setting for runtime type checking. Default value is `warn`.
     The typing is checked using [beartype](https://beartype.readthedocs.io/en/latest/) so it shouldn't slow down the runtime.
     * Possible values:
         * `disabled`: disable typechecking.
 
@@ -0,0 +1,16 @@
+[[this_will_fetch_all_the_pdf_recursively_inside_a_dir_according_to_some_parameters]]
+path = '../some/path/to/parent/'
+filetype = 'recursive_paths'
+recursed_filetype = 'pdf'
+pattern = '*pdf'
+exclude = ['regex_to_exclude']
+include = ['regex_that_need_to_be_present']
+
+[[anki_deck_example]]
+filetype = 'anki'
+anki_profile = 'name_of_your_anki_profile_for_ankipandas'
+anki_deck = 'personnal::paintings'
+anki_notetype = 'my_note_type'
+anki_template = '''
+Question:{question_field}
+Answer:{answer_field}'''
@@ -1,4 +1,4 @@
-from DocToolsLLM.utils import *
+from WinstonDoc.utils import *
 
 __all__ = [
     'batch_file_loader',
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-from DocToolsLLM.utils import *`
	`1`	`+from WinstonDoc.utils import *`
`2`	`2`
`3`	`3`	`__all__ = [`
`4`	`4`	`'batch_file_loader',`