add file upload guidance to fine-tune docs (#231)

saiatmakuri · web-flow · commit 84a9b7f38e33 · 2023-08-28T18:37:26.000-07:00
diff --git a/clients/python/llmengine/fine_tuning.py b/clients/python/llmengine/fine_tuning.py
@@ -107,14 +107,14 @@ def create(
             writer.writerows(data)
         ```
 
-        Currently, data needs to be uploaded to a publicly accessible web URL so that it can be read
-        for fine-tuning. Publicly accessible HTTP and HTTPS URLs are currently supported.
-        Support for privately sharing data with the LLM Engine API is coming shortly. For quick
-        iteration, you can look into tools like Pastebin or GitHub Gists to quickly host your CSV
-        files in a public manner. An example Github Gist can be found
-        [here](https://gist.github.com/tigss/7cec73251a37de72756a3b15eace9965). To use the gist,
-        you can use the URL given when you click the “Raw” button
-        ([URL](https://gist.githubusercontent.com/tigss/7cec73251a37de72756a3b15eace9965/raw/85d9742890e1e6b0c06468507292893b820c13c9/llm_sample_data.csv)).
+        Currently, data needs to be uploaded to either a publicly accessible web URL or to LLM Engine's
+        private file server so that it can be read for fine-tuning. Publicly accessible HTTP and HTTPS
+        URLs are currently supported.
+
+        To privately share data with the LLM Engine API, use LLM Engine's [File.upload](../../api/python_client/#llmengine.File.upload)
+        API. You can upload data from a local file to LLM Engine's private file server and then use
+        the returned file ID to reference your data in the FineTune API. The file ID is generally in
+        the form of `file-<random_string>`, e.g. "file-7DLVeLdN2Ty4M2m".
 
         Example code for fine-tuning:
         === "Fine-tuning in Python"
@@ -123,7 +123,7 @@ def create(
 
             response = FineTune.create(
                 model="llama-2-7b",
-                training_file="https://my-bucket.s3.us-west-2.amazonaws.com/path/to/training-file.csv",
+                training_file="file-7DLVeLdN2Ty4M2m",
             )
 
             print(response.json())
diff --git a/docs/guides/fine_tuning.md b/docs/guides/fine_tuning.md
@@ -103,14 +103,18 @@ with open('customer_service_data.csv', 'w', newline='') as file:
 
 ## Making your data accessible to LLM Engine
 
-Currently, data needs to be uploaded to a publicly accessible web URL so that it can be read
-for fine-tuning. Publicly accessible HTTP and HTTPS URLs are currently supported.
-Support for privately sharing data with the LLM Engine API is coming shortly. For quick
-iteration, you can look into tools like Pastebin or GitHub Gists to quickly host your CSV
-files in a public manner. An example Github Gist can be found
-[here](https://gist.github.com/tigss/7cec73251a37de72756a3b15eace9965). To use the gist,
-you can use the URL given when you click the “Raw” button
-([URL](https://gist.githubusercontent.com/tigss/7cec73251a37de72756a3b15eace9965/raw/85d9742890e1e6b0c06468507292893b820c13c9/llm_sample_data.csv)).
+Currently, data needs to be uploaded to either a publicly accessible web URL or to LLM Engine's private file server so that it can be read for fine-tuning. Publicly accessible HTTP and HTTPS URLs are currently supported.
+
+To privately share data with the LLM Engine API, use LLM Engine's [File.upload](../../api/python_client/#llmengine.File.upload) API. You can upload data from a local file to LLM Engine's private file server and then use the returned file ID to reference your data in the FineTune API. The file ID is generally in the form of `file-<random_string>`, e.g. "file-7DLVeLdN2Ty4M2m".
+
+=== "Upload to LLM Engine's private file server"
+
+```python
+from llmengine import File
+
+response = File.upload(open("customer_service_data.csv", "r"))
+print(response.json())
+```
 
 ## Launching the fine-tune
 
@@ -137,8 +141,8 @@ from llmengine import FineTune
 
 response = FineTune.create(
     model="llama-2-7b",
-    training_file="s3://my-bucket/path/to/training-file.csv",
-    validation_file="s3://my-bucket/path/to/validation-file.csv",
+    training_file="file-7DLVeLdN2Ty4M2m",
+    validation_file="file-ezSRtpgKQyItI26",
 )
 
 print(response.json())