---
# Thank you for contributing!
# In filling out this yaml file, please follow the criteria as described here:
# https://github.com/opening-up-chatgpt/opening-up-chatgpt.github.io/tree/main/projects#criteria

# You're free to build on this work and reuse the data. It is licensed under CC-BY 4.0, with the
# stipulation that attribution should come in the form of a link to http://opening-up-chatgpt.github.io
# and a citation to the paper in which the initial dataset & criteria were published:

# Liesenfeld, Andreas, Alianda Lopez, and Mark Dingemanse. 2023. “Opening up ChatGPT: Tracking Openness, Transparency, and Accountability in Instruction-Tuned Text Generators.” In CUI '23: Proceedings of the 5th International Conference on Conversational User Interfaces. July 19-21, Eindhoven. doi: 10.1145/3571884.3604316

system:
  name: DeepSeek R1
  link: https://github.com/deepseek-ai/DeepSeek-R1
  type: text
  performanceclass:
  basemodelname: DeepSeek-V3-Base
  endmodelname: DeepSeek-R1
  endmodellicense: DeepSeek License Agreement
  releasedate: 2025-01
  notes:

org:
  name: DeepSeek
  link: https://www.deepseek.com/
  notes: "DeepSeek (深度求索), founded in 2023, is a Chinese company dedicated to making AGI a reality."

# availability:
datasources_basemodel:
  class: closed
  link:
  notes: Pretraining data not specified or documented beyond the claim that it amounts to 14.8T tokens in Chinese and English. Proprietary dataset.

datasources_endmodel:
  class: closed
  link:
  notes: Post-training data not specified or documented; said only to include '1.5M instances'.

weights_basemodel:
  class: open
  link: https://huggingface.co/deepseek-ai/DeepSeek-V3-Base
  notes:

weights_endmodel:
  class: open
  link: https://huggingface.co/deepseek-ai/DeepSeek-R1
  notes:

trainingcode:
  class: closed
  link: https://github.com/deepseek-ai/DeepSeek-R1
  notes: Repository does not contain training code.

# documentation:
code:
  class: closed
  link: https://github.com/deepseek-ai/DeepSeek-V3/
  notes: No code released, so no code documentation.

architecture:
  class: partial
  link: https://arxiv.org/pdf/2412.19437
  notes: Model architecture described in considerable detail in the base model technical report.

preprint:
  class: open
  link: https://arxiv.org/pdf/2501.12948
  notes: Paper describes the techniques used to construct the model, but says nothing about pre- or post-training data.

paper:
  class: closed
  link:
  notes: No peer-reviewed paper found.

modelcard:
  class: partial
  link: https://huggingface.co/deepseek-ai/DeepSeek-V3-Base
  notes: Model card on HF provides a basic summary but no information on intended downstream uses, scope, biases, risks, or limitations.

datasheet:
  class: closed
  link:
  notes: No datasheet provided.

# access:
package:
  class: partial
  link:
  notes: No dedicated package is provided by the authors, but the model integrates well with many widely used packages.

api:
  class: open
  link: https://platform.deepseek.com
  notes: Available through various APIs.
  metaprompt: closed
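# The entry above records only that a hosted API exists. As a hedged illustration of what a
# client request to an OpenAI-compatible endpoint like this might look like, here is a minimal
# Python sketch; the model name "deepseek-reasoner", the field names, and the helper itself are
# assumptions based on common chat-completions conventions, not taken from this file:

```python
import json

def build_chat_payload(prompt: str, model: str = "deepseek-reasoner") -> str:
    """Assemble a hypothetical OpenAI-style chat-completions request body."""
    payload = {
        "model": model,  # assumed model identifier, not specified in this record
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
    }
    return json.dumps(payload)

body = build_chat_payload("Summarize the DeepSeek-R1 license terms.")
```

# The resulting JSON string would then be POSTed to the platform's chat endpoint with an API key.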

licenses:
  class: partial
  link: https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/LICENSE-MODEL
  notes: MIT License alongside a separate Model License. The derived model (R1) is stated to be fully licensed under MIT.
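
# A record in this format can be sanity-checked programmatically before submission. Below is a
# minimal stdlib-only Python sketch; the three `class` values are the ones used throughout this
# file, while the helper function and the inlined dict fragment are illustrative assumptions:

```python
# Each openness dimension in a record carries a `class` field that must be
# one of the three values used throughout this file.
ALLOWED_CLASSES = {"open", "partial", "closed"}

def check_record(record: dict) -> list[str]:
    """Return the dimensions whose `class` value is present but invalid."""
    problems = []
    for dimension, fields in record.items():
        if not isinstance(fields, dict) or "class" not in fields:
            continue  # e.g. the `system:` and `org:` blocks carry no class
        if fields["class"] not in ALLOWED_CLASSES:
            problems.append(dimension)
    return problems

# A fragment of the record above, written out as a plain dict:
record = {
    "weights_endmodel": {"class": "open", "link": "https://huggingface.co/deepseek-ai/DeepSeek-R1"},
    "trainingcode": {"class": "closed"},
    "architecture": {"class": "partial"},
}
assert check_record(record) == []
```

# In practice one would load the full YAML file (e.g. with PyYAML) and pass the parsed
# mapping to a checker like this.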