Commit a4a1e03

Merge branch 'concedo_experimental' into croco_nex

2 parents: 13c6690 + 4ae06b4
6 files changed: +434 -46 lines changed

.github/workflows/kcpp-build-release-win-full-cu12.yaml

Lines changed: 9 additions & 0 deletions

@@ -33,6 +33,15 @@ jobs:
       run: |
         echo "$(Get-Location)\w64devkit\bin" | Out-File -Append -FilePath $env:GITHUB_PATH -Encoding utf8
 
+    - name: Print System Environment Variables
+      id: printvars
+      run: |
+        echo "Number of processors: $NUMBER_OF_PROCESSORS"
+        echo "Processor Architecture: $PROCESSOR_ARCHITECTURE"
+        echo "Computer Name: $COMPUTERNAME"
+        wmic cpu get name
+        wmic os get TotalVisibleMemorySize, FreePhysicalMemory
+
     - name: Build Non-CUDA
       id: make_build
       run: |

.github/workflows/kcpp-build-release-win-full.yaml

Lines changed: 9 additions & 0 deletions

@@ -33,6 +33,15 @@ jobs:
       run: |
         echo "$(Get-Location)\w64devkit\bin" | Out-File -Append -FilePath $env:GITHUB_PATH -Encoding utf8
 
+    - name: Print System Environment Variables
+      id: printvars
+      run: |
+        echo "Number of processors: $NUMBER_OF_PROCESSORS"
+        echo "Processor Architecture: $PROCESSOR_ARCHITECTURE"
+        echo "Computer Name: $COMPUTERNAME"
+        wmic cpu get name
+        wmic os get TotalVisibleMemorySize, FreePhysicalMemory
+
     - name: Build Non-CUDA
       id: make_build
       run: |
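
The same "Print System Environment Variables" step is added identically to both workflow files above. For reference, a rough local equivalent of what it logs can be sketched in Python; this snippet is illustrative only (it is not part of the commit), and the environment variables it reads are the standard Windows ones named in the diff:

import os
import platform

# Mirror of the diagnostics printed by the new "Print System Environment Variables" step.
# These variables are defined on Windows runners; on other platforms they may be unset.
for var in ("NUMBER_OF_PROCESSORS", "PROCESSOR_ARCHITECTURE", "COMPUTERNAME"):
    print(f"{var}: {os.environ.get(var, '<not set>')}")

# Rough stand-in for the wmic CPU query; the memory figures would need a
# platform-specific call and are omitted here.
print("CPU:", platform.processor() or platform.machine())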

gpttype_adapter.cpp

Lines changed: 9 additions & 0 deletions

@@ -450,6 +450,15 @@ void ContextRewind(std::vector<int> &embd, std::vector<int> &current_context_tok
         last_n_tokens.resize(last_n_tokens.size() - amount_rewind);
     }
 
+    if(amount_rewind >= top_picks_history.size())
+    {
+        top_picks_history.clear();
+    }
+    else
+    {
+        top_picks_history.resize(top_picks_history.size() - amount_rewind);
+    }
+
     if (amount_rewind >= current_context_tokens.size())
     {
         current_context_tokens.clear();

kcpp_docs.embd

Lines changed: 214 additions & 0 deletions

@@ -257,6 +257,11 @@
             "minimum": 0,
             "type": "number"
           },
+          "logprobs": {
+            "default": false,
+            "description": "If true, return up to 5 top logprobs for generated tokens. Incurs performance overhead.",
+            "type": "boolean"
+          },
         },
         "required": [
           "prompt"
@@ -808,6 +813,215 @@
         ]
       }
     },
+    "/api/extra/last_logprobs": {
+      "post": {
+        "description": "Obtains the token logprobs of the most recent request. A unique genkey previously submitted is required in multiuser mode.",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "example": {
+                "genkey": "KCPP2342"
+              },
+              "schema": {
+                "properties": {
+                  "genkey": {
+                    "type": "string",
+                    "description": "A unique key used to identify the previous generation."
+                  }
+                },
+                "type": "object"
+              }
+            }
+          },
+          "required": false
+        },
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "example": {
+                  "logprobs": {
+                    "content": [
+                      {
+                        "token": "Hello",
+                        "logprob": -0.31725305,
+                        "bytes": [72, 101, 108, 108, 111],
+                        "top_logprobs": [
+                          {
+                            "token": "Hello",
+                            "logprob": -0.31725305,
+                            "bytes": [72, 101, 108, 108, 111]
+                          },
+                          {
+                            "token": "Hi",
+                            "logprob": -1.3190403,
+                            "bytes": [72, 105]
+                          }
+                        ]
+                      },
+                      {
+                        "token": "!",
+                        "logprob": -0.02380986,
+                        "bytes": [
+                          33
+                        ],
+                        "top_logprobs": [
+                          {
+                            "token": "!",
+                            "logprob": -0.02380986,
+                            "bytes": [33]
+                          },
+                          {
+                            "token": " there",
+                            "logprob": -3.787621,
+                            "bytes": [32, 116, 104, 101, 114, 101]
+                          }
+                        ]
+                      },
+                      {
+                        "token": " How",
+                        "logprob": -0.000054669687,
+                        "bytes": [32, 72, 111, 119],
+                        "top_logprobs": [
+                          {
+                            "token": " How",
+                            "logprob": -0.000054669687,
+                            "bytes": [32, 72, 111, 119]
+                          },
+                          {
+                            "token": "<|end|>",
+                            "logprob": -10.953937,
+                            "bytes": null
+                          }
+                        ]
+                      },
+                      {
+                        "token": " can",
+                        "logprob": -0.015801601,
+                        "bytes": [32, 99, 97, 110],
+                        "top_logprobs": [
+                          {
+                            "token": " can",
+                            "logprob": -0.015801601,
+                            "bytes": [32, 99, 97, 110]
+                          },
+                          {
+                            "token": " may",
+                            "logprob": -4.161023,
+                            "bytes": [32, 109, 97, 121]
+                          }
+                        ]
+                      },
+                      {
+                        "token": " I",
+                        "logprob": -3.7697225e-6,
+                        "bytes": [
+                          32,
+                          73
+                        ],
+                        "top_logprobs": [
+                          {
+                            "token": " I",
+                            "logprob": -3.7697225e-6,
+                            "bytes": [32, 73]
+                          },
+                          {
+                            "token": " assist",
+                            "logprob": -13.596657,
+                            "bytes": [32, 97, 115, 115, 105, 115, 116]
+                          }
+                        ]
+                      },
+                      {
+                        "token": " assist",
+                        "logprob": -0.04571125,
+                        "bytes": [32, 97, 115, 115, 105, 115, 116],
+                        "top_logprobs": [
+                          {
+                            "token": " assist",
+                            "logprob": -0.04571125,
+                            "bytes": [32, 97, 115, 115, 105, 115, 116]
+                          },
+                          {
+                            "token": " help",
+                            "logprob": -3.1089056,
+                            "bytes": [32, 104, 101, 108, 112]
+                          }
+                        ]
+                      },
+                      {
+                        "token": " you",
+                        "logprob": -5.4385737e-6,
+                        "bytes": [32, 121, 111, 117],
+                        "top_logprobs": [
+                          {
+                            "token": " you",
+                            "logprob": -5.4385737e-6,
+                            "bytes": [32, 121, 111, 117]
+                          },
+                          {
+                            "token": " today",
+                            "logprob": -12.807695,
+                            "bytes": [32, 116, 111, 100, 97, 121]
+                          }
+                        ]
+                      },
+                      {
+                        "token": " today",
+                        "logprob": -0.0040071653,
+                        "bytes": [32, 116, 111, 100, 97, 121],
+                        "top_logprobs": [
+                          {
+                            "token": " today",
+                            "logprob": -0.0040071653,
+                            "bytes": [32, 116, 111, 100, 97, 121]
+                          },
+                          {
+                            "token": "?",
+                            "logprob": -5.5247097,
+                            "bytes": [63]
+                          }
+                        ]
+                      },
+                      {
+                        "token": "?",
+                        "logprob": -0.0008108172,
+                        "bytes": [63],
+                        "top_logprobs": [
+                          {
+                            "token": "?",
+                            "logprob": -0.0008108172,
+                            "bytes": [63]
+                          },
+                          {
+                            "token": "?\n",
+                            "logprob": -7.184561,
+                            "bytes": [63, 10]
+                          }
+                        ]
+                      }
+                    ]
+                  }
+                },
+                "schema": {
+                  "properties": {
+                    "logprobs": {
+                      "type": "object",
+                      "description": "A logprobs object in the same format as OpenAI API."
+                    }
+                  }
+                }
+              }
+            },
+            "description": "Successful request"
+          }
+        },
+        "summary": "Obtains the token logprobs of the most recent request.",
+        "tags": [
+          "api/extra"
+        ]
+      }
+    },
     "/api/extra/tokencount": {
       "post": {
         "description": "Counts the number of tokens in a string.",

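Taken together, the two kcpp_docs.embd additions document a two-step flow: pass the new logprobs flag with a generation request, then retrieve the stored logprobs for that request from /api/extra/last_logprobs using its genkey. The sketch below is a minimal illustration, not part of the commit: it assumes a KoboldCpp instance at the default local address (http://localhost:5001) and uses the pre-existing /api/v1/generate endpoint; payload fields other than logprobs and genkey are ordinary generate parameters and may differ in your setup.

import requests

BASE = "http://localhost:5001"   # assumed default KoboldCpp address; adjust as needed
GENKEY = "KCPP2342"              # same example key shown in the documented request body

# 1) Request generation with the new boolean flag so logprobs are recorded.
gen_payload = {
    "prompt": "Hello",
    "max_length": 16,
    "genkey": GENKEY,    # identifies this request; needed to look up its logprobs later
    "logprobs": True,    # new parameter: return up to 5 top logprobs per generated token
}
requests.post(f"{BASE}/api/v1/generate", json=gen_payload, timeout=300).raise_for_status()

# 2) Fetch the logprobs of that most recent request.
resp = requests.post(f"{BASE}/api/extra/last_logprobs", json={"genkey": GENKEY}, timeout=30)
resp.raise_for_status()
logprobs = resp.json()["logprobs"]

# The result mirrors the OpenAI-style logprobs object shown in the example response above.
for entry in logprobs["content"]:
    alts = ", ".join(f"{alt['token']!r}={alt['logprob']:.3f}" for alt in entry["top_logprobs"])
    print(f"{entry['token']!r}: {entry['logprob']:.4f}  (top: {alts})")

As the endpoint description notes, in multiuser mode the genkey sent to /api/extra/last_logprobs must match the one submitted with the original generation.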