MaggotHATE
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎UI_simple.h‎
Lines changed: 2 additions & 2 deletions b/‎UI_simple.h‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎base/common.cpp‎
Lines changed: 26 additions & 1 deletion b/‎base/common.cpp‎
Lines changed: 26 additions & 1 deletion
diff --git a/‎base/ggml/ggml-backend.c‎
Lines changed: 15 additions & 10 deletions b/‎base/ggml/ggml-backend.c‎
Lines changed: 15 additions & 10 deletions
@@ -71,6 +71,7 @@ Libraries:
 * [mistral-7b-instruct-v0.1](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF) from config.json (q4_K_S version works faster)
 * [Mistral-Nemo-Instruct-2407-GGUF](https://huggingface.co/ZeroWw/Mistral-Nemo-Instruct-2407-GGUF) from config.json (specific quants with output and embed tensors quantized to f16, q5 is the smallest)
 * [redmond-puffin-13b (previously recommended)](https://huggingface.co/TheBloke/Redmond-Puffin-13B-GGUF) from config.json (q4_K_S version works faster)
+* Exceptionally useful tool for visualizing sampling results and finding the best combination: https://artefact2.github.io/llm-sampling/index.xhtml
 
 ### Additional notes
 
 
@@ -475,7 +475,7 @@ static void sliderPStep(float& p_step, float& default_p_step){
         }
         ImGui::SameLine();
     }
-    ImGui::SliderFloat("p_step", &p_step, 0.5f, 1.0f);
+    ImGui::SliderFloat("p_step", &p_step, 0.1f, 1.0f);
     if (ImGui::BeginPopupContextItem("p_step"))
     {
         if (ImGui::Selectable("Reset to default")){
@@ -1672,7 +1672,7 @@ struct chatUI{
         newChat.startGen();
         output = "...";
         //newChat.getResultAsyncStringFull2(false, true);
-        newChat.getResultAsyncStringRepeat3();
+        newChat.getResultAsyncStringFull3();
         copiedDialog = false;
         copiedTimings = false;
         scrolled = false;
 
@@ -79,7 +79,32 @@ int32_t get_num_physical_cores() {
         return num_physical_cores;
     }
 #elif defined(_WIN32)
-    //TODO: Implement
+    // returns the number of physical processor cores on Windows
+    unsigned int fallback_threads = std::thread::hardware_concurrency();
+    DWORD length = 0;
+    GetLogicalProcessorInformationEx(RelationAll, nullptr, &length);
+
+    if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+        std::cerr << "GLIPEx INSUFFICIENT_BUFFER" << std::endl;
+        return fallback_threads > 0 ? (fallback_threads <= 4 ? fallback_threads : fallback_threads / 2) : 4;
+    }
+
+    std::vector<uint8_t> buffer(length);
+    if (!GetLogicalProcessorInformationEx(RelationAll, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &length)) {
+        std::cerr << "GLIPEx: Unable to get processor information" << std::endl;
+        return fallback_threads > 0 ? (fallback_threads <= 4 ? fallback_threads : fallback_threads / 2) : 4;
+    }
+
+    DWORD physicalCoreCount = 0;
+    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
+    while (reinterpret_cast<uint8_t*>(info) < buffer.data() + buffer.size()) {
+        if (info->Relationship == RelationProcessorCore) {
+            physicalCoreCount++;
+        }
+        info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<uint8_t*>(info) + info->Size);
+    }
+    return physicalCoreCount;
+
 #endif
     unsigned int n_threads = std::thread::hardware_concurrency();
     return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
 
@@ -351,15 +351,10 @@ void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t b
     }
 
     // an async copy would normally happen after all the queued operations on both backends are completed
-    // sync src, set_async dst
-    if (ggml_backend_buffer_is_host(src->buffer)) {
-        ggml_backend_synchronize(backend_src);
-        ggml_backend_tensor_set_async(backend_dst, dst, src->data, 0, ggml_nbytes(src));
-    } else {
-        ggml_backend_synchronize(backend_src);
-        ggml_backend_tensor_copy(src, dst);
-        ggml_backend_synchronize(backend_dst);
-    }
+    // to simulate the same behavior, we need to synchronize both backends first, and do a blocking copy
+    ggml_backend_synchronize(backend_src);
+    ggml_backend_synchronize(backend_dst);
+    ggml_backend_tensor_copy(src, dst);
 }
 
 // events
@@ -1782,7 +1777,17 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
                 } else {
                     ggml_backend_synchronize(split_backend);
                 }
-                ggml_backend_tensor_copy_async(input_backend, split_backend, input, input_cpy);
+                // try async copy, but if not possible, we can still use a sync copy without synchronizing the dst backend, since we handle the synchronization here with multiple copies and events
+                // TODO: add public function to facilitate this, since applications do not have direct access to the backend interface
+                if (!split_backend->iface.cpy_tensor_async || !split_backend->iface.cpy_tensor_async(input_backend, split_backend, input, input_cpy)) {
+                    ggml_backend_synchronize(input_backend);
+                    if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
+                        ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
+                    } else {
+                        ggml_backend_synchronize(split_backend);
+                    }
+                    ggml_backend_tensor_copy(input, input_cpy);
+                }
             }
         }
Original file line number	Diff line number	Diff line change
`@@ -475,7 +475,7 @@ static void sliderPStep(float& p_step, float& default_p_step){`
`475`	`475`	`}`
`476`	`476`	`ImGui::SameLine();`
`477`	`477`	`}`
`478`		`- ImGui::SliderFloat("p_step", &p_step, 0.5f, 1.0f);`
	`478`	`+ ImGui::SliderFloat("p_step", &p_step, 0.1f, 1.0f);`
`479`	`479`	`if (ImGui::BeginPopupContextItem("p_step"))`
`480`	`480`	`{`
`481`	`481`	`if (ImGui::Selectable("Reset to default")){`
`@@ -1672,7 +1672,7 @@ struct chatUI{`
`1672`	`1672`	`newChat.startGen();`
`1673`	`1673`	`output = "...";`
`1674`	`1674`	`//newChat.getResultAsyncStringFull2(false, true);`
`1675`		`- newChat.getResultAsyncStringRepeat3();`
	`1675`	`+ newChat.getResultAsyncStringFull3();`
`1676`	`1676`	`copiedDialog = false;`
`1677`	`1677`	`copiedTimings = false;`
`1678`	`1678`	`scrolled = false;`