Skip to content

Commit 188a6ad

Browse files
committed
Latest commits, new presets
* These presets are the result of testing Nemo and its capabilities * Added a link to the tool I used, thanks to https://github.com/kalomaze * UI fixes for both main and console apps
1 parent c7efdc6 commit 188a6ad

File tree

16 files changed

+150990
-150923
lines changed

16 files changed

+150990
-150923
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ Libraries:
7171
* [mistral-7b-instruct-v0.1](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF) from config.json (q4_K_S version works faster)
7272
* [Mistral-Nemo-Instruct-2407-GGUF](https://huggingface.co/ZeroWw/Mistral-Nemo-Instruct-2407-GGUF) from config.json (specific quants with output and embed tensors quantized to f16, q5 is the smallest)
7373
* [redmond-puffin-13b (previously recommended)](https://huggingface.co/TheBloke/Redmond-Puffin-13B-GGUF) from config.json (q4_K_S version works faster)
74+
* Exceptionally useful tool for visualizing sampling results and finding the best combination: https://artefact2.github.io/llm-sampling/index.xhtml
7475

7576
### Additional notes
7677

UI_simple.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ static void sliderPStep(float& p_step, float& default_p_step){
475475
}
476476
ImGui::SameLine();
477477
}
478-
ImGui::SliderFloat("p_step", &p_step, 0.5f, 1.0f);
478+
ImGui::SliderFloat("p_step", &p_step, 0.1f, 1.0f);
479479
if (ImGui::BeginPopupContextItem("p_step"))
480480
{
481481
if (ImGui::Selectable("Reset to default")){
@@ -1672,7 +1672,7 @@ struct chatUI{
16721672
newChat.startGen();
16731673
output = "...";
16741674
//newChat.getResultAsyncStringFull2(false, true);
1675-
newChat.getResultAsyncStringRepeat3();
1675+
newChat.getResultAsyncStringFull3();
16761676
copiedDialog = false;
16771677
copiedTimings = false;
16781678
scrolled = false;

base/common.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,32 @@ int32_t get_num_physical_cores() {
7979
return num_physical_cores;
8080
}
8181
#elif defined(_WIN32)
82-
//TODO: Implement
82+
//returns number of physical processor on windows
83+
unsigned int fallback_threads = std::thread::hardware_concurrency();
84+
DWORD length = 0;
85+
GetLogicalProcessorInformationEx(RelationAll, nullptr, &length);
86+
87+
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
88+
std::cerr << "GLIPEx INSUFFICIENT_BUFFER" << std::endl;
89+
return fallback_threads > 0 ? (fallback_threads <= 4 ? fallback_threads : fallback_threads / 2) : 4;
90+
}
91+
92+
std::vector<uint8_t> buffer(length);
93+
if (!GetLogicalProcessorInformationEx(RelationAll, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &length)) {
94+
std::cerr << "GLIPEx: Unable to get processor information" << std::endl;
95+
return fallback_threads > 0 ? (fallback_threads <= 4 ? fallback_threads : fallback_threads / 2) : 4;
96+
}
97+
98+
DWORD physicalCoreCount = 0;
99+
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
100+
while (reinterpret_cast<uint8_t*>(info) < buffer.data() + buffer.size()) {
101+
if (info->Relationship == RelationProcessorCore) {
102+
physicalCoreCount++;
103+
}
104+
info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<uint8_t*>(info) + info->Size);
105+
}
106+
return physicalCoreCount;
107+
83108
#endif
84109
unsigned int n_threads = std::thread::hardware_concurrency();
85110
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;

base/ggml/ggml-backend.c

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -351,15 +351,10 @@ void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t b
351351
}
352352

353353
// an async copy would normally happen after all the queued operations on both backends are completed
354-
// sync src, set_async dst
355-
if (ggml_backend_buffer_is_host(src->buffer)) {
356-
ggml_backend_synchronize(backend_src);
357-
ggml_backend_tensor_set_async(backend_dst, dst, src->data, 0, ggml_nbytes(src));
358-
} else {
359-
ggml_backend_synchronize(backend_src);
360-
ggml_backend_tensor_copy(src, dst);
361-
ggml_backend_synchronize(backend_dst);
362-
}
354+
// to simulate the same behavior, we need to synchronize both backends first, and do a blocking copy
355+
ggml_backend_synchronize(backend_src);
356+
ggml_backend_synchronize(backend_dst);
357+
ggml_backend_tensor_copy(src, dst);
363358
}
364359

365360
// events
@@ -1782,7 +1777,17 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
17821777
} else {
17831778
ggml_backend_synchronize(split_backend);
17841779
}
1785-
ggml_backend_tensor_copy_async(input_backend, split_backend, input, input_cpy);
1780+
// try async copy, but if not possible, we can still use a sync copy without synchronizing the dst backend, since we handle the synchronization here with multiple copies and events
1781+
// TODO: add public function to facilitate this, since applications do not have direct access to the backend interface
1782+
if (!split_backend->iface.cpy_tensor_async || !split_backend->iface.cpy_tensor_async(input_backend, split_backend, input, input_cpy)) {
1783+
ggml_backend_synchronize(input_backend);
1784+
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
1785+
ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
1786+
} else {
1787+
ggml_backend_synchronize(split_backend);
1788+
}
1789+
ggml_backend_tensor_copy(input, input_cpy);
1790+
}
17861791
}
17871792
}
17881793

0 commit comments

Comments
 (0)