Skip to content

Commit 9770fe8

Browse files
committed
- replace rng sampling with custom implementation based on a uniform (0,1) draw over cumulative token probabilities
- log generation results to stderr
- log raw rng values generated during inference to stderr
1 parent 7a2c913 commit 9770fe8

File tree

2 files changed

+62
-24
lines changed

2 files changed

+62
-24
lines changed

dev/todo.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
- [x] run inference, save to output.txt
2+
- `./build/bin/llama-simple -m models/gemma/gemma-1.1-7b-it.Q4_K_M.gguf -n 100 -p "Tell me about the history of artificial intelligence" >> output.txt`
3+
New way:
4+
```
5+
./build/bin/llama-run --ngl 999 models/gemma/gemma-1.1-7b-it.Q4_K_M.gguf Hello World > output.txt
6+
```
7+
8+
- [x] b) I want to modify the code, re-build project and see the changes
9+
- Just something stupid. Print hello world from Petr
10+
- changed `simple.cpp`
11+
``` fprintf(stderr, "Generating token number %d\n", n_decode + 1); ```
12+
Runs fine.
13+
14+
- [x] c) Next, I want to hook specifically into the places where random numbers are generated.
15+
- During inference, sampling
16+
- Specifically, save each rng generated number to a file
17+
18+
- [x] d) then I want to replace all the custom non-trivial rng generation
19+
- (e.g. "sample this custom distribution") with my own implementations using the basic uniform (0,1) rng generator
20+
21+
- [ ] e) then I want to replace the default (0,1) uniform distribution with my custom provider backed by an external API
22+
23+
- [ ] f) Idea: measure the bias in the source distribution based on the generated numbers
24+
- specifically: see each generated number and see how it changes
25+
26+
- [ ] g) try / support temperature > 1
27+
- try min_p lower (0?)
28+
- try other models - bigger, better

src/llama-sampling.cpp

Lines changed: 34 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -129,35 +129,45 @@ struct ring_buffer {
129129
};
130130

131131
// Samples one candidate index from cur_p by inverse-CDF sampling.
//
// A single uniform number u in [0, 1) is drawn from rng, the running
// (cumulative) sum of the candidate probabilities cur_p->data[i].p is built
// and normalized, and the first index whose cumulative probability is strictly
// greater than u is selected. Every intermediate value is logged to stderr.
//
// Parameters:
//   cur_p - candidate tokens; only data[i].id, data[i].p and size are read
//   rng   - random engine; exactly one uniform draw is consumed per call
//
// Returns the selected index into cur_p->data. Index 0 is returned when there
// are no candidates or when the probabilities do not add up to a positive
// value, so the result never points past the end of the array.
static int llama_sample_dist(llama_token_data_array * cur_p, std::mt19937 & rng) {
    // Get uniform random number in [0, 1)
    const double u = std::uniform_real_distribution<>(0.0, 1.0)(rng);
    fprintf(stderr, "\nRNG internal:\n");
    fprintf(stderr, "- Raw uniform random number: %f\n", u);

    const size_t n_candidates = cur_p->size;
    if (n_candidates == 0) {
        // Nothing to choose from. std::discrete_distribution over an empty
        // range also always produced 0, so keep that result without reading
        // cur_p->data.
        fprintf(stderr, "- No candidate tokens, returning index 0\n");
        return 0;
    }

    // Calculate cumulative probabilities
    std::vector<float> cumulative_probs;
    cumulative_probs.reserve(n_candidates);
    float sum = 0.0f;

    fprintf(stderr, "- Token probabilities:\n");
    for (size_t i = 0; i < n_candidates; ++i) {
        sum += cur_p->data[i].p;
        cumulative_probs.push_back(sum);
        fprintf(stderr, " [%zu] token %d = %f (cumulative: %f)\n",
                i, cur_p->data[i].id, cur_p->data[i].p, sum);
    }

    // A zero, negative or NaN total cannot be normalized: dividing by it would
    // fill the table with NaN/inf and make the search below meaningless.
    if (!(sum > 0.0f)) {
        fprintf(stderr, "- Probability sum is not positive (%f), returning index 0\n", sum);
        return 0;
    }

    // Normalize cumulative probabilities so that the last entry is 1
    if (sum != 1.0f) {
        for (float & p : cumulative_probs) {
            p /= sum;
        }
        fprintf(stderr, "- Normalized cumulative probabilities\n");
    }

    // The table is normalized, so the raw draw is already on the right scale.
    const double scaled = u;
    fprintf(stderr, "- Scaled random number: %f\n", scaled);

    // Binary search for the first cumulative probability strictly greater than
    // the draw: index i is chosen iff cum[i-1] <= u < cum[i]. upper_bound (not
    // lower_bound) guarantees that zero-probability entries, whose cumulative
    // value equals their predecessor's, are never picked, even for u == 0.
    const auto it = std::upper_bound(cumulative_probs.begin(), cumulative_probs.end(), scaled);
    size_t selected_idx = static_cast<size_t>(it - cumulative_probs.begin());

    // Rounding (or a non-finite sum) can push the search to end(); clamp so the
    // returned index is always valid for cur_p->data.
    if (selected_idx >= n_candidates) {
        selected_idx = n_candidates - 1;
    }

    fprintf(stderr, "- Selected index: %zu\n", selected_idx);
    fprintf(stderr, "RNG generated sample: %zu (token id: %d, probability: %f)\n",
            selected_idx, cur_p->data[selected_idx].id, cur_p->data[selected_idx].p);

    return static_cast<int>(selected_idx);
}
162172

163173
/*

0 commit comments

Comments
 (0)