Skip to content

Commit f6f1df7

Browse files
committed
Add 64-bit versions of core power of 2 functions
1 parent 64b0990 commit f6f1df7

File tree

25 files changed

+165
-128
lines changed

25 files changed

+165
-128
lines changed

core/io/file_access.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -415,17 +415,17 @@ class CharBuffer {
415415
char stack_buffer[256];
416416

417417
char *buffer = nullptr;
418-
int capacity = 0;
419-
int written = 0;
418+
int64_t capacity = 0;
419+
int64_t written = 0;
420420

421421
bool grow() {
422-
if (vector.resize(next_power_of_2(1 + written)) != OK) {
422+
if (vector.resize(next_power_of_2((uint64_t)1 + (uint64_t)written)) != OK) {
423423
return false;
424424
}
425425

426426
if (buffer == stack_buffer) { // first chunk?
427427

428-
for (int i = 0; i < written; i++) {
428+
for (int64_t i = 0; i < written; i++) {
429429
vector.write[i] = stack_buffer[i];
430430
}
431431
}

core/io/file_access_compressed.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class FileAccessCompressed : public FileAccess {
3939
bool writing = false;
4040
uint64_t write_pos = 0;
4141
uint8_t *write_ptr = nullptr;
42-
uint32_t write_buffer_size = 0;
42+
uint64_t write_buffer_size = 0;
4343
uint64_t write_max = 0;
4444
uint32_t block_size = 0;
4545
mutable bool read_eof = false;

core/io/image.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,8 +1115,8 @@ bool Image::is_size_po2() const {
11151115
void Image::resize_to_po2(bool p_square, Interpolation p_interpolation) {
11161116
ERR_FAIL_COND_MSG(is_compressed(), "Cannot resize in compressed image formats.");
11171117

1118-
int w = next_power_of_2(width);
1119-
int h = next_power_of_2(height);
1118+
int w = next_power_of_2((uint32_t)width);
1119+
int h = next_power_of_2((uint32_t)height);
11201120
if (p_square) {
11211121
w = h = MAX(w, h);
11221122
}

core/io/packet_peer.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
// Sets the upper bound used for the encode buffer and clears the buffer.
// The two ERR_FAIL_COND_MSG guards state the accepted range: at least 1024
// bytes and at most 256 MiB (presumably the macro returns early on failure).
// The accepted value is rounded with next_power_of_2() before being stored.
// The "41-"/"41+" pair below is the diff's old and new form of that line; the
// new form casts to uint32_t, matching the uint32_t overload in core/typedefs.h.
void PacketPeer::set_encode_buffer_max_size(int p_max_size) {
3939
ERR_FAIL_COND_MSG(p_max_size < 1024, "Max encode buffer must be at least 1024 bytes");
4040
ERR_FAIL_COND_MSG(p_max_size > 256 * 1024 * 1024, "Max encode buffer cannot exceed 256 MiB");
41-
encode_buffer_max_size = next_power_of_2(p_max_size);
41+
encode_buffer_max_size = next_power_of_2((uint32_t)p_max_size);
4242
encode_buffer.clear();
4343
}
4444

@@ -103,7 +103,7 @@ Error PacketPeer::put_var(const Variant &p_packet, bool p_full_objects) {
103103

104104
if (unlikely(encode_buffer.size() < len)) {
105105
encode_buffer.resize(0); // Avoid realloc
106-
encode_buffer.resize(next_power_of_2(len));
106+
encode_buffer.resize(next_power_of_2((uint32_t)len));
107107
}
108108

109109
uint8_t *w = encode_buffer.ptrw();
@@ -301,16 +301,16 @@ void PacketPeerStream::set_input_buffer_max_size(int p_max_size) {
301301
ERR_FAIL_COND_MSG(p_max_size < 0, "Max size of input buffer size cannot be smaller than 0.");
302302
// WARNING: May lose packets.
303303
ERR_FAIL_COND_MSG(ring_buffer.data_left(), "Buffer in use, resizing would cause loss of data.");
304-
ring_buffer.resize(nearest_shift(next_power_of_2(p_max_size + 4)) - 1);
305-
input_buffer.resize(next_power_of_2(p_max_size + 4));
304+
ring_buffer.resize(nearest_shift(next_power_of_2((uint32_t)p_max_size + (uint32_t)4)) - 1);
305+
input_buffer.resize(next_power_of_2((uint32_t)p_max_size + (uint32_t)4));
306306
}
307307

308308
// Returns the current input_buffer size minus 4, the counterpart of the
// "+ 4" applied in set_input_buffer_max_size() before rounding up.
// NOTE(review): because the setter rounds up to a power of 2, this presumably
// reports a value >= the one that was requested - confirm with callers.
int PacketPeerStream::get_input_buffer_max_size() const {
309309
return input_buffer.size() - 4;
310310
}
311311

312312
// Resizes output_buffer to next_power_of_2(p_max_size + 4).
// No range check on p_max_size is visible in this function (the input-buffer
// setter rejects negative sizes) - NOTE(review): confirm whether that is intended.
// The "313+" line is the new form: both operands are cast to uint32_t so the
// addition and the next_power_of_2() call use 32-bit unsigned arithmetic.
void PacketPeerStream::set_output_buffer_max_size(int p_max_size) {
313-
output_buffer.resize(next_power_of_2(p_max_size + 4));
313+
output_buffer.resize(next_power_of_2((uint32_t)p_max_size + (uint32_t)4));
314314
}
315315

316316
int PacketPeerStream::get_output_buffer_max_size() const {

core/io/packet_peer_udp.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ Error PacketPeerUDP::bind(int p_port, const IPAddress &p_bind_address, int p_rec
200200
_sock->close();
201201
return err;
202202
}
203-
rb.resize(nearest_shift(p_recv_buffer_size));
203+
rb.resize(nearest_shift((uint32_t)p_recv_buffer_size));
204204
return OK;
205205
}
206206

core/io/stream_peer_gzip.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ Error StreamPeerGZIP::_start(bool p_compress, bool p_is_deflate, int buffer_size
7979
ERR_FAIL_COND_V_MSG(buffer_size <= 0, ERR_INVALID_PARAMETER, "Invalid buffer size. It should be a positive integer.");
8080
clear();
8181
compressing = p_compress;
82-
rb.resize(nearest_shift(buffer_size - 1));
82+
rb.resize(nearest_shift(uint32_t(buffer_size - 1)));
8383
buffer.resize(1024);
8484

8585
// Create ctx.

core/math/geometry_2d.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,8 @@ void Geometry2D::make_atlas(const Vector<Size2i> &p_rects, Vector<Point2i> &r_re
226226
real_t best_aspect = 1e20;
227227

228228
for (int i = 0; i < results.size(); i++) {
229-
real_t h = next_power_of_2(results[i].max_h);
230-
real_t w = next_power_of_2(results[i].max_w);
229+
real_t h = next_power_of_2((uint32_t)results[i].max_h);
230+
real_t w = next_power_of_2((uint32_t)results[i].max_w);
231231
real_t aspect = h > w ? h / w : w / h;
232232
if (aspect < best_aspect) {
233233
best = i;

core/string/string_buffer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::reserve(int p_
123123
}
124124

125125
bool need_copy = string_length > 0 && buffer.is_empty();
126-
buffer.resize(next_power_of_2(p_size));
126+
buffer.resize(next_power_of_2((uint32_t)p_size));
127127
if (need_copy) {
128128
memcpy(buffer.ptrw(), short_buffer, string_length * sizeof(char32_t));
129129
}

core/templates/cowdata.h

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -52,25 +52,6 @@ class CowData {
5252
static constexpr USize MAX_INT = INT64_MAX;
5353

5454
private:
55-
// Function to find the next power of 2 to an integer.
56-
static _FORCE_INLINE_ USize next_po2(USize x) {
57-
if (x == 0) {
58-
return 0;
59-
}
60-
61-
--x;
62-
x |= x >> 1;
63-
x |= x >> 2;
64-
x |= x >> 4;
65-
x |= x >> 8;
66-
x |= x >> 16;
67-
if (sizeof(USize) == 8) {
68-
x |= x >> 32;
69-
}
70-
71-
return ++x;
72-
}
73-
7455
// Alignment: ↓ max_align_t ↓ USize ↓ max_align_t
7556
// ┌────────────────────┬──┬─────────────┬──┬───────────...
7657
// │ SafeNumeric<USize> │░░│ USize │░░│ T[]
@@ -107,7 +88,7 @@ class CowData {
10788
}
10889

10990
// Returns the allocation size for p_elements items of T: the byte count
// p_elements * sizeof(T), passed through the next-power-of-2 helper.
// The multiplication is not checked for overflow here; _get_alloc_size_checked()
// is the variant that reports failure through its bool return.
// The "110-" line is the old call to the class-local next_po2(); the "91+" line
// is the new call to the shared next_power_of_2().
_FORCE_INLINE_ static USize _get_alloc_size(USize p_elements) {
110-
return next_po2(p_elements * sizeof(T));
91+
return next_power_of_2(p_elements * (USize)sizeof(T));
11192
}
11293

11394
_FORCE_INLINE_ static bool _get_alloc_size_checked(USize p_elements, USize *out) {
@@ -122,7 +103,7 @@ class CowData {
122103
*out = 0;
123104
return false;
124105
}
125-
*out = next_po2(o);
106+
*out = next_power_of_2(o);
126107
if (__builtin_add_overflow(o, static_cast<USize>(32), &p)) {
127108
return false; // No longer allocated here.
128109
}

core/typedefs.h

Lines changed: 90 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -154,42 +154,84 @@ inline bool is_power_of_2(const T x) {
154154
}
155155

156156
// Function to find the next power of 2 to an integer.
157-
static _FORCE_INLINE_ unsigned int next_power_of_2(unsigned int x) {
158-
if (x == 0) {
157+
// Returns the smallest power of 2 that is >= p_number (64-bit overload).
// Returns 0 when p_number is 0. For p_number > 2^63 the result does not fit in
// 64 bits and the final increment wraps to 0.
// Lines under an "N-" gutter are the removed 32-bit body (variable x); lines
// under an "N+" gutter are the new body.
constexpr uint64_t next_power_of_2(uint64_t p_number) {
158+
if (p_number == 0) {
159159
return 0;
160160
}
161161

162-
--x;
163-
x |= x >> 1;
164-
x |= x >> 2;
165-
x |= x >> 4;
166-
x |= x >> 8;
167-
x |= x >> 16;
162+
// Decrement first so that an exact power of 2 maps to itself.
--p_number;
163+
// Copy the highest set bit into every lower bit position.
p_number |= p_number >> 1;
164+
p_number |= p_number >> 2;
165+
p_number |= p_number >> 4;
166+
p_number |= p_number >> 8;
167+
p_number |= p_number >> 16;
168+
p_number |= p_number >> 32;
168169

169-
return ++x;
170+
// Every bit below the top one is set now; adding 1 yields the power of 2.
return ++p_number;
171+
}
172+
173+
// Returns the smallest power of 2 that is >= p_number (32-bit overload).
// Returns 0 when p_number is 0. For p_number > 2^31 the result does not fit in
// 32 bits and the final increment wraps to 0.
constexpr uint32_t next_power_of_2(uint32_t p_number) {
174+
if (p_number == 0) {
175+
return 0;
176+
}
177+
178+
// Decrement first so that an exact power of 2 maps to itself.
--p_number;
179+
// Copy the highest set bit into every lower bit position.
p_number |= p_number >> 1;
180+
p_number |= p_number >> 2;
181+
p_number |= p_number >> 4;
182+
p_number |= p_number >> 8;
183+
p_number |= p_number >> 16;
184+
185+
// Every bit below the top one is set now; adding 1 yields the power of 2.
return ++p_number;
170186
}
171187

172188
// Function to find the previous power of 2 to an integer.
173-
static _FORCE_INLINE_ unsigned int previous_power_of_2(unsigned int x) {
174-
x |= x >> 1;
175-
x |= x >> 2;
176-
x |= x >> 4;
177-
x |= x >> 8;
178-
x |= x >> 16;
179-
return x - (x >> 1);
189+
// Returns the largest power of 2 that is <= p_number (64-bit overload).
// Returns 0 when p_number is 0.
constexpr uint64_t previous_power_of_2(uint64_t p_number) {
190+
// Copy the highest set bit into every lower bit position.
p_number |= p_number >> 1;
191+
p_number |= p_number >> 2;
192+
p_number |= p_number >> 4;
193+
p_number |= p_number >> 8;
194+
p_number |= p_number >> 16;
195+
p_number |= p_number >> 32;
196+
// p_number is now all ones from its top set bit down; subtracting the
// lower bits (p_number >> 1) leaves only the top bit.
return p_number - (p_number >> 1);
197+
}
198+
199+
// Returns the largest power of 2 that is <= p_number (32-bit overload).
// Returns 0 when p_number is 0.
constexpr uint32_t previous_power_of_2(uint32_t p_number) {
200+
// Copy the highest set bit into every lower bit position.
p_number |= p_number >> 1;
201+
p_number |= p_number >> 2;
202+
p_number |= p_number >> 4;
203+
p_number |= p_number >> 8;
204+
p_number |= p_number >> 16;
205+
// p_number is now all ones from its top set bit down; subtracting the
// lower bits (p_number >> 1) leaves only the top bit.
return p_number - (p_number >> 1);
180206
}
181207

182208
// Function to find the closest power of 2 to an integer.
183-
static _FORCE_INLINE_ unsigned int closest_power_of_2(unsigned int x) {
184-
unsigned int nx = next_power_of_2(x);
185-
unsigned int px = previous_power_of_2(x);
186-
return (nx - x) > (x - px) ? px : nx;
209+
// Returns whichever of next_power_of_2(p_number) and
// previous_power_of_2(p_number) is closer to p_number (64-bit overload).
// On a tie the next (larger) power of 2 is returned, because the previous one
// is chosen only when the next one is strictly farther away.
// NOTE(review): for p_number > 2^63 next_power_of_2() wraps to 0, so the
// distance (nx - p_number) is computed on a wrapped value - confirm callers
// stay below that range.
constexpr uint64_t closest_power_of_2(uint64_t p_number) {
210+
uint64_t nx = next_power_of_2(p_number);
211+
uint64_t px = previous_power_of_2(p_number);
212+
return (nx - p_number) > (p_number - px) ? px : nx;
213+
}
214+
215+
// Returns whichever of next_power_of_2(p_number) and
// previous_power_of_2(p_number) is closer to p_number (32-bit overload).
// On a tie the next (larger) power of 2 is returned, because the previous one
// is chosen only when the next one is strictly farther away.
// NOTE(review): for p_number > 2^31 next_power_of_2() wraps to 0, so the
// distance (nx - p_number) is computed on a wrapped value - confirm callers
// stay below that range.
constexpr uint32_t closest_power_of_2(uint32_t p_number) {
216+
uint32_t nx = next_power_of_2(p_number);
217+
uint32_t px = previous_power_of_2(p_number);
218+
return (nx - p_number) > (p_number - px) ? px : nx;
187219
}
188220

189221
// Get a shift value from a power of 2.
190-
static inline int get_shift_from_power_of_2(unsigned int p_bits) {
191-
for (unsigned int i = 0; i < 32; i++) {
192-
if (p_bits == (unsigned int)(1 << i)) {
222+
// Returns the bit index i (0..63) for which p_bits == 1 << i, or -1 when
// p_bits is not an exact power of 2 (this includes 0).
// The loop counter is a uint64_t and is converted to the int32_t return type.
constexpr int32_t get_shift_from_power_of_2(uint64_t p_bits) {
223+
for (uint64_t i = 0; i < (uint64_t)64; i++) {
224+
if (p_bits == (uint64_t)((uint64_t)1 << i)) {
225+
return i;
226+
}
227+
}
228+
229+
// No single-bit value matched.
return -1;
230+
}
231+
232+
constexpr int32_t get_shift_from_power_of_2(uint32_t p_bits) {
233+
for (uint32_t i = 0; i < (uint32_t)32; i++) {
234+
if (p_bits == (uint32_t)((uint32_t)1 << i)) {
193235
return i;
194236
}
195237
}
@@ -198,30 +240,44 @@ static inline int get_shift_from_power_of_2(unsigned int p_bits) {
198240
}
199241

200242
// Rounds p_number up to a power of 2 for an integer type T, using a loop whose
// step count is derived from sizeof(T).
// Unlike next_power_of_2() there is no special case for 0 here; for an unsigned
// T the decrement wraps to all ones and the final increment wraps back to 0.
// Lines under "N-" gutters are the removed version (parameter x, non-constexpr
// step count); lines under "N+" gutters are the new version.
template <typename T>
201-
static _FORCE_INLINE_ T nearest_power_of_2_templated(T x) {
202-
--x;
243+
static _FORCE_INLINE_ T nearest_power_of_2_templated(T p_number) {
244+
--p_number;
203245

204246
// The number of operations on p_number is the base two logarithm
205247
// of the number of bits in the type. Add three to account
206248
// for sizeof(T) being in bytes.
207-
size_t num = get_shift_from_power_of_2(sizeof(T)) + 3;
249+
constexpr size_t shift_steps = get_shift_from_power_of_2((uint64_t)sizeof(T)) + 3;
208250

209251
// If the compiler is smart, it unrolls this loop.
210252
// If it's dumb, this is a bit slow.
211-
for (size_t i = 0; i < num; i++) {
212-
x |= x >> (1 << i);
253+
for (size_t i = 0; i < shift_steps; i++) {
254+
// Shift distances are 1, 2, 4, ... so the top set bit spreads downwards.
p_number |= p_number >> (1 << i);
213255
}
214256

215-
return ++x;
257+
return ++p_number;
216258
}
217259

218260
// Function to find the shift (exponent) of the nearest (bigger) power of 2 to an integer.
219-
static inline unsigned int nearest_shift(unsigned int p_number) {
220-
for (int i = 30; i >= 0; i--) {
221-
if (p_number & (1 << i)) {
222-
return i + 1;
261+
// Returns one plus the index of the highest set bit of p_number, looking only
// at bits 62..0 (64-bit overload); returns 0 when none of those bits is set.
// i starts at 63 and is decremented before the first test, so bit 63 is never
// examined. NOTE(review): the removed 32-bit loop likewise started at bit 30,
// not 31 - confirm that ignoring the top bit is intended.
// The "}" under the "224-" gutter is the removed closing brace of the old loop.
constexpr uint64_t nearest_shift(uint64_t p_number) {
262+
uint64_t i = 63;
263+
do {
264+
i--;
265+
if (p_number & ((uint64_t)1 << i)) {
266+
return i + (uint64_t)1;
223267
}
224-
}
268+
} while (i != 0);
269+
270+
return 0;
271+
}
272+
273+
// Returns one plus the index of the highest set bit of p_number, looking only
// at bits 30..0 (32-bit overload); returns 0 when none of those bits is set.
// i starts at 31 and is decremented before the first test, so bit 31 is never
// examined - the same bit range the removed `for (int i = 30; i >= 0; i--)`
// loop covered. NOTE(review): confirm that ignoring the top bit is intended.
constexpr uint32_t nearest_shift(uint32_t p_number) {
274+
uint32_t i = 31;
275+
do {
276+
i--;
277+
if (p_number & ((uint32_t)1 << i)) {
278+
return i + (uint32_t)1;
279+
}
280+
} while (i != 0);
225281

226282
return 0;
227283
}

0 commit comments

Comments
 (0)