11/*****************************************************************************
22
3- Copyright (c) 2007, 2023 , Oracle and/or its affiliates.
3+ Copyright (c) 2007, 2024 , Oracle and/or its affiliates.
44
55This program is free software; you can redistribute it and/or modify
66it under the terms of the GNU General Public License, version 2.0,
@@ -35,101 +35,97 @@ Created 2007-03-27 Sunny Bains
3535#ifndef INNOBASE_FTS0VLC_IC
3636#define INNOBASE_FTS0VLC_IC
3737
38+ #include <stddef.h>
39+ #include <stdint.h>
3840#include "fts0types.h"
3941
4042/******************************************************************//**
4143Return length of val if it were encoded using our VLC scheme.
42- FIXME: We will need to be able encode 8 bytes value
4344@return length of value encoded, in bytes */
4445UNIV_INLINE
45- ulint
46+ unsigned int
4647fts_get_encoded_len(
4748/*================*/
48- ulint val) /* in: value to encode */
49+ uint64_t val) /* in: value to encode */
4950{
50- if (val <= 127) {
51- return(1);
52- } else if (val <= 16383) {
53- return(2);
54- } else if (val <= 2097151) {
55- return(3);
56- } else if (val <= 268435455) {
57- return(4);
58- } else {
59- /* Possibly we should care that on 64-bit machines ulint can
60- contain values that we can't encode in 5 bytes, but
61- fts_encode_int doesn't handle them either so it doesn't much
62- matter. */
63-
64- return(5);
65- }
51+ unsigned int length = 1;
52+ for (;;)
53+ {
54+ val >>= 7;
55+ if (val != 0)
56+ {
57+ ++length;
58+ }
59+ else
60+ {
61+ break;
62+ }
63+ }
64+ return length;
6665}
6766
6867/******************************************************************//**
6968Encode an integer using our VLC scheme and return the length in bytes.
7069@return length of value encoded, in bytes */
7170UNIV_INLINE
72- ulint
71+ unsigned int
7372fts_encode_int(
7473/*===========*/
75- ulint val, /* in: value to encode */
74+ uint64_t val, /* in: value to encode */
7675 byte* buf) /* in: buffer, must have enough space */
7776{
78- ulint len;
79-
80- if (val <= 127) {
81- *buf = (byte) val;
82-
83- len = 1;
84- } else if (val <= 16383) {
85- *buf++ = (byte)(val >> 7);
86- *buf = (byte)(val & 0x7F);
87-
88- len = 2;
89- } else if (val <= 2097151) {
90- *buf++ = (byte)(val >> 14);
91- *buf++ = (byte)((val >> 7) & 0x7F);
92- *buf = (byte)(val & 0x7F);
93-
94- len = 3;
95- } else if (val <= 268435455) {
96- *buf++ = (byte)(val >> 21);
97- *buf++ = (byte)((val >> 14) & 0x7F);
98- *buf++ = (byte)((val >> 7) & 0x7F);
99- *buf = (byte)(val & 0x7F);
100-
101- len = 4;
102- } else {
103- /* Best to keep the limitations of the 32/64 bit versions
104- identical, at least for the time being. */
105- ut_ad(val <= 4294967295u);
106-
107- *buf++ = (byte)(val >> 28);
108- *buf++ = (byte)((val >> 21) & 0x7F);
109- *buf++ = (byte)((val >> 14) & 0x7F);
110- *buf++ = (byte)((val >> 7) & 0x7F);
111- *buf = (byte)(val & 0x7F);
112-
113- len = 5;
77+ const unsigned int max_length = 10;
78+ /* skip leading zeros */
79+ unsigned int count = max_length - 1;
80+ while (count > 0)
81+ {
82+ /* We split the value into 7 bit batches); so val >= 2^63 need 10 bytes,
83+ 2^63 > val >= 2^56 needs 9 bytes, 2^56 > val >= 2^49 needs 8 bytes etc.
84+ */
85+ if (val >= uint64_t(1) << (count * 7))
86+ {
87+ break;
88+ }
89+ --count;
11490 }
11591
116- /* High-bit on means "last byte in the encoded integer". */
117- *buf |= 0x80;
118-
119- return(len);
92+ unsigned int length = count + 1;
93+
94+ byte *bufptr= buf;
95+
96+ for (;;)
97+ {
98+ *bufptr = (byte)((val >> (7 * count)) & 0x7f);
99+ if (count == 0)
100+ {
101+ /* High-bit on means "last byte in the encoded integer". */
102+ *bufptr |= 0x80;
103+ break;
104+ }
105+ else
106+ {
107+ --count;
108+ ++bufptr;
109+ }
110+ }
111+
112+ ut_ad(length <= max_length);
113+ ut_a(bufptr - buf == ptrdiff_t(length) - 1);
114+
115+ return length;
120116}
121117
122118/******************************************************************//**
123119Decode and return the integer that was encoded using our VLC scheme.
124120@return value decoded */
125121UNIV_INLINE
126- ulint
122+ uint64_t
127123fts_decode_vlc(
128124/*===========*/
129125 byte** ptr) /* in: ptr to decode from, this ptr is
130126 incremented by the number of bytes decoded */
131127{
132- ulint val = 0;
128+ uint64_t val = 0;
133129
134130 for (;;) {
135131 byte b = **ptr;
@@ -145,7 +141,7 @@ fts_decode_vlc(
145141 }
146142 }
147143
148- return( val) ;
144+ return val;
149145}
150146
151147#endif
0 commit comments