Skip to content

Commit 9ad1e30

Browse files
committed
Merge remote-tracking branch 'origin/candidate-10.0.x'
Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
# Conflicts:
# helm/hpcc/Chart.yaml
# helm/hpcc/templates/_helpers.tpl
# version.cmake
2 parents 90e972c + b593efa commit 9ad1e30

File tree

10 files changed

+140
-116
lines changed

10 files changed

+140
-116
lines changed

system/jhtree/ctfile.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ bool CLegacyWriteNode::add(offset_t pos, const void *indata, size32_t insize, un
409409
bool rowCompressed = (keyType&HTREE_QUICK_COMPRESSED_KEY)==HTREE_QUICK_COMPRESSED_KEY;
410410
lzwcomp.open(keyPtr, maxBytes-hdr.keyBytes, isVariable, rowCompressed, fixedKeySize);
411411
}
412-
if (0xffff == hdr.numKeys || 0 == lzwcomp.writekey(pos, (const char *)indata, insize))
412+
if (0xffff == hdr.numKeys || 0 == lzwcomp.writekey(pos, (const char *)indata, insize, KeyCompressor::LeadingFilePosition, 0))
413413
return false;
414414
}
415415
else

system/jhtree/hlzw.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ void KeyCompressor::openBlob(CompressionMethod compression, void *blk,int blksiz
9999
method = comp->getCompressionMethod();
100100
}
101101

102-
int KeyCompressor::writekey(offset_t fPtr, const char *key, unsigned datalength, unsigned options)
102+
int KeyCompressor::writekey(offset_t fPtr, const char *key, unsigned datalength, unsigned options, unsigned keyedDelta)
103103
{
104104
assert(!isBlob);
105105
assertex(__BYTE_ORDER == __LITTLE_ENDIAN); // otherwise the following code is wrong.
@@ -108,16 +108,32 @@ int KeyCompressor::writekey(offset_t fPtr, const char *key, unsigned datalength,
108108
tempKeyBuffer.clear();
109109
if (isVariable)
110110
{
111-
KEYRECSIZE_T rs = datalength;
112-
tempKeyBuffer.appendSwap(sizeof(rs), &rs);
111+
if (keyedDelta)
112+
{
113+
unsigned payloadLength = datalength - keyedDelta;
114+
assertex(payloadLength <= 0x7FFF);
115+
if (payloadLength < 0x80)
116+
{
117+
tempKeyBuffer.append((byte)payloadLength);
118+
}
119+
else
120+
{
121+
byte firstByte = (byte)(payloadLength >> 8) | 0x80;
122+
tempKeyBuffer.append(firstByte);
123+
tempKeyBuffer.append((byte)payloadLength);
124+
}
125+
}
126+
else
127+
{
128+
KEYRECSIZE_T rs = datalength;
129+
tempKeyBuffer.appendSwap(sizeof(rs), &rs);
130+
}
113131
}
114132

115-
bool hasTrailingFilePos = (options & TrailingFilePosition) != 0 && (options & NoFilePosition) == 0;
116-
bool hasLeadingFilePos = (options & NoFilePosition) == 0 && !hasTrailingFilePos;
117-
if (hasLeadingFilePos)
133+
if (options & LeadingFilePosition)
118134
tempKeyBuffer.appendSwap(sizeof(offset_t), &fPtr);
119135
tempKeyBuffer.append(datalength, key);
120-
if (hasTrailingFilePos)
136+
if (options & TrailingFilePosition)
121137
tempKeyBuffer.appendSwap(sizeof(offset_t), &fPtr);
122138

123139
size32_t toWrite = tempKeyBuffer.length();

system/jhtree/hlzw.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ class KeyCompressor final
2929
public:
3030
enum KeyWriteOptions
3131
{
32-
DefaultWriteOptions= 0x00,
33-
TrailingFilePosition = 0x01,
34-
NoFilePosition = 0x02
32+
NoFilePosition = 0x00,
33+
LeadingFilePosition = 0x01,
34+
TrailingFilePosition = 0x02,
3535
};
3636

3737
KeyCompressor() {}
@@ -40,7 +40,7 @@ class KeyCompressor final
4040
void open(void *blk,int blksize, ICompressHandler * compressionHandler, const char * options, bool _isVariable, size32_t fixedRowSize);
4141
void open(void *blk,int blksize, ICompressor * compressor, bool _isVariable, size32_t _fixedRowSize);
4242

43-
int writekey(offset_t fPtr, const char *key, unsigned datalength, unsigned writeOptions = DefaultWriteOptions);
43+
int writekey(offset_t fPtr, const char *key, unsigned datalength, unsigned writeOptions, unsigned keyedDelta);
4444
bool write(const void * data, size32_t datalength);
4545

4646
bool compressBlock(size32_t destSize, void * dest, size32_t srcSize, const void * src, ICompressHandler * compressionHandler, const char * options, bool isVariable, size32_t fixedSize);

system/jhtree/jhblockcompressed.cpp

Lines changed: 96 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ void CJHBlockCompressedSearchNode::load(CKeyHdr *_keyHdr, const void *rawData, o
132132

133133
int CJHBlockCompressedSearchNode::compareValueAt(const char *src, unsigned int index) const
134134
{
135+
dbgassertex(index < hdr.numKeys);
136+
135137
return memcmp(src, keyBuf + index*keyRecLen, keyCompareLen);
136138
}
137139

@@ -238,6 +240,98 @@ void CJHNewBlobNode::load(CKeyHdr *_keyHdr, const void *rawData, offset_t _fpos,
238240

239241
//=========================================================================================================
240242

243+
void CJHBlockCompressedVarNode::load(CKeyHdr *_keyHdr, const void *rawData, offset_t _fpos, bool needCopy)
244+
{
245+
CJHBlockCompressedSearchNode::load(_keyHdr, rawData, _fpos, needCopy);
246+
unsigned n = getNumKeys();
247+
offsets.allocateN(n);
248+
sizes.allocateN(n);
249+
const byte *finger = (const byte *)keyBuf;
250+
size32_t keyedLen = keyHdr->getNodeKeyLength();
251+
for (unsigned int i=0; i<getNumKeys(); i++)
252+
{
253+
unsigned payloadSize = *finger++;
254+
if (payloadSize & 0x80)
255+
{
256+
payloadSize &= 0x7f;
257+
payloadSize = (payloadSize << 8) | *finger++;
258+
}
259+
size32_t recsize = payloadSize + keyedLen;
260+
offsets[i] = (const char *)finger - keyBuf;
261+
sizes[i] = recsize;
262+
finger += recsize;
263+
if (!zeroFilePosition)
264+
finger += sizeof(offset_t);
265+
}
266+
}
267+
268+
int CJHBlockCompressedVarNode::compareValueAt(const char *src, unsigned int index) const
269+
{
270+
dbgassertex(index < hdr.numKeys);
271+
return memcmp(src, keyBuf + offsets[index], keyCompareLen);
272+
}
273+
274+
bool CJHBlockCompressedVarNode::fetchPayload(unsigned int num, char *dst, PayloadReference & activePayload) const
275+
{
276+
if (num >= hdr.numKeys) return false;
277+
278+
if (NULL != dst)
279+
{
280+
const char * p = keyBuf + offsets[num];
281+
KEYRECSIZE_T reclen = sizes[num];
282+
if (keyHdr->hasSpecialFileposition())
283+
{
284+
if (zeroFilePosition)
285+
{
286+
memcpy(dst+keyCompareLen, p+keyCompareLen, reclen-keyCompareLen);
287+
*(offset_t*)(dst+keyLen) = 0;
288+
}
289+
else
290+
memcpy(dst+keyCompareLen, p+keyCompareLen, reclen + sizeof(offset_t) - keyCompareLen);
291+
}
292+
else
293+
memcpy(dst+keyCompareLen, p+keyCompareLen, reclen-keyCompareLen);
294+
}
295+
return true;
296+
}
297+
298+
bool CJHBlockCompressedVarNode::getKeyAt(unsigned int num, char *dst) const
299+
{
300+
if (num >= hdr.numKeys) return false;
301+
302+
if (NULL != dst)
303+
{
304+
const char * p = keyBuf + offsets[num];
305+
memcpy(dst, p, keyCompareLen);
306+
}
307+
return true;
308+
}
309+
310+
size32_t CJHBlockCompressedVarNode::getSizeAt(unsigned int num) const
311+
{
312+
dbgassertex(num < hdr.numKeys);
313+
KEYRECSIZE_T reclen = sizes[num];
314+
if (keyHdr->hasSpecialFileposition())
315+
return reclen + sizeof(offset_t);
316+
else
317+
return reclen;
318+
}
319+
320+
offset_t CJHBlockCompressedVarNode::getFPosAt(unsigned int num) const
321+
{
322+
if (num >= hdr.numKeys) return 0;
323+
if (zeroFilePosition) return 0;
324+
325+
const char * p = keyBuf + offsets[num];
326+
KEYRECSIZE_T reclen = sizes[num];
327+
offset_t pos;
328+
memcpy( &pos, p + reclen, sizeof(__int64) );
329+
_WINREV(pos);
330+
return pos;
331+
}
332+
333+
//=========================================================================================================
334+
241335
CBlockCompressedWriteNode::CBlockCompressedWriteNode(offset_t _fpos, CKeyHdr *_keyHdr, bool isLeafNode, const CBlockCompressedBuildContext& ctx) :
242336
CWriteNode(_fpos, _keyHdr, isLeafNode), context(ctx)
243337
{
@@ -283,8 +377,8 @@ bool CBlockCompressedWriteNode::add(offset_t pos, const void *indata, size32_t i
283377
compressor.open(keyPtr, maxBytes-hdr.keyBytes, handler, context.compressionOptions, isVariable, fixedKeySize);
284378
}
285379

286-
unsigned writeOptions = KeyCompressor::TrailingFilePosition | (context.zeroFilePos ? KeyCompressor::NoFilePosition : 0);
287-
if (0xffff == hdr.numKeys || 0 == compressor.writekey(pos, (const char *)indata, insize, writeOptions))
380+
unsigned writeOptions = (context.zeroFilePos ? KeyCompressor::NoFilePosition : KeyCompressor::TrailingFilePosition);
381+
if (0xffff == hdr.numKeys || 0 == compressor.writekey(pos, (const char *)indata, insize, writeOptions, keyHdr->getNodeKeyLength()))
288382
return false;
289383

290384
if (insize>keyLen)
@@ -346,96 +440,3 @@ BlockCompressedIndexCompressor::BlockCompressedIndexCompressor(unsigned keyedSiz
346440
if (!isTLK && helper && (helper->getFlags() & TIWzerofilepos))
347441
context.zeroFilePos = true;
348442
}
349-
350-
CJHBlockCompressedVarNode::CJHBlockCompressedVarNode() {}
351-
CJHBlockCompressedVarNode::~CJHBlockCompressedVarNode()
352-
{
353-
delete [] recArray;
354-
}
355-
356-
void CJHBlockCompressedVarNode::load(CKeyHdr *_keyHdr, const void *rawData, offset_t _fpos, bool needCopy)
357-
{
358-
CJHBlockCompressedSearchNode::load(_keyHdr, rawData, _fpos, needCopy);
359-
unsigned n = getNumKeys();
360-
recArray = new const char * [n];
361-
const char *finger = keyBuf;
362-
for (unsigned int i=0; i<getNumKeys(); i++)
363-
{
364-
recArray[i] = finger + sizeof(KEYRECSIZE_T);
365-
KEYRECSIZE_T recsize = *(KEYRECSIZE_T *)finger;
366-
_WINREV(recsize);
367-
finger += recsize + sizeof(KEYRECSIZE_T);
368-
if (!zeroFilePosition)
369-
finger += sizeof(offset_t);
370-
}
371-
}
372-
373-
int CJHBlockCompressedVarNode::compareValueAt(const char *src, unsigned int index) const
374-
{
375-
return memcmp(src, recArray[index], keyCompareLen);
376-
}
377-
378-
bool CJHBlockCompressedVarNode::fetchPayload(unsigned int num, char *dst, PayloadReference & activePayload) const
379-
{
380-
if (num >= hdr.numKeys) return false;
381-
382-
if (NULL != dst)
383-
{
384-
const char * p = recArray[num];
385-
KEYRECSIZE_T reclen = ((KEYRECSIZE_T *) p)[-1];
386-
_WINREV(reclen);
387-
if (keyHdr->hasSpecialFileposition())
388-
{
389-
if (zeroFilePosition)
390-
{
391-
memcpy(dst+keyCompareLen, p+keyCompareLen, reclen-keyCompareLen);
392-
*(offset_t*)(dst+keyLen) = 0;
393-
}
394-
else
395-
memcpy(dst+keyCompareLen, p+keyCompareLen, reclen + sizeof(offset_t) - keyCompareLen);
396-
}
397-
else
398-
memcpy(dst+keyCompareLen, p+keyCompareLen, reclen-keyCompareLen);
399-
}
400-
return true;
401-
}
402-
403-
bool CJHBlockCompressedVarNode::getKeyAt(unsigned int num, char *dst) const
404-
{
405-
if (num >= hdr.numKeys) return false;
406-
407-
if (NULL != dst)
408-
{
409-
const char * p = recArray[num];
410-
KEYRECSIZE_T reclen = ((KEYRECSIZE_T *) p)[-1];
411-
_WINREV(reclen);
412-
assertex(reclen >= keyCompareLen);
413-
memcpy(dst, p, keyCompareLen);
414-
}
415-
return true;
416-
}
417-
418-
size32_t CJHBlockCompressedVarNode::getSizeAt(unsigned int num) const
419-
{
420-
const char * p = recArray[num];
421-
KEYRECSIZE_T reclen = ((KEYRECSIZE_T *) p)[-1];
422-
_WINREV(reclen);
423-
if (keyHdr->hasSpecialFileposition())
424-
return reclen + sizeof(offset_t);
425-
else
426-
return reclen;
427-
}
428-
429-
offset_t CJHBlockCompressedVarNode::getFPosAt(unsigned int num) const
430-
{
431-
if (num >= hdr.numKeys) return 0;
432-
if (!zeroFilePosition) return 0;
433-
434-
const char * p = recArray[num];
435-
KEYRECSIZE_T reclen = ((KEYRECSIZE_T *) p)[-1];
436-
_WINREV(reclen);
437-
offset_t pos;
438-
memcpy( &pos, p + reclen, sizeof(__int64) );
439-
_WINREV(pos);
440-
return pos;
441-
}

system/jhtree/jhblockcompressed.hpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,9 @@ class CJHBlockCompressedSearchNode : public CJHSearchNode
5959

6060
class CJHBlockCompressedVarNode : public CJHBlockCompressedSearchNode
6161
{
62-
const char **recArray = nullptr;
62+
OwnedMalloc<unsigned> offsets;
63+
OwnedMalloc<KEYRECSIZE_T> sizes;
6364
public:
64-
CJHBlockCompressedVarNode();
65-
~CJHBlockCompressedVarNode();
66-
6765
virtual void load(CKeyHdr *keyHdr, const void *rawData, offset_t pos, bool needCopy) override;
6866
virtual bool getKeyAt(unsigned int num, char *dest) const; // Retrieve keyed fields
6967
virtual bool fetchPayload(unsigned int num, char *dest, PayloadReference & activePayload) const; // Retrieve payload fields. Note destination is assumed to already contain keyed fields

system/jhtree/jhinplace.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ class jhtree_decl InplaceKeyBuildContext
175175
offset_t leafMemorySize = 0;
176176
struct {
177177
double minCompressionThreshold = 0.95; // use uncompressed if compressed is > 95% uncompressed
178-
unsigned maxCompressionFactor = 50; // Don't compress payload to less than 2% of the original by default (because when it is read it will use lots of memory)
178+
unsigned maxCompressionFactor = defaultMaxCompressionFactor; // Avoid compressing more than a set limit because allocating when expanding is painful.
179179
bool recompress = false;
180180
bool reuseCompressor = true;
181181
CompressionMethod blobCompression = COMPRESS_METHOD_LZW;

system/jhtree/keybuild.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,14 @@ class CKeyBuilder : public CInterfaceOf<IKeyBuilder>
223223
{
224224
sequence = options.startSequence;
225225
keyHdr.setown(new CWriteKeyHdr());
226-
keyValueSize = options.rawSize;
227-
keyedSize = options.keyFieldSize != (unsigned) -1 ? options.keyFieldSize : options.rawSize;
226+
227+
//Maximum row length supported is 32K - ensure the value is capped because it is stored in a short int.
228+
unsigned rawSize = options.rawSize;
229+
if (rawSize > KEYBUILD_MAXLENGTH)
230+
rawSize = KEYBUILD_MAXLENGTH; // max supported in ctree
231+
232+
keyValueSize = rawSize;
233+
keyedSize = options.keyFieldSize != (unsigned) -1 ? options.keyFieldSize : rawSize;
228234

229235
levels = 0;
230236
records = 0;

system/jlib/jlzbase.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ class CStreamCompressor : public CSimpleInterfaceOf<ICompressor>
200200
size32_t outputExtra = 0;
201201

202202
//Options for configuring the compressor:
203-
byte maxCompression = 20; // Avoid compressing more than 20x because allocating when expanding is painful.
203+
byte maxCompression = defaultMaxCompressionFactor; // Avoid compressing more than a set limit because allocating when expanding is painful.
204204
byte maxRecompress = 1; // How many times should the code try and recompress all the smaller streams as one?
205205
unsigned minSizeToCompress = 1; // If the uncompressed data is less than this size, don't bother compressing it
206206
};

system/jlib/jlzw.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3347,7 +3347,7 @@ CompressionMethod translateToCompMethod(const char *compStr, CompressionMethod d
33473347
compMethod = COMPRESS_METHOD_LZ4HC3;
33483348
else if (strieq("LZ4", compStr))
33493349
compMethod = COMPRESS_METHOD_LZ4;
3350-
else if (strieq("LZ4SHC", compStr))
3350+
else if (strieq("LZ4SHC", compStr) || strieq("LZ4HCS", compStr))
33513351
compMethod = COMPRESS_METHOD_LZ4SHC;
33523352
else if (strieq("LZ4S", compStr))
33533353
compMethod = COMPRESS_METHOD_LZ4S;

system/jlib/jlzw.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,4 +189,7 @@ extern jlib_decl CompressionMethod translateToCompMethod(const char *compStr, Co
189189
extern jlib_decl const char *translateFromCompMethod(unsigned compMethod);
190190

191191
#define MIN_ROWCOMPRESS_RECSIZE 8
192+
193+
constexpr unsigned defaultMaxCompressionFactor = 100;
194+
192195
#endif

0 commit comments

Comments
 (0)