Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d6a67ec
Using validity bit map as arrow does
JaroslavTulach Dec 1, 2025
c88bf32
Let's hope validity has been fixed
JaroslavTulach Dec 1, 2025
ea48d1d
Need to set validity bit when appending valid value
JaroslavTulach Dec 1, 2025
08870e4
Missing ! in LongStorage.isNothing
JaroslavTulach Dec 1, 2025
d6f3888
Better validityMap handling in BoolBuilder and BoolStorage
JaroslavTulach Dec 1, 2025
84b51ea
isNothingMap still useful in FillMissingOperation
JaroslavTulach Dec 1, 2025
65f340c
LongStorageIterator also uses validityMap
JaroslavTulach Dec 1, 2025
6f1da71
Iterate over clear bits of the validity bitmap
JaroslavTulach Dec 1, 2025
ffc465a
test/Table_Tests/src/In_Memory/Bool_Spec.enso is passing OK
JaroslavTulach Dec 2, 2025
438864a
Success with --run test/Table_Tests should.support.is_blank
JaroslavTulach Dec 2, 2025
6011c98
One ! fixes runEngineDistribution --run test/Table_Tests cast.to.Deci…
JaroslavTulach Dec 2, 2025
e23ae23
Cardinality of a map represents number of valid entries
JaroslavTulach Dec 2, 2025
215b453
makeValidityMap in IsInOperation
JaroslavTulach Dec 2, 2025
bfa1c62
Keeping the original logic and just flipping the bits fixes all tests…
JaroslavTulach Dec 2, 2025
01ad757
Empty valid map with non-empty storage must mean there is null value
JaroslavTulach Dec 2, 2025
8a0e205
Keep sealed and permit InferredDoubleBuilder
JaroslavTulach Dec 2, 2025
c2676e8
Rewording comment
JaroslavTulach Dec 2, 2025
ab49b81
Reformatted by javafmtAll
JaroslavTulach Dec 2, 2025
171cded
Nothing is present when validity cardinality isn't equal to size
JaroslavTulach Dec 3, 2025
ef972d5
Enforcing immutability of validity map by using ImmutableBitSet
JaroslavTulach Dec 3, 2025
0621d6e
Sharing allTrue bitmap for all sizes
JaroslavTulach Dec 3, 2025
4f78cd0
More of ImmutableBitSet
JaroslavTulach Dec 3, 2025
0c07d60
Avoid allocating validityMap in the builder when all values are valid
JaroslavTulach Dec 3, 2025
6b036af
Unit testing IsNothingOperation
JaroslavTulach Dec 3, 2025
58fc873
Fixing CountNothing.allNothing for storages with validity map
JaroslavTulach Dec 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
/** A builder for boolean columns. */
final class BoolBuilder implements BuilderForBoolean, BuilderWithRetyping {
private final BitSet vals;
private final BitSet isNothing;
private final BitSet validityMap;
int size = 0;

// ** Creates a new builder for boolean columns. Should be built via Builder.getForBoolean. */
BoolBuilder(int capacity) {
vals = new BitSet(capacity);
isNothing = new BitSet(capacity);
validityMap = new BitSet(capacity);
}

@Override
Expand All @@ -31,6 +31,7 @@ public BoolBuilder append(Object o) {
if (b) {
vals.set(size);
}
validityMap.set(size);
} else {
throw new ValueTypeMismatchException(getType(), o);
}
Expand All @@ -54,13 +55,14 @@ public BoolBuilder appendBoolean(boolean value) {
if (value) {
vals.set(size);
}
validityMap.set(size, true);
size++;
return this;
}

@Override
public BoolBuilder appendNulls(int count) {
isNothing.set(size, size + count);
validityMap.set(size, size + count, false);
size += count;
return this;
}
Expand All @@ -71,7 +73,7 @@ public void appendBulkStorage(ColumnStorage<?> storage) {
// We know this is valid for a BoolStorage.
int toCopy = (int) boolStorage.getSize();
BitSets.copy(boolStorage.getValues(), vals, size, toCopy);
BitSets.copy(boolStorage.getIsNothingMap(), isNothing, size, toCopy);
boolStorage.getValidityMap().copyTo(validityMap, size, toCopy);
size += toCopy;
} else if (storage instanceof ColumnBooleanStorage columnBooleanStorage) {
for (long i = 0; i < columnBooleanStorage.getSize(); i++) {
Expand All @@ -90,7 +92,7 @@ public void appendBulkStorage(ColumnStorage<?> storage) {

@Override
public ColumnStorage<Boolean> seal() {
return new BoolStorage(vals, isNothing, size, false);
return new BoolStorage(vals, validityMap, size, false);
}

@Override
Expand All @@ -101,7 +103,7 @@ public long getCurrentSize() {
@Override
public void copyDataTo(Object[] items) {
for (int i = 0; i < size; i++) {
if (isNothing.get(i)) {
if (!validityMap.get(i)) {
items[i] = null;
} else {
items[i] = vals.get(i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,12 @@ static ColumnStorage<?> fromRepeatedItem(Object item, long size) {
// Create a single storage item based on the type of the item.
return switch (item) {
case null -> new NullBuilder().appendNulls(checkSize(size)).seal();
case Boolean booleanValue ->
new BoolStorage(new BitSet(), new BitSet(), checkSize(size), booleanValue);
case Boolean booleanValue -> {
var s = checkSize(size);
var validity = new BitSet();
validity.set(0, s, true);
yield new BoolStorage(new BitSet(), validity, s, booleanValue);
}
default -> {
var storageType = StorageType.forBoxedItem(item, PreciseTypeOptions.DEFAULT);
var builder = Builder.getForType(storageType, size, BlackholeProblemAggregator.INSTANCE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.error.ValueTypeMismatchException;
import org.enso.table.problems.ProblemAggregator;
import org.enso.table.util.BitSets;

/** A builder for floating point columns. */
class DoubleBuilder extends NumericBuilder implements BuilderForDouble {
sealed class DoubleBuilder extends NumericBuilder implements BuilderForDouble
permits InferredDoubleBuilder {
protected final PrecisionLossAggregator precisionLossAggregator;
protected double[] data;

Expand Down Expand Up @@ -80,6 +80,7 @@ public DoubleBuilder append(Object o) {
}

ensureSpaceToAppend();
setValid(currentSize);
data[currentSize++] = value;
return this;
}
Expand All @@ -91,7 +92,7 @@ public void appendBulkStorage(ColumnStorage<?> storage) {
int n = (int) doubleStorage.getSize();
ensureFreeSpaceFor(n);
System.arraycopy(doubleStorage.getData(), 0, data, currentSize, n);
BitSets.copy(doubleStorage.getIsNothingMap(), isNothing, currentSize, n);
appendValidityMap(doubleStorage.getValidityMap(), n);
currentSize += n;
} else {
var doubleStorage = floatType.asTypedStorage(storage);
Expand Down Expand Up @@ -147,8 +148,10 @@ public void appendBulkStorage(ColumnStorage<?> storage) {
*
* @param value the double to append
*/
@Override
public DoubleBuilder appendDouble(double value) {
ensureSpaceToAppend();
setValid(currentSize);
data[currentSize++] = value;
return this;
}
Expand All @@ -158,14 +161,15 @@ public DoubleBuilder appendDouble(double value) {
*
* <p>It ensures that any loss of precision is reported.
*/
@Override
public DoubleBuilder appendLong(long value) {
appendDouble(convertLongToDouble(value));
return this;
}

@Override
public ColumnStorage<Double> seal() {
return new DoubleStorage(data, currentSize, isNothing);
return new DoubleStorage(data, currentSize, validityMap());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ static InferredDoubleBuilder retypeFromLongBuilder(
public void copyDataTo(Object[] items) {
int rawN = rawData == null ? 0 : rawData.length;
for (int i = 0; i < currentSize; i++) {
if (isNothing.get(i)) {
if (!isValid(i)) {
items[i] = null;
} else {
if (isLongCompactedAsDouble.get(i)) {
Expand Down Expand Up @@ -160,7 +160,7 @@ public Builder retypeTo(StorageType<?> type) {
if (type instanceof BigDecimalType) {
Builder res = Builder.getForBigDecimal(data.length);
for (int i = 0; i < currentSize; i++) {
if (isNothing.get(i)) {
if (!isValid(i)) {
res.appendNulls(1);
} else {
BigDecimal bigDecimal = BigDecimal.valueOf(data[i]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.error.ValueTypeMismatchException;
import org.enso.table.problems.ProblemAggregator;
import org.enso.table.util.BitSets;

/** A builder for integer columns. */
class LongBuilder extends NumericBuilder implements BuilderForLong, BuilderWithRetyping {
sealed class LongBuilder extends NumericBuilder implements BuilderForLong, BuilderWithRetyping
permits BoundCheckedIntegerBuilder {
protected final ProblemAggregator problemAggregator;
protected long[] data;

Expand Down Expand Up @@ -49,7 +49,7 @@ protected void resize(int desiredCapacity) {
@Override
public void copyDataTo(Object[] items) {
for (int i = 0; i < currentSize; i++) {
if (isNothing.get(i)) {
if (!isValid(i)) {
items[i] = null;
} else {
items[i] = data[i];
Expand Down Expand Up @@ -96,7 +96,7 @@ public void appendBulkStorage(ColumnStorage<?> storage) {
int n = (int) longStorage.getSize();
ensureFreeSpaceFor(n);
System.arraycopy(longStorage.getData(), 0, data, currentSize, n);
BitSets.copy(longStorage.getIsNothingMap(), isNothing, currentSize, n);
appendValidityMap(longStorage.getValidityMap(), n);
currentSize += n;
} else {
// No conversions needed, but we need to iterate over the items.
Expand Down Expand Up @@ -134,6 +134,7 @@ public void appendBulkStorage(ColumnStorage<?> storage) {
*/
public LongBuilder appendLong(long value) {
ensureSpaceToAppend();
this.setValid(currentSize);
this.data[currentSize++] = value;
return this;
}
Expand All @@ -143,7 +144,7 @@ public boolean isNothing(long index) {
if (index >= currentSize) {
throw new IndexOutOfBoundsException();
} else {
return isNothing.get((int) index);
return !isValid((int) index);
}
}

Expand Down Expand Up @@ -185,6 +186,6 @@ public LongBuilder append(Object o) {

@Override
public ColumnStorage<Long> seal() {
return new LongStorage(data, currentSize, isNothing, getType());
return new LongStorage(data, currentSize, validityMap(), getType());
}
}
Original file line number Diff line number Diff line change
@@ -1,28 +1,58 @@
package org.enso.table.data.column.builder;

import java.util.BitSet;
import org.enso.table.util.ImmutableBitSet;

/** A common base for numeric builders. */
abstract class NumericBuilder implements Builder {
protected BitSet isNothing;
protected int currentSize;
abstract sealed class NumericBuilder implements Builder permits DoubleBuilder, LongBuilder {
private BitSet validityMap;
int currentSize;

protected NumericBuilder() {
this.isNothing = new BitSet();
this.currentSize = 0;
protected NumericBuilder() {}

private BitSet getValidityMap() {
if (validityMap == null) {
validityMap = new BitSet();
validityMap.set(0, currentSize);
}
return validityMap;
}

protected void doAppendNulls(int count) {
isNothing.set(currentSize, currentSize + count);
protected final void doAppendNulls(int count) {
getValidityMap().set(currentSize, currentSize + count, false);
currentSize += count;
}

protected final boolean isValid(int i) {
return validityMap == null || validityMap.get(i);
}

protected final void setValid(int i) {
if (validityMap != null) {
validityMap.set(i);
}
}

protected final ImmutableBitSet validityMap() {
if (validityMap == null) {
return ImmutableBitSet.allTrue(currentSize);
} else {
return new ImmutableBitSet(validityMap, currentSize);
}
}

protected final void appendValidityMap(ImmutableBitSet validity, int n) {
if (validity.cardinality() < n || validityMap != null) {
validity.copyTo(getValidityMap(), currentSize, n);
}
}

@Override
public long getCurrentSize() {
return currentSize;
}

protected void ensureFreeSpaceFor(int additionalSize) {
protected final void ensureFreeSpaceFor(int additionalSize) {
if (currentSize + additionalSize > getDataSize()) {
resize(currentSize + additionalSize);
}
Expand All @@ -35,7 +65,7 @@ protected void ensureFreeSpaceFor(int additionalSize) {
* appends. It tries to keep the invariant that after calling `grow` the array has at least one
* free slot.
*/
protected void ensureSpaceToAppend() {
protected final void ensureSpaceToAppend() {
int dataLength = getDataSize();

// Check current size. If there is space, we don't need to grow.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import org.enso.table.data.column.storage.ColumnBooleanStorage;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.ColumnStorageWithInferredStorage;
import org.enso.table.data.column.storage.ColumnStorageWithNothingMap;
import org.enso.table.data.column.storage.ColumnStorageWithValidityMap;
import org.enso.table.data.column.storage.type.AnyObjectType;
import org.enso.table.data.column.storage.type.BigDecimalType;
import org.enso.table.data.column.storage.type.BigIntegerType;
Expand All @@ -28,6 +28,7 @@
import org.enso.table.data.column.storage.type.TimeOfDayType;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.problems.MapOperationProblemAggregator;
import org.enso.table.util.ImmutableBitSet;

/**
* The IsInOperation class provides a way to check if a value is in a set of values. It checks if
Expand Down Expand Up @@ -265,8 +266,8 @@ private static ColumnStorage<?> applyBooleanIsIn(

// If had both true and false, then return all true when not nothing
if (flags.hadTrue && flags.hadFalse) {
var isNothing = makeIsNothingMap(boolStorage, checkedSize);
return new BoolStorage(new BitSet(), isNothing, checkedSize, true);
var validityMap = makeValidityMap(boolStorage, checkedSize);
return new BoolStorage(new BitSet(), validityMap, checkedSize, true);
}

// Only have one of true or false
Expand Down Expand Up @@ -296,34 +297,41 @@ private static ColumnStorage<?> applyBooleanIsIn(
private static ColumnStorage<?> applyBoolStorage(
boolean keepValue, BoolStorage boolStorage, int checkedSize) {
BitSet values = boolStorage.getValues();
BitSet isNothing = boolStorage.getIsNothingMap();
BitSet isNothing = boolStorage.getValidityMap().cloneBitSet();
isNothing.flip(0, Math.toIntExact(boolStorage.getSize()));

if (keepValue) {
var newIsNothing =
boolStorage.isNegated() ? or(isNothing, values) : orNot(isNothing, values, checkedSize);
boolStorage.isNegated()
? or(isNothing, values, checkedSize)
: orNot(isNothing, values, checkedSize);
newIsNothing.flip(0, checkedSize);
return new BoolStorage(values, newIsNothing, checkedSize, boolStorage.isNegated());
} else {
var newIsNothing =
boolStorage.isNegated() ? orNot(isNothing, values, checkedSize) : or(isNothing, values);
boolStorage.isNegated()
? orNot(isNothing, values, checkedSize)
: or(isNothing, values, checkedSize);
newIsNothing.flip(0, checkedSize);
return new BoolStorage(values, newIsNothing, checkedSize, !boolStorage.isNegated());
}
Comment on lines 303 to 317
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DRY (although it has been like that before)

Copy link
Member Author

@JaroslavTulach JaroslavTulach Dec 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • I had touched the code in previous commits
  • and then I greatly regretted that
  • in bfa1c62 I had to return the original code back
  • it is probably more important to keep the semantics correct
  • than to fulfill any artificial code metrics
  • but the current version is also slower (because of calling flip two times)
  • thus I may give it one more try...

}

private static BitSet makeIsNothingMap(ColumnStorage<?> storage, int size) {
if (storage instanceof ColumnStorageWithNothingMap withNothingMap) {
return withNothingMap.getIsNothingMap();
private static ImmutableBitSet makeValidityMap(ColumnStorage<?> storage, int size) {
if (storage instanceof ColumnStorageWithValidityMap withNothingMap) {
return withNothingMap.getValidityMap();
}

BitSet isNothingMap = new BitSet(size);
BitSet validityMap = new BitSet(size);
for (int i = 0; i < size; i++) {
if (storage.isNothing(i)) {
isNothingMap.set(i);
if (!storage.isNothing(i)) {
validityMap.set(i);
}
}
return isNothingMap;
return new ImmutableBitSet(validityMap, size);
}

private static BitSet or(BitSet left, BitSet right) {
private static BitSet or(BitSet left, BitSet right, int sizeIsIgnored) {
BitSet result = (BitSet) left.clone();
result.or(right);
return result;
Expand Down
Loading
Loading