|
36 | 36 | import static org.apache.parquet.filter2.predicate.FilterApi.notEq; |
37 | 37 | import static org.apache.parquet.filter2.predicate.FilterApi.notIn; |
38 | 38 | import static org.apache.parquet.filter2.predicate.FilterApi.or; |
| 39 | +import static org.apache.parquet.filter2.predicate.FilterApi.size; |
39 | 40 | import static org.apache.parquet.filter2.predicate.FilterApi.userDefined; |
40 | 41 | import static org.apache.parquet.filter2.predicate.LogicalInverter.invert; |
41 | 42 | import static org.apache.parquet.schema.OriginalType.DECIMAL; |
|
56 | 57 | import static org.junit.Assert.assertTrue; |
57 | 58 | import static org.junit.Assert.fail; |
58 | 59 |
|
| 60 | +import com.google.common.collect.ImmutableList; |
59 | 61 | import java.math.BigDecimal; |
60 | 62 | import java.nio.ByteBuffer; |
61 | 63 | import java.util.ArrayList; |
|
64 | 66 | import java.util.List; |
65 | 67 | import java.util.Set; |
66 | 68 | import org.apache.parquet.bytes.BytesUtils; |
| 69 | +import org.apache.parquet.column.statistics.SizeStatistics; |
67 | 70 | import org.apache.parquet.column.statistics.Statistics; |
68 | 71 | import org.apache.parquet.filter2.predicate.ContainsRewriter; |
69 | 72 | import org.apache.parquet.filter2.predicate.FilterPredicate; |
| 73 | +import org.apache.parquet.filter2.predicate.Operators; |
70 | 74 | import org.apache.parquet.filter2.predicate.Operators.BinaryColumn; |
71 | 75 | import org.apache.parquet.filter2.predicate.Operators.BooleanColumn; |
72 | 76 | import org.apache.parquet.filter2.predicate.Operators.DoubleColumn; |
@@ -1627,6 +1631,99 @@ public void testNoOpBuilder() { |
1627 | 1631 | assertNull(builder.build()); |
1628 | 1632 | } |
1629 | 1633 |
|
| 1634 | + @Test |
| 1635 | + public void testSizeRequiredElements() throws Exception { |
| 1636 | + final PrimitiveType type = Types.required(DOUBLE).named("element"); |
| 1637 | + final DoubleColumn col = doubleColumn(type.getName()); |
| 1638 | + |
| 1639 | + final List<List<Double>> pageValueList = new ArrayList<>(); |
| 1640 | + pageValueList.add(ImmutableList.of(1.0, 2.0, 3.0)); |
| 1641 | + pageValueList.add(ImmutableList.of(1.0, 2.0, 3.0, 4.0, 5.0)); |
| 1642 | + pageValueList.add(ImmutableList.of(-1.0)); |
| 1643 | + pageValueList.add(ImmutableList.of()); |
| 1644 | + pageValueList.add(null); |
| 1645 | + |
| 1646 | + final ColumnIndex columnIndex = createArrayColumnIndex(type, pageValueList); |
| 1647 | + |
| 1648 | + assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder()); |
| 1649 | + assertCorrectNullCounts(columnIndex, 0, 0, 0, 0, 0); |
| 1650 | + assertCorrectNullPages(columnIndex, false, false, false, true, true); |
| 1651 | + assertCorrectValues(columnIndex.getMaxValues(), 3.0, 5.0, -1.0, null, null); |
| 1652 | + assertCorrectValues(columnIndex.getMinValues(), 1.0, 1.0, -1.0, null, null); |
| 1653 | + |
| 1654 | + // we know max array size is 5; all elements of page 2 have size 1; and page 3 and 4 are null or empty |
| 1655 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.EQ, 0), 0, 1, 3, 4); |
| 1656 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.EQ, 4), 1); |
| 1657 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.EQ, 3), 0, 1); |
| 1658 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.LT, 2), 0, 1, 2, 3, 4); |
| 1659 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.LTE, 1), 0, 1, 2, 3, 4); |
| 1660 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.GT, 0), 0, 1, 2); |
| 1661 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.GTE, 0), 0, 1, 2, 3, 4); |
| 1662 | + } |
| 1663 | + |
| 1664 | + @Test |
| 1665 | + public void testSizeOptionalElements() throws Exception { |
| 1666 | + final PrimitiveType type = Types.optional(DOUBLE).named("element"); |
| 1667 | + final DoubleColumn col = doubleColumn(type.getName()); |
| 1668 | + |
| 1669 | + final List<Double> listWithNulls = new ArrayList<>(); |
| 1670 | + listWithNulls.add(null); |
| 1671 | + listWithNulls.add(3.0); |
| 1672 | + listWithNulls.add(null); |
| 1673 | + |
| 1674 | + final List<List<Double>> pageValueList = new ArrayList<>(); |
| 1675 | + pageValueList.add(listWithNulls); |
| 1676 | + |
| 1677 | + final ColumnIndex columnIndex = createArrayColumnIndex(type, pageValueList); |
| 1678 | + |
| 1679 | + assertCorrectNullCounts(columnIndex, 2); |
| 1680 | + assertCorrectNullPages(columnIndex, false); |
| 1681 | + assertCorrectValues(columnIndex.getMaxValues(), 3.0); |
| 1682 | + assertCorrectValues(columnIndex.getMinValues(), 3.0); |
| 1683 | + |
| 1684 | + // We know that the array values for the page have min size 0 and max size 3 |
| 1685 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.EQ, 0), 0); |
| 1686 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.EQ, 5)); |
| 1687 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.LT, 4), 0); |
| 1688 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.LTE, 3), 0); |
| 1689 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.GT, 0), 0); |
| 1690 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.GT, 3)); |
| 1691 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.GTE, 3), 0); |
| 1692 | + assertCorrectFiltering(columnIndex, size(col, Operators.Size.Operator.GTE, 4)); |
| 1693 | + } |
| 1694 | + |
| 1695 | + private static ColumnIndex createArrayColumnIndex(PrimitiveType type, List<List<Double>> pageValueList) { |
| 1696 | + final ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); |
| 1697 | + |
| 1698 | + for (List<Double> pageValues : pageValueList) { |
| 1699 | + final StatsBuilder sb = new StatsBuilder(); |
| 1700 | + boolean isNullOrEmpty = pageValues == null || pageValues.isEmpty(); |
| 1701 | + |
| 1702 | + final SizeStatistics.Builder sizeStatistics = |
| 1703 | + SizeStatistics.newBuilder(type, isNullOrEmpty ? 0 : 1, isNullOrEmpty ? 0 : 1); |
| 1704 | + |
| 1705 | + if (isNullOrEmpty) sizeStatistics.add(0, 0); |
| 1706 | + |
| 1707 | + if (pageValues != null) { |
| 1708 | + for (int i = 0; i < pageValues.size(); i++) { |
| 1709 | + if (i == 0) { |
| 1710 | + sizeStatistics.add(0, 1); |
| 1711 | + } else { |
| 1712 | + sizeStatistics.add(1, 1); |
| 1713 | + } |
| 1714 | + } |
| 1715 | + } |
| 1716 | + |
| 1717 | + if (pageValues == null) { |
| 1718 | + builder.add(sb.stats(type), sizeStatistics.build()); |
| 1719 | + } else { |
| 1720 | + builder.add(sb.stats(type, pageValues.toArray(new Double[0])), sizeStatistics.build()); |
| 1721 | + } |
| 1722 | + } |
| 1723 | + |
| 1724 | + return builder.build(); |
| 1725 | + } |
| 1726 | + |
1630 | 1727 | private static List<ByteBuffer> toBBList(Binary... values) { |
1631 | 1728 | List<ByteBuffer> buffers = new ArrayList<>(values.length); |
1632 | 1729 | for (Binary value : values) { |
|
0 commit comments