@@ -156,6 +156,80 @@ private static boolean canContainDeletesForFile(
156156
157157 case EQUALITY_DELETES :
158158 return canContainEqDeletesForFile (dataFile , deleteFile , schema );
159+
160+ case PARTIAL_UPDATE :
161+ return canContainPartialDeletesForFile (dataFile , deleteFile , schema );
162+ }
163+
164+ return true ;
165+ }
166+
167+ // todo: add actual implementation
// NOTE(review): the loop below is driven by deleteFile.equalityFieldIds(); confirm that
// partial-update files actually populate that list before relying on this check.
/**
 * Uses column statistics to decide whether a partial-update file may apply to a data file.
 *
 * <p>For each id in the delete file's equality field ids, the null/value counts of both files are
 * compared, and, when all four bound maps are present and hold an entry for the field, the
 * lower/upper bound ranges are compared as well. Non-primitive fields and fields with a missing
 * bound are skipped, i.e. treated as a possible match.
 *
 * @param dataFile data file whose bounds and null/value counts are read
 * @param deleteFile file whose equality field ids, bounds and null/value counts are read
 * @param schema schema used to resolve each field id to its field
 * @return false as soon as one field's stats rule out a match, true otherwise
 */
168+ private static boolean canContainPartialDeletesForFile (
169+ DataFile dataFile , DeleteFile deleteFile , Schema schema ) {
170+ // whether to check data ranges or to assume that the ranges match
171+ // if upper/lower bounds are missing, null counts may still be used to determine whether
172+ // delete files can be skipped
173+ boolean checkRanges =
174+ dataFile .lowerBounds () != null
175+ && dataFile .upperBounds () != null
176+ && deleteFile .lowerBounds () != null
177+ && deleteFile .upperBounds () != null ;
178+
// the bound maps may be null here; they are only dereferenced after the checkRanges guard below
179+ Map <Integer , ByteBuffer > dataLowers = dataFile .lowerBounds ();
180+ Map <Integer , ByteBuffer > dataUppers = dataFile .upperBounds ();
181+ Map <Integer , ByteBuffer > deleteLowers = deleteFile .lowerBounds ();
182+ Map <Integer , ByteBuffer > deleteUppers = deleteFile .upperBounds ();
183+
184+ Map <Integer , Long > dataNullCounts = dataFile .nullValueCounts ();
185+ Map <Integer , Long > dataValueCounts = dataFile .valueCounts ();
186+ Map <Integer , Long > deleteNullCounts = deleteFile .nullValueCounts ();
187+ Map <Integer , Long > deleteValueCounts = deleteFile .valueCounts ();
188+
189+ for (int id : deleteFile .equalityFieldIds ()) {
// NOTE(review): field is dereferenced on the next line without a null check; confirm that
// findField cannot return null for these ids
190+ Types .NestedField field = schema .findField (id );
191+ if (!field .type ().isPrimitiveType ()) {
192+ // stats are not kept for nested types. assume that the delete file may match
193+ continue ;
194+ }
195+
196+ if (containsNull (dataNullCounts , field ) && containsNull (deleteNullCounts , field )) {
197+ // the data has null values and null has been deleted, so the deletes must be applied
198+ continue ;
199+ }
200+
201+ if (allNull (dataNullCounts , dataValueCounts , field ) && allNonNull (deleteNullCounts , field )) {
202+ // the data file contains only null values for this field, but there are no deletes for null
203+ // values
204+ return false ;
205+ }
206+
207+ if (allNull (deleteNullCounts , deleteValueCounts , field )
208+ && allNonNull (dataNullCounts , field )) {
209+ // the delete file removes only rows with null for this field, but there are no data
210+ // rows with null
211+ return false ;
212+ }
213+
214+ if (!checkRanges ) {
215+ // at least one of the four bound maps is missing, assume the ranges match
216+ continue ;
217+ }
218+
219+ ByteBuffer dataLower = dataLowers .get (id );
220+ ByteBuffer dataUpper = dataUppers .get (id );
221+ ByteBuffer deleteLower = deleteLowers .get (id );
222+ ByteBuffer deleteUpper = deleteUppers .get (id );
223+ if (dataLower == null || dataUpper == null || deleteLower == null || deleteUpper == null ) {
224+ // at least one bound is not known, assume the delete file may match
225+ continue ;
226+ }
227+
228+ if (!rangesOverlap (
229+ field .type ().asPrimitiveType (), dataLower , dataUpper , deleteLower , deleteUpper )) {
230+ // no values overlap between the data file and the deletes
231+ return false ;
232+ }
159233 }
160234
// no field ruled out a match
161235 return true ;
@@ -474,6 +548,22 @@ DeleteFileIndex build() {
474548 globalApplySeqs = eqFilesSortedBySeq .stream ().mapToLong (Pair ::first ).toArray ();
475549 globalDeletes = eqFilesSortedBySeq .stream ().map (Pair ::second ).toArray (DeleteFile []::new );
476550
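551+ // fixme: when partial-update files exist, the block below replaces the globalApplySeqs / globalDeletes values built from the equality deletes just above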
552+ List <Pair <Long , DeleteFile >> partialDeleteSortedBySeq =
553+ deleteFilesByPartition .get (partition ).stream ()
554+ .filter (entry -> entry .file ().content () == FileContent .PARTIAL_UPDATE )
555+ .map (
556+ entry ->
557+ // a delete file is indexed by the sequence number it should be applied to
558+ Pair .of (entry .dataSequenceNumber (), entry .file ()))
559+ .sorted (Comparator .comparingLong (Pair ::first ))
560+ .collect (Collectors .toList ());
561+ if (partialDeleteSortedBySeq .size () > 0 ) {
562+ globalApplySeqs = partialDeleteSortedBySeq .stream ().mapToLong (Pair ::first ).toArray ();
563+ globalDeletes =
564+ partialDeleteSortedBySeq .stream ().map (Pair ::second ).toArray (DeleteFile []::new );
565+ }
566+
477567 List <Pair <Long , DeleteFile >> posFilesSortedBySeq =
478568 deleteFilesByPartition .get (partition ).stream ()
479569 .filter (entry -> entry .file ().content () == FileContent .POSITION_DELETES )
0 commit comments