@@ -140,8 +140,9 @@ public void close() throws IOException, InterruptedException {
       }
       finalMetadata.putAll(finalWriteContext.getExtraMetaData());
       parquetFileWriter.end(finalMetadata);
+      AutoCloseables.uncheckedClose(parquetFileWriter);
     } finally {
-      AutoCloseables.uncheckedClose(columnStore, pageStore, bloomFilterWriteStore, parquetFileWriter);
+      AutoCloseables.uncheckedClose(columnStore, pageStore, bloomFilterWriteStore);
Contributor:
Now that ParquetFileWriter handles the "aborted" state, this change can be reverted.
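A minimal sketch of the reverted shape the reviewer is suggesting; the class and the `uncheckedClose` stand-in are illustrative only, not the actual InternalParquetRecordWriter or `AutoCloseables` code:

```java
import java.io.IOException;

// Illustrative only: once ParquetFileWriter tracks its own "aborted" state,
// the record writer can go back to closing it in the shared finally block.
class CloseShapeSketch {
  AutoCloseable columnStore, pageStore, bloomFilterWriteStore, parquetFileWriter;
  private boolean closed;

  public void close() throws IOException {
    try {
      // ... build final metadata and call parquetFileWriter.end(finalMetadata) ...
    } finally {
      // reverted form: parquetFileWriter rejoins the single unchecked-close call
      uncheckedClose(columnStore, pageStore, bloomFilterWriteStore, parquetFileWriter);
      closed = true;
    }
  }

  // stand-in for AutoCloseables.uncheckedClose: close everything, swallow errors
  private static void uncheckedClose(AutoCloseable... closeables) {
    for (AutoCloseable c : closeables) {
      try {
        if (c != null) c.close();
      } catch (Exception ignored) {
        // deliberately swallowed, mirroring "unchecked" close semantics
      }
    }
  }
}
```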

Contributor (Author):
Yes, I haven't finished my change yet.

Contributor:
Sorry, then. I was too fast. 😄
Ping me when you're ready.

Contributor (Author):
Thanks for the quick review!

       closed = true;
     }
   }
@@ -113,7 +113,7 @@ public class ParquetFileWriter implements AutoCloseable {
   public static final byte[] EFMAGIC = EF_MAGIC_STR.getBytes(StandardCharsets.US_ASCII);
   public static final String PARQUET_COMMON_METADATA_FILE = "_common_metadata";
   public static final int CURRENT_VERSION = 1;

   // File creation modes
   public static enum Mode {
     CREATE,
@@ -173,6 +173,7 @@ public static enum Mode {

   // set when end is called
   private ParquetMetadata footer = null;
+  private boolean aborted;
   private boolean closed;

   private final CRC32 crc;
@@ -1812,6 +1813,8 @@ public void end(Map<String, String> extraMetaData) throws IOException {
     LOG.debug("{}: end", out.getPos());
     this.footer = new ParquetMetadata(new FileMetaData(schema, extraMetaData, Version.FULL_VERSION), blocks);
     serializeFooter(footer, out, fileEncryptor, metadataConverter);
+    } catch (Exception e) {
+      aborted = true;
Contributor:
We do not want to swallow the exception, just set the flag and re-throw.
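A minimal sketch of the requested "set the flag and re-throw" pattern; the class and `serializeFooterInternal` are placeholder names invented for illustration, not the real ParquetFileWriter internals:

```java
import java.io.IOException;

// Sketch: the failure is recorded so close() can skip the final flush,
// but the original exception still propagates to the caller.
class FooterSketch {
  private boolean aborted;

  public void end() throws IOException {
    try {
      serializeFooterInternal(); // may fail while writing the footer
    } catch (Exception e) {
      aborted = true; // remember the failure for close()
      throw e;        // re-throw instead of swallowing
    }
  }

  // placeholder for the real footer serialization
  private void serializeFooterInternal() throws IOException {}
}
```

Because the catch parameter is effectively final, Java's precise rethrow (Java 7+) lets `throw e` compile against the narrower `throws IOException` declaration.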

Contributor:
We should probably apply the same pattern to every public method that may throw an exception.
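One way that could be applied uniformly, sketched with a hypothetical guard helper; `ThrowingRunnable` and `guarded` are not parquet-java API, just an illustration of funneling every public method body through one place that flips the flag:

```java
// Hypothetical sketch: any failure in a guarded body marks the writer as
// aborted before the exception propagates unchanged.
class AbortGuardSketch {
  @FunctionalInterface
  interface ThrowingRunnable<X extends Throwable> {
    void run() throws X;
  }

  private boolean aborted;

  <X extends Throwable> void guarded(ThrowingRunnable<X> body) throws X {
    try {
      body.run();
    } catch (Throwable t) {
      aborted = true;
      throw t; // precise rethrow: only X or an unchecked exception can reach here
    }
  }

  // example use: a public method wraps its existing body
  public void startBlock(long recordCount) throws java.io.IOException {
    this.<java.io.IOException>guarded(() -> {
      // ... existing startBlock body ...
    });
  }
}
```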

Contributor Author (@Jiayi-Wang-db, Oct 24, 2025):
I'm not familiar with the direct buffer change, but in InternalParquetRecordWriter, there's only one place where aborted is marked. Is that the only place that could cause an aborted write? If so, we don't need to apply the same pattern to every public method in ParquetFileWriter.

Contributor (Author):
It does look that way. The write method in InternalParquetRecordWriter is the only public method that can throw an exception (except close). So after we mark the writer as aborted there and abort the file write in the close call, we should cover all cases.
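A sketch of what that single marking point might look like; this is an assumed shape, not the actual parquet-java source, and `writeValue`/`checkBlockSizeReached` stand in for the real internals:

```java
import java.io.IOException;

// Assumed sketch: write(...) is the one public method (besides close) that
// can throw, so marking "aborted" here lets close() abort the file instead
// of finishing and flushing it.
class RecordWriterSketch<T> {
  private boolean aborted;
  private long recordCount;

  public void write(T value) throws IOException {
    try {
      writeValue(value);       // stand-in for writeSupport.write(value)
      ++recordCount;
      checkBlockSizeReached(); // may flush a row group and fail
    } catch (Exception e) {
      aborted = true; // close() will then skip end()/flush entirely
      throw e;
    }
  }

  private void writeValue(T value) throws IOException {}

  private void checkBlockSizeReached() throws IOException {}
}
```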

     } finally {
       close();
     }
@@ -1823,7 +1826,7 @@ public void close() throws IOException {
       return;
     }
     try (PositionOutputStream temp = out) {
-      temp.flush();
+      if (!aborted) temp.flush();
       if (crcAllocator != null) {
         crcAllocator.close();
       }
@@ -780,4 +780,44 @@ public void testParquetWriterBuilderCanNotConfigurePathAndFile() throws IOException {
         "Cannot set both path and file", IllegalStateException.class, (Callable<ParquetWriter<Group>>) () ->
             ExampleParquetWriter.builder(path).withFile(outputFile).build());
   }
+
+  @Test
+  public void testNoFlushAfterException() throws Exception {
+    final File testDir = temp.newFile();
+    testDir.delete();
+
+    final Path file = new Path(testDir.getAbsolutePath(), "test.parquet");
+
+    MessageType schema = Types.buildMessage()
+        .required(BINARY)
+        .named("binary_field")
+        .required(INT32)
+        .named("int32_field")
+        .named("test_schema_abort");
+    Configuration conf = new Configuration();
+
+    try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(new Path(file.toString()))
+        .withAllocator(allocator)
+        .withType(schema)
+        .build()) {
+
+      SimpleGroupFactory f = new SimpleGroupFactory(schema);
+      writer.write(f.newGroup()
+          .append("binary_field", "hello")
+          .append("int32_field", 123));
+
+      // Reach into the private InternalParquetRecordWriter via reflection and
+      // force the aborted flag, simulating a write that failed before close().
+      Field internalWriterField = ParquetWriter.class.getDeclaredField("writer");
+      internalWriterField.setAccessible(true);
+      Object internalWriter = internalWriterField.get(writer);
+
+      Field abortedField = internalWriter.getClass().getDeclaredField("aborted");
+      abortedField.setAccessible(true);
+      abortedField.setBoolean(internalWriter, true);
+      writer.close();
+    }
+
+    // After closing an aborted writer, no buffered bytes may have been
+    // flushed: the file either does not exist or is empty.
+    FileSystem fs = file.getFileSystem(conf);
+    assertTrue(!fs.exists(file) || fs.getFileStatus(file).getLen() == 0);
+  }
 }