-
Notifications
You must be signed in to change notification settings - Fork 32
Open
Description
Polars Python code
import polars as pl

# Raw input: the timestamps start out as plain strings and are parsed below.
timestamps = [
    '2018-02-01 00:00:00', '2018-02-02 00:00:00',
    '2018-02-03 00:00:00', '2018-02-04 00:00:00',
    '2018-02-05 00:00:00',
]
raw = pl.DataFrame({
    'DateTime': timestamps,
    'String': ['a', 'b', 'c', 'd', 'e'],
    'Float': [1.2, 3.4, 5.6, 7.8, 9.0],
})

# Parse 'DateTime' into pl.Datetime first, then select the remaining columns.
parsed_datetime = pl.col('DateTime').str.strptime(pl.Datetime, '%Y-%m-%d %H:%M:%S')
df = raw.select(parsed_datetime, pl.exclude('DateTime'))

print(df)

# Write the frame to a Parquet file, requesting zstd compression explicitly.
df.write_parquet("test.parquet", compression='zstd')
Note that compression='zstd' is already the default for write_parquet.
Generated Parquet file: test.parquet.zip.
Parquet.jl code
import Pkg

# Work in a throwaway environment so the reproduction pins exact package
# versions without touching the default project.
Pkg.activate(temp=true)
Pkg.add([
    Pkg.PackageSpec(name="Parquet", version="0.8.4"),
    Pkg.PackageSpec(name="DataFrames", version="1.5.0"),
])
Pkg.status()

import Parquet
# `DataFrame` must be brought into scope explicitly: installing DataFrames via
# Pkg.add does not load it, and without this line the last statement fails
# with `UndefVarError: DataFrame not defined` before Parquet.jl is exercised.
import DataFrames: DataFrame

# Read the file written by the Polars script and materialize it as a DataFrame.
Parquet.read_parquet("test.parquet") |> DataFrame
Parquet.jl error
ERROR: zstd error
Stacktrace:
[1] changemode!(stream::TranscodingStreams.TranscodingStream{CodecZstd.ZstdDecompressor, IOBuffer}, newmode::Symbol)
@ TranscodingStreams ~/.julia/packages/TranscodingStreams/MQucZ/src/stream.jl:742
[2] callprocess(stream::TranscodingStreams.TranscodingStream{CodecZstd.ZstdDecompressor, IOBuffer}, inbuf::TranscodingStreams.Buffer, outbuf::TranscodingStreams.Buffer)
@ TranscodingStreams ~/.julia/packages/TranscodingStreams/MQucZ/src/stream.jl:668
[3] fillbuffer(stream::TranscodingStreams.TranscodingStream{CodecZstd.ZstdDecompressor, IOBuffer}; eager::Bool)
@ TranscodingStreams ~/.julia/packages/TranscodingStreams/MQucZ/src/stream.jl:596
[4] fillbuffer
@ ~/.julia/packages/TranscodingStreams/MQucZ/src/stream.jl:582 [inlined]
[5] eof
@ ~/.julia/packages/TranscodingStreams/MQucZ/src/stream.jl:201 [inlined]
[6] readbytes!(stream::TranscodingStreams.TranscodingStream{CodecZstd.ZstdDecompressor, IOBuffer}, b::Vector{UInt8}, nb::Int64)
@ TranscodingStreams ~/.julia/packages/TranscodingStreams/MQucZ/src/stream.jl:387
[7] readbytes!
@ ~/.julia/packages/TranscodingStreams/MQucZ/src/stream.jl:384 [inlined]
[8] (::Parquet.var"#46#47"{Parquet.ColumnChunkPages, Int64})()
@ Parquet ~/.julia/packages/Parquet/6tj1X/src/reader.jl:187
[9] (::Parquet.var"#40#42"{Parquet.var"#46#47"{Parquet.ColumnChunkPages, Int64}, Parquet.PageLRU, Tuple{Parquet.PAR2.ColumnChunk, Int64}})()
@ Parquet ~/.julia/packages/Parquet/6tj1X/src/reader.jl:34
[10] lock(f::Parquet.var"#40#42"{Parquet.var"#46#47"{Parquet.ColumnChunkPages, Int64}, Parquet.PageLRU, Tuple{Parquet.PAR2.ColumnChunk, Int64}}, l::ReentrantLock)
@ Base ./lock.jl:229
[11] cacheget
@ ~/.julia/packages/Parquet/6tj1X/src/reader.jl:30 [inlined]
[12] iterate(ccp::Parquet.ColumnChunkPages, startpos::Int64)
@ Parquet ~/.julia/packages/Parquet/6tj1X/src/reader.jl:167
[13] iterate(ccpv::Parquet.ColumnChunkPageValues{Int64}, startpos::Int64)
@ Parquet ~/.julia/packages/Parquet/6tj1X/src/reader.jl:262
[14] iterate
@ ~/.julia/packages/Parquet/6tj1X/src/reader.jl:240 [inlined]
[15] setrow(cursor::Parquet.ColCursor{Int64}, row::Int64)
@ Parquet ~/.julia/packages/Parquet/6tj1X/src/cursor.jl:114
[16] Parquet.ColCursor(par::Parquet.File, colname::Vector{String}; rows::UnitRange{Int64}, row::Int64)
@ Parquet ~/.julia/packages/Parquet/6tj1X/src/cursor.jl:62
[17] ColCursor
@ ~/.julia/packages/Parquet/6tj1X/src/cursor.jl:56 [inlined]
[18] #59
@ ./none:0 [inlined]
[19] iterate
@ ./generator.jl:47 [inlined]
[20] collect(itr::Base.Generator{Vector{Vector{String}}, Parquet.var"#59#61"{UnitRange{Int64}, Parquet.File}})
@ Base ./array.jl:782
[21] Parquet.BatchedColumnsCursor(par::Parquet.File; rows::UnitRange{Int64}, batchsize::Int64, reusebuffer::Bool, use_threads::Bool)
@ Parquet ~/.julia/packages/Parquet/6tj1X/src/cursor.jl:254
[22] cursor(table::Parquet.Table)
@ Parquet ~/.julia/packages/Parquet/6tj1X/src/simple_reader.jl:130
[23] load(table::Parquet.Table)
@ Parquet ~/.julia/packages/Parquet/6tj1X/src/simple_reader.jl:134
[24] getcolumn
@ ~/.julia/packages/Parquet/6tj1X/src/simple_reader.jl:186 [inlined]
[25] getcolumn
@ ~/.julia/packages/Parquet/6tj1X/src/simple_reader.jl:184 [inlined]
[26] fromcolumns(x::Parquet.Table, names::Vector{Symbol}; copycols::Bool)
@ DataFrames ~/.julia/packages/DataFrames/LteEl/src/other/tables.jl:36
[27] fromcolumns
@ ~/.julia/packages/DataFrames/LteEl/src/other/tables.jl:36 [inlined]
[28] #fromcolumns#879
@ ~/.julia/packages/DataFrames/LteEl/src/other/tables.jl:45 [inlined]
[29] DataFrame(x::Parquet.Table; copycols::Nothing)
@ DataFrames ~/.julia/packages/DataFrames/LteEl/src/other/tables.jl:59
[30] DataFrame
@ ~/.julia/packages/DataFrames/LteEl/src/other/tables.jl:48 [inlined]
[31] |>(x::Parquet.Table, f::Type{DataFrame})
@ Base ./operators.jl:907
[32] top-level scope
@ REPL[11]:1
Versions
- Parquet v0.8.4
- DataFrames v1.5.0
julia> versioninfo()
Julia Version 1.9.0-rc2
Commit 72aec423c2a (2023-04-01 10:41 UTC)
Platform Info:
OS: macOS (x86_64-apple-darwin21.4.0)
CPU: 4 × Intel(R) Core(TM) i5-3330S CPU @ 2.70GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-14.0.6 (ORCJIT, ivybridge)
Threads: 1 on 4 virtual cores
Metadata
Metadata
Assignees
Labels
No labels