@@ -42,6 +42,136 @@ public:
4242\brief Common user-tunable settings for storing ntuples
4343
4444All page sink classes need to support the common options.
45+
46+ <table>
47+ <tr>
48+ <th>Option name</th>
49+ <th>Type</th>
50+ <th>Default</th>
51+ <th>Description</th>
52+ </tr>
53+
54+ <tr>
55+ <td>`Compression`</td>
56+ <td>`std::uint32_t`</td>
57+ <td>RCompressionSetting::EDefaults::kUseGeneralPurpose</td>
58+ <td>
59+ The compression settings for this ntuple
60+ </td>
61+ </tr>
62+
63+ <tr>
64+ <td>`ApproxZippedClusterSize`</td>
65+ <td>`std::size_t`</td>
66+ <td>128 MiB</td>
67+ <td>
68+ Approximation of the target compressed cluster size
69+ </td>
70+ </tr>
71+
72+ <tr>
73+ <td>`MaxUnzippedClusterSize`</td>
74+ <td>`std::size_t`</td>
75+ <td>1280 MiB</td>
76+ <td>
77+ Memory limit for committing a cluster: with very high compression ratio, we need a limit
78+ on how large the I/O buffer can grow during writing.
79+ </td>
80+ </tr>
81+
82+ <tr>
83+ <td>`InitialUnzippedPageSize`</td>
84+ <td>`std::size_t`</td>
85+ <td>256</td>
86+ <td>
87+ Initially, columns start with a page of this size. The default value is chosen to accommodate at least 32 elements
88+ of 64 bits, or 64 elements of 32 bits. If more elements are needed, pages are increased up until the byte limit
89+ given by the option `MaxUnzippedPageSize` or until the total page buffer limit is reached (as a sum of all page buffers).
90+ The total write buffer limit needs to be large enough to hold the initial pages of all columns.
91+ </td>
92+ </tr>
93+
94+ <tr>
95+ <td>`MaxUnzippedPageSize`</td>
96+ <td>`std::size_t`</td>
97+ <td>1 MiB</td>
98+ <td>
99+ Pages can grow only to the given limit in bytes.
100+ </td>
101+ </tr>
102+
103+ <tr>
104+ <td>`PageBufferBudget`</td>
105+ <td>`std::size_t`</td>
106+ <td>0 / auto</td>
107+ <td>
108+ The maximum size that the sum of all page buffers used for writing into a persistent sink is allowed to use.
109+ If set to zero, RNTuple will auto-adjust the budget based on the value of `ApproxZippedClusterSize`.
110+ If set manually, the size needs to be large enough to hold all initial page buffers.
111+ The total amount of memory for writing is larger, e.g. for the additional compressed buffers etc.
112+ Use RNTupleModel::EstimateWriteMemoryUsage() for the total estimated memory use for writing.
113+ The default values are tuned for a total write memory of around 400 MiB per fill context.
114+ </td>
115+ </tr>
116+
117+ <tr>
118+ <td>`UseBufferedWrite`</td>
119+ <td>`bool`</td>
120+ <td>`true`</td>
121+ <td>
122+ Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them
123+ to keep pages of the same column adjacent, and coalesces the writes when committing a cluster.
124+ </td>
125+ </tr>
126+
127+ <tr>
128+ <td>`UseDirectIO`</td>
129+ <td>`bool`</td>
130+ <td>`false`</td>
131+ <td>
132+ Whether to use Direct I/O for writing. Note that this introduces alignment requirements that may vary between
133+ filesystems and platforms.
134+ </td>
135+ </tr>
136+
137+ <tr>
138+ <td>`WriteBufferSize`</td>
139+ <td>`std::size_t`</td>
140+ <td>4 MiB</td>
141+ <td>
142+ Buffer size to use for writing to files, must be a multiple of 4096 bytes. Testing suggests that 4 MiB gives best
143+ performance (with Direct I/O) at a reasonable memory consumption.
144+ </td>
145+ </tr>
146+
147+ <tr>
148+ <td>`UseImplicitMT`</td>
149+ <td>EImplicitMT</td>
150+ <td>EImplicitMT::kDefault</td>
151+ <td>
152+ Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on.
153+ </td>
154+ </tr>
155+
156+ <tr>
157+ <td>`EnablePageChecksums`</td>
158+ <td>`bool`</td>
159+ <td>`true`</td>
160+ <td>
161+ If set, checksums will be calculated and written for every page.
162+ </td>
163+ </tr>
164+
165+ <tr>
166+ <td>`EnableSamePageMerging`</td>
167+ <td>`bool`</td>
168+ <td>`true`</td>
169+ <td>
170+ If set, identical pages are deduplicated and aliased on disk. Requires page checksums.
171+ </td>
172+ </tr>
173+
174+ </table>
45175*/
46176// clang-format on
47177class RNTupleWriteOptions {
@@ -53,45 +183,22 @@ public:
53183
54184 // clang-format off
55185 static constexpr std::uint64_t kDefaultMaxKeySize = 0x4000'0000 ; // 1 GiB
186+ // clang-format on
56187
57188 friend Internal::RNTupleWriteOptionsManip;
58- // clang-format on
59189
60190protected:
61191 std::uint32_t fCompression {RCompressionSetting::EDefaults::kUseGeneralPurpose };
62- // / Approximation of the target compressed cluster size
63192 std::size_t fApproxZippedClusterSize = 128 * 1024 * 1024 ;
64- // / Memory limit for committing a cluster: with very high compression ratio, we need a limit
65- // / on how large the I/O buffer can grow during writing.
66193 std::size_t fMaxUnzippedClusterSize = 10 * fApproxZippedClusterSize ;
67- // / Initially, columns start with a page of this size. The default value is chosen to accomodate at least 32 elements
68- // / of 64 bits, or 64 elements of 32 bits. If more elements are needed, pages are increased up until the byte limit
69- // / given by fMaxUnzippedPageSize or until the total page buffer limit is reached (as a sum of all page buffers).
70- // / The total write buffer limit needs to be large enough to hold the initial pages of all columns.
71194 std::size_t fInitialUnzippedPageSize = 256 ;
72- // / Pages can grow only to the given limit in bytes.
73195 std::size_t fMaxUnzippedPageSize = 1024 * 1024 ;
74- // / The maximum size that the sum of all page buffers used for writing into a persistent sink are allowed to use.
75- // / If set to zero, RNTuple will auto-adjust the budget based on the value of fApproxZippedClusterSize.
76- // / If set manually, the size needs to be large enough to hold all initial page buffers.
77- // / The total amount of memory for writing is larger, e.g. for the additional compressed buffers etc.
78- // / Use RNTupleModel::EstimateWriteMemoryUsage() for the total estimated memory use for writing.
79- // / The default values are tuned for a total write memory of around 300 MB per fill context.
80196 std::size_t fPageBufferBudget = 0 ;
81- // / Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them
82- // / to keep pages of the same column adjacent, and coalesces the writes when committing a cluster.
83197 bool fUseBufferedWrite = true ;
84- // / Whether to use Direct I/O for writing. Note that this introduces alignment requirements that may very between
85- // / filesystems and platforms.
86198 bool fUseDirectIO = false ;
87- // / Buffer size to use for writing to files, must be a multiple of 4096 bytes. Testing suggests that 4MiB gives best
88- // / performance (with Direct I/O) at a reasonable memory consumption.
89199 std::size_t fWriteBufferSize = 4 * 1024 * 1024 ;
90- // / Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on.
91200 EImplicitMT fUseImplicitMT = EImplicitMT::kDefault ;
92- // / If set, checksums will be calculated and written for every page.
93201 bool fEnablePageChecksums = true ;
94- // / If set, identical pages are deduplicated and aliased on disk. Requires page checksums.
95202 bool fEnableSamePageMerging = true ;
96203 // / Specifies the max size of a payload storeable into a single TKey. When writing an RNTuple to a ROOT file,
97204 // / any payload whose size exceeds this will be split into multiple keys.
0 commit comments