@@ -1522,6 +1522,41 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy(
15221522 dictSize
15231523}
15241524
1525+ /// Train a dictionary from an array of samples.
1526+ ///
1527+ /// Samples must be stored concatenated in a single flat buffer `samplesBuffer`, supplied with an
1528+ /// array of sizes `samplesSizes`, providing the size of each sample, in order.
1529+ ///
1530+ /// The resulting dictionary will be saved into `dictBuffer`.
1531+ ///
1532+ /// `params` is optional and can be provided with values set to 0 to mean "default".
1533+ ///
1534+ /// In general, a reasonable dictionary has a size of ~100 KB. It's possible to select a smaller or
1535+ /// larger size, just by specifying `dictBufferCapacity`. In general, it's recommended to provide a
1536+ /// few thousand samples, though this can vary a lot. It's recommended that the total size of all
1537+ /// samples be about 100 times the target size of the dictionary.
1538+ ///
1539+ /// # Returns
1540+ ///
1541+ /// - on success, the size of the dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
1542+ /// - on failure, an error code, which can be tested with [`ZDICT_isError`]
1543+ ///
1544+ /// Dictionary training will fail if there are not enough samples to construct a dictionary, or if
1545+ /// most of the samples are too small (< 8 bytes being the lower limit). If dictionary training
1546+ /// fails, you should use zstd without a dictionary, as the dictionary would've been ineffective
1547+ /// anyway. If you believe your samples would benefit from a dictionary, please open an issue with
1548+ /// details, and we can look into it.
1549+ ///
1550+ /// # Safety
1551+ ///
1552+ /// Behavior is undefined if any of the following conditions are violated:
1553+ ///
1554+ /// - either `dictBufferCapacity` is 0, or `dictBuffer` and `dictBufferCapacity` satisfy the
1555+ /// requirements of [`core::slice::from_raw_parts_mut`].
1556+ /// - either `nbSamples` is 0, or `samplesSizes` and `nbSamples` satisfy the requirements
1557+ /// of [`core::slice::from_raw_parts`].
1558+ /// - either `sum(samplesSizes)` is 0, or `samplesBuffer` and `sum(samplesSizes)` satisfy the
1559+ /// requirements of [`core::slice::from_raw_parts`].
15251560#[ cfg_attr( feature = "export-symbols" , export_name = crate :: prefix!( ZDICT_trainFromBuffer_legacy ) ) ]
15261561pub unsafe extern "C" fn ZDICT_trainFromBuffer_legacy (
15271562 dictBuffer : * mut core:: ffi:: c_void ,
0 commit comments