Function zstd::dict::from_sample_iterator
source · pub fn from_sample_iterator<I, R>(
samples: I,
max_size: usize,
) -> Result<Vec<u8>>
Expand description
Train a dictionary from multiple samples.
Unlike `from_samples`, this does not require having a list of all samples.
It also allows for errors that occur while iterating through the samples: each item the iterator yields is a `Result`.
They will still be copied to a continuous array and fed to `from_continuous`.
- `samples` is an iterator of individual samples to train on.
- `max_size` is the maximum size of the dictionary to generate.
The result is the dictionary data. You can, for example, feed it to `CDict::create`.
§Examples
// Train from a couple of json files.
let dict_buffer = zstd::dict::from_sample_iterator(
["file_a.json", "file_b.json"]
.into_iter()
.map(|filename| std::fs::File::open(filename)),
10_000, // 10kB dictionary
).unwrap();
use std::io::BufRead as _;
// Treat each line from stdin as a separate sample.
let dict_buffer = zstd::dict::from_sample_iterator(
std::io::stdin().lock().lines().map(|line: std::io::Result<String>| {
// Transform each line into a `Cursor<Vec<u8>>` so they implement Read.
line.map(String::into_bytes)
.map(std::io::Cursor::new)
}),
10_000, // 10kB dictionary
).unwrap();