Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 25 additions & 9 deletions provider/icu4x-datagen/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ struct Cli {
#[arg(short = 't', long, value_name = "TAG", default_value = "latest")]
#[arg(
help = "Download CLDR JSON data from this GitHub tag (https://github.com/unicode-org/cldr-json/tags)\n\
- Use 'latest' for the latest version verified to work with this version of the binary.\n\
+ Use 'latest' for the latest version verified to work with this version of the binary, \
+ and 'latest-tag' for the literal tag 'latest' on GitHub.\n\
Ignored if '--cldr-root' is present. Requires binary to be built with `networking` Cargo feature (enabled by default).\n\
Note that some markers do not support versions before 41.0.0."
)]
Expand All @@ -156,7 +157,8 @@ struct Cli {
#[arg(long, value_name = "TAG", default_value = "latest")]
#[arg(
help = "Download ICU data from this GitHub tag (https://github.com/unicode-org/icu/tags)\n\
- Use 'latest' for the latest version verified to work with this version of the binary.\n\
+ Use 'latest' for the latest version verified to work with this version of the binary, \
+ and 'latest-tag' for the literal tag 'latest' on GitHub.\n\
Ignored if '--icuexport-root' is present. Requires binary to be built with `networking` Cargo feature (enabled by default).\n\
Note that some markers do not support versions before release-71-1."
)]
Expand All @@ -173,7 +175,9 @@ struct Cli {
icuexport_root: Option<PathBuf>,

#[arg(long, value_name = "TAG", default_value = "17.0.0")]
- #[arg(help = "Download versioned UCD from unicode.org.")]
+ #[arg(help = "Download versioned UCD from unicode.org. \
+ Use 'latest' for the latest version verified to work with this version of the binary, \
+ and 'latest-tag' for the literal tag 'latest' on unicode.org.")]
#[cfg_attr(not(feature = "networking"), arg(hide = true))]
#[cfg(feature = "provider")]
ucd_tag: String,
Expand All @@ -186,7 +190,8 @@ struct Cli {
#[arg(long, value_name = "TAG", default_value = "latest")]
#[arg(
help = "Download segmentation LSTM models from this GitHub tag (https://github.com/unicode-org/lstm_word_segmentation/tags)\n\
- Use 'latest' for the latest version verified to work with this version of the binary.\n\
+ Use 'latest' for the latest version verified to work with this version of the binary, \
+ and 'latest-tag' for the literal tag 'latest' on GitHub.\n\
Ignored if '--segmenter-lstm-root' is present. Requires binary to be built with `networking` Cargo feature (enabled by default)."
)]
#[cfg_attr(not(feature = "networking"), arg(hide = true))]
Expand All @@ -203,7 +208,8 @@ struct Cli {
#[arg(long, value_name = "TAG", default_value = "latest")]
#[arg(
help = "Download tzdb from this IANA tag (https://data.iana.org/time-zones/releases/)\n\
- Use 'latest' for the latest version verified to work with this version of the binary.\n\
+ Use 'latest' for the latest version verified to work with this version of the binary, \
+ and 'latest-tag' for the literal tag 'latest' on IANA.\n\
Ignored if '--tzdb-root' is present. Requires binary to be built with `networking` Cargo feature (enabled by default)."
)]
#[cfg_attr(not(feature = "networking"), arg(hide = true))]
Expand Down Expand Up @@ -428,20 +434,20 @@ fn run(cli: Cli) -> eyre::Result<()> {
fn missing_data_message<T>(e: DataError) -> Result<T, eyre::Report> {
#[cfg(feature = "provider")]
if SourceDataProvider::is_missing_cldr_error(e) {
- eyre::bail!("CLDR data is required for this invocation, set --cldr-path or --cldr-tag");
+ eyre::bail!("CLDR data is required for this invocation, set --cldr-root or --cldr-tag");
} else if SourceDataProvider::is_missing_icuexport_error(e) {
eyre::bail!(
- "ICU data is required for this invocation, set --icuexport-path or --icuexport-tag"
+ "ICU data is required for this invocation, set --icuexport-root or --icuexport-tag"
);
} else if SourceDataProvider::is_missing_segmenter_lstm_error(e) {
- eyre::bail!("Segmentation LSTM data is required for this invocation, set --segementer-lstm-path or --segementer-lstm-tag");
+ eyre::bail!("Segmentation LSTM data is required for this invocation, set --segmenter-lstm-root or --segmenter-lstm-tag");
} else if SourceDataProvider::is_missing_unihan_error(e) {
eyre::bail!(
"Unihan data is required for this invocation, set --unihan-root or --ucd-tag"
);
} else if SourceDataProvider::is_missing_tzdb_error(e) {
eyre::bail!(
- "Timezone data is required for this invocation, set --tzdb-path or --tzdb-tag"
+ "Timezone data is required for this invocation, set --tzdb-root or --tzdb-tag"
);
}

Expand Down Expand Up @@ -489,6 +495,8 @@ fn run(cli: Cli) -> eyre::Result<()> {
#[cfg(feature = "networking")]
(_, "latest") => p.with_cldr_for_tag(SourceDataProvider::TESTED_CLDR_TAG),
#[cfg(feature = "networking")]
+ (_, "latest-tag") => p.with_cldr_for_tag("latest"),
+ #[cfg(feature = "networking")]
(_, tag) => p.with_cldr_for_tag(tag),
#[cfg(not(feature = "networking"))]
(None, _) => p,
Expand All @@ -501,6 +509,8 @@ fn run(cli: Cli) -> eyre::Result<()> {
p.with_icuexport_for_tag(SourceDataProvider::TESTED_ICUEXPORT_TAG)
}
#[cfg(feature = "networking")]
+ (_, "latest-tag") => p.with_icuexport_for_tag("latest"),
+ #[cfg(feature = "networking")]
(_, tag) => p.with_icuexport_for_tag(tag),
#[cfg(not(feature = "networking"))]
(None, _) => p,
Expand All @@ -513,6 +523,8 @@ fn run(cli: Cli) -> eyre::Result<()> {
p.with_segmenter_lstm_for_tag(SourceDataProvider::TESTED_SEGMENTER_LSTM_TAG)
}
#[cfg(feature = "networking")]
+ (_, "latest-tag") => p.with_segmenter_lstm_for_tag("latest"),
+ #[cfg(feature = "networking")]
(_, tag) => p.with_segmenter_lstm_for_tag(tag),
#[cfg(not(feature = "networking"))]
(None, _) => p,
Expand All @@ -525,6 +537,8 @@ fn run(cli: Cli) -> eyre::Result<()> {
p.with_unihan_for_tag(SourceDataProvider::TESTED_UCD_TAG)
}
#[cfg(feature = "networking")]
+ (_, "latest-tag") => p.with_unihan_for_tag("latest"),
+ #[cfg(feature = "networking")]
(_, tag) => p.with_unihan_for_tag(tag),
#[cfg(not(feature = "networking"))]
(None, _) => p,
Expand All @@ -537,6 +551,8 @@ fn run(cli: Cli) -> eyre::Result<()> {
p.with_tzdb_for_tag(SourceDataProvider::TESTED_TZDB_TAG)
}
#[cfg(feature = "networking")]
+ (_, "latest-tag") => p.with_tzdb_for_tag("latest"),
+ #[cfg(feature = "networking")]
(_, tag) => p.with_tzdb_for_tag(tag),
#[cfg(not(feature = "networking"))]
(None, _) => p,
Expand Down
2 changes: 1 addition & 1 deletion provider/source/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ impl SourceDataProvider {
})
}

- /// Adds segmenter LSTM source data to the provider. The path should point to the Unihan ZIP file
+ /// Adds Unihan source data to the provider. The path should point to the Unihan ZIP file
/// (see [Unicode Character Database](https://www.unicode.org/ucd/)).
pub fn with_unihan(self, root: &Path) -> Result<Self, DataError> {
Ok(Self {
Expand Down