whoami/
langs.rs

1use alloc::{string::String, vec::Vec};
2use core::{
3    fmt::{self, Display, Formatter},
4    num::NonZeroU8,
5    str::FromStr,
6};
7
8use crate::{Error, Result};
9
/// Identifier for a spoken language, optionally narrowed to a country
///
/// Values of this type are handed out by the accessor methods of
/// [`LanguagePreferences`].
///
/// Converting with [`ToString::to_string()`] produces the two letter lowercase
/// language code and, when a country is present, a forward slash followed by
/// the two letter uppercase country code (example: `en/US`).
///
/// [`Default`] yields `en/US`, which is meant as a fallback because it is a
/// common choice of lingua franca.  The exact default is not guaranteed to
/// stay the same across whoami versions.
///
/// Language codes come from an unspecified superset of
/// [ISO 639](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes), and
/// country codes come from an unspecified superset of
/// [ISO 3166](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2).
///
/// A `Language` can be compared directly against strings; inside the string,
/// any of `-`, `_`, or `/` may separate the language from the country.
///
/// [`ToString::to_string()`]: std::string::ToString::to_string
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Language {
    /// Two letter language code
    ///
    /// See <https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes>
    lang: [NonZeroU8; 2],
    /// Two letter country code of the dialect, if one was given
    ///
    /// See <https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2>
    country: Option<[NonZeroU8; 2]>,
}
42
43impl Default for Language {
44    fn default() -> Self {
45        Self::from_str("en/US")
46            .expect("this is an internal bug (failed to parse en/US)")
47    }
48}
49
50impl FromStr for Language {
51    type Err = Error;
52
53    /// Reads an `language{/_-}COUNTRY.Encoding` formatted string into a
54    /// `Language` where language is a two letter language code and country is a
55    /// two letter country code.  The encoding is ignored.
56    fn from_str(s: &str) -> Result<Self> {
57        // Strip the encoding off the end if it exists
58        let lang = s.split_terminator('.').next().unwrap_or_default();
59
60        if lang.is_empty() {
61            return Err(Error::empty_record());
62        }
63
64        // Split apart lang and country
65        let mut parts = lang.split(SEPARATORS);
66        let lang = parts
67            .next()
68            .ok_or_else(|| Error::with_invalid_data("No lang"))?
69            .as_bytes();
70        let country = parts.next().unwrap_or("\0\0").as_bytes();
71
72        // Verify that the lengths are valid
73        if parts.next().is_some() {
74            return Err(Error::with_invalid_data("Invalid locale"));
75        } else if lang.len() != 2 {
76            return Err(Error::with_invalid_data("Invalid length lang code"));
77        } else if country.len() != 2 {
78            return Err(Error::with_invalid_data(
79                "Invalid length country code",
80            ));
81        }
82
83        // Verify the contents are valid
84        let Some(lang) = NonZeroU8::new(lang[0]).zip(NonZeroU8::new(lang[1]))
85        else {
86            return Err(Error::with_invalid_data("Lang code contains NUL"));
87        };
88        let lang = [lang.0, lang.1];
89
90        if (country[0] == 0 || country[1] == 0)
91            && (country[0] != 0 || country[1] != 0)
92        {
93            return Err(Error::with_invalid_data("Country code contains NUL"));
94        }
95
96        let country = NonZeroU8::new(country[0])
97            .zip(NonZeroU8::new(country[1]))
98            .map(|country| [country.0, country.1]);
99
100        if !(lang[0].get().is_ascii_lowercase()
101            && lang[1].get().is_ascii_lowercase())
102        {
103            return Err(Error::with_invalid_data(
104                "Lang code not ascii lowercase",
105            ));
106        }
107
108        if let Some(ref country) = country {
109            if !(country[0].get().is_ascii_uppercase()
110                && country[1].get().is_ascii_uppercase())
111            {
112                return Err(Error::with_invalid_data(
113                    "Country code not ascii uppercase",
114                ));
115            }
116        }
117
118        Ok(Self { lang, country })
119    }
120}
121
122impl Display for Language {
123    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
124        f.write_str(&String::from_utf8_lossy(&[
125            self.lang[0].get(),
126            self.lang[1].get(),
127        ]))?;
128
129        let Some(country) = self.country.as_ref() else {
130            return Ok(());
131        };
132
133        f.write_str("/")?;
134        f.write_str(&String::from_utf8_lossy(&[
135            country[0].get(),
136            country[1].get(),
137        ]))
138    }
139}
140
141impl PartialEq<Language> for str {
142    fn eq(&self, lang: &Language) -> bool {
143        lang_str_eq(lang, self)
144    }
145}
146
147impl PartialEq<Language> for &str {
148    fn eq(&self, lang: &Language) -> bool {
149        lang_str_eq(lang, self)
150    }
151}
152
153impl PartialEq<Language> for String {
154    fn eq(&self, lang: &Language) -> bool {
155        lang_str_eq(lang, self)
156    }
157}
158
159impl PartialEq<String> for Language {
160    fn eq(&self, string: &String) -> bool {
161        lang_str_eq(self, string)
162    }
163}
164
165impl PartialEq<str> for Language {
166    fn eq(&self, string: &str) -> bool {
167        lang_str_eq(self, string)
168    }
169}
170
171impl PartialEq<&str> for Language {
172    fn eq(&self, string: &&str) -> bool {
173        lang_str_eq(self, string)
174    }
175}
176
177/// [`Language`] preferences for a user
178///
179/// Returned from [`lang_prefs()`](crate::lang_prefs)
180///
181/// Fields are sorted in order of the user's preference.
182///
183/// POSIX locale values and GNU nonstandard categories are defined in
184/// <https://man7.org/linux/man-pages/man7/locale.7.html>. Windows locale values
185/// are defined in <https://learn.microsoft.com/en-us/cpp/c-runtime-library/locale-categories>.
186#[derive(Debug, Clone, Default)]
187pub struct LanguagePreferences {
188    /// Determines general user language preference, should be used in
189    /// situations which are not encompassed by other [`LanguagePreferences`].
190    pub(crate) fallbacks: Vec<Language>,
191
192    /// Determines collation rules used for sorting and regular expressions,
193    /// including character equivalence classes and multicharacter collating
194    /// elements.
195    pub(crate) collation: Option<Language>,
196
197    /// Determines the interpretation of byte sequences as characters (e.g.,
198    /// single versus multibyte characters), character classifications (e.g.,
199    /// alphabetic or digit), and the behavior of character classes.
200    pub(crate) char_classes: Option<Language>,
201
202    /// Determines the formatting used for monetary-related numeric values,
203    /// i.e, the way numbers are usually printed with details such as
204    /// decimal point versus decimal comma.
205    pub(crate) monetary: Option<Language>,
206
207    /// Determines the language in which messages are
208    /// displayed and what an affirmative or negative answer looks
209    /// like.
210    pub(crate) messages: Option<Language>,
211
212    /// Determines the formatting rules used for nonmonetary numeric values.
213    /// For example, the thousands separator and the radix character.
214    pub(crate) numeric: Option<Language>,
215
216    /// Determines format and contents of date and time information.
217    pub(crate) time: Option<Language>,
218}
219
220impl Display for LanguagePreferences {
221    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
222        let langs: [(&str, Vec<Language>); 6] = [
223            ("Collation", self.collation_langs().collect()),
224            ("CharClasses", self.char_class_langs().collect()),
225            ("Monetary", self.monetary_langs().collect()),
226            ("Messages", self.message_langs().collect()),
227            ("Numeric", self.numeric_langs().collect()),
228            ("Time", self.time_langs().collect()),
229        ];
230        for (i, (name, langs)) in langs.iter().enumerate() {
231            if i != 0 {
232                f.write_str(",")?;
233            }
234            write!(f, "{name}=")?;
235            for (j, lang) in langs.iter().enumerate() {
236                if j != 0 {
237                    f.write_str(":")?;
238                }
239                write!(f, "{lang}")?;
240            }
241        }
242
243        Ok(())
244    }
245}
246
247impl LanguagePreferences {
248    fn chain_fallbacks<'a>(
249        &'a self,
250        l: &Option<Language>,
251    ) -> impl Iterator<Item = Language> + 'a {
252        let lang_without_country = if let Some(ref lang) = l {
253            lang.country.is_some().then_some(Language {
254                lang: lang.lang,
255                country: None,
256            })
257        } else {
258            None
259        };
260
261        (*l).into_iter()
262            .chain(lang_without_country)
263            .chain(self.fallbacks.iter().cloned())
264    }
265
266    /// Returns the collation langs of this [`LanguagePreferences`] in order of
267    /// the user's preference
268    ///
269    /// Collation langs are used for sorting and regular expressions,
270    /// including character equivalence classes and multicharacter collating
271    /// elements.
272    pub fn collation_langs(&self) -> impl Iterator<Item = Language> + '_ {
273        self.chain_fallbacks(&self.collation)
274    }
275
276    /// Returns the char class langs of this [`LanguagePreferences`] in order of
277    /// the user's preference
278    ///
279    /// Char class langs determine the interpretation of byte sequences as
280    /// characters (e.g., single versus multibyte characters), character
281    /// classifications (e.g., alphabetic or digit), and the behavior of
282    /// character classes.
283    pub fn char_class_langs(&self) -> impl Iterator<Item = Language> + '_ {
284        self.chain_fallbacks(&self.char_classes)
285    }
286
287    /// Returns the monetary langs of this [`LanguagePreferences`] in order of
288    /// the user's preference
289    ///
290    /// Monetary langs determine the formatting used for monetary-related
291    /// numeric values, i.e, the way numbers are usually printed with details
292    /// such as decimal point versus decimal comma.
293    ///
294    /// For nonmonetary numeric values, see
295    /// [`LanguagePreferences::numeric_langs`]
296    pub fn monetary_langs(&self) -> impl Iterator<Item = Language> + '_ {
297        self.chain_fallbacks(&self.monetary)
298    }
299
300    /// Returns the messages langs of this [`LanguagePreferences`] in order of
301    /// the user's preference
302    ///
303    /// Message determines the language in which messages are
304    /// displayed and what an affirmative or negative answer looks
305    /// like.
306    pub fn message_langs(&self) -> impl Iterator<Item = Language> + '_ {
307        self.chain_fallbacks(&self.messages)
308    }
309
310    /// Returns the numeric langs of this [`LanguagePreferences`] in order of
311    /// the user's preference
312    ///
313    /// Numeric langs determine the formatting rules used for nonmonetary
314    /// numeric values. For example, the thousands separator and the radix
315    /// character.
316    ///
317    /// For monetary formatting, see [`LanguagePreferences::monetary_langs`].
318    pub fn numeric_langs(&self) -> impl Iterator<Item = Language> + '_ {
319        self.chain_fallbacks(&self.numeric)
320    }
321
322    /// Returns the time langs of this [`LanguagePreferences`] in order of the
323    /// user's preference
324    ///
325    /// Time langs determine format and contents of date and time information.
326    pub fn time_langs(&self) -> impl Iterator<Item = Language> + '_ {
327        self.chain_fallbacks(&self.time)
328    }
329
330    pub(crate) fn add_stripped_fallbacks(mut self) -> Self {
331        let mut no_country_langs = Vec::new();
332
333        for lang in self.fallbacks.iter() {
334            if lang.country.is_some() {
335                no_country_langs.push(Language {
336                    lang: lang.lang,
337                    country: None,
338                });
339            } else {
340                let Some(i) = no_country_langs.iter().position(|x| x == lang)
341                else {
342                    continue;
343                };
344
345                no_country_langs.remove(i);
346            }
347        }
348
349        self.fallbacks.extend(no_country_langs);
350        self
351    }
352}
353
354fn lang_str_eq(language: &Language, string: &str) -> bool {
355    let mut iter = string.split(SEPARATORS);
356    let string_lang = iter.next().map(|s| s.as_bytes());
357    let string_country = iter.next().map(|s| s.as_bytes());
358    let end = iter.next();
359    let lang = [language.lang[0].get(), language.lang[1].get()];
360    let Some(country) = language.country.as_ref() else {
361        return end.is_none()
362            && string_lang == Some(&lang)
363            && string_country.is_none();
364    };
365    let country = [country[0].get(), country[1].get()];
366
367    end.is_none()
368        && string_lang == Some(&lang)
369        && string_country == Some(&country)
370}
371
/// Characters accepted between the language code and the country code, both
/// when parsing a [`Language`] and when comparing one against a string
const SEPARATORS: &[char] = &['_', '-', '/'];