iri_string/parser/trusted/
authority.rs

1//! Parsers for trusted `authority` string.
2
3use crate::components::AuthorityComponents;
4use crate::parser::str::{find_split_hole, rfind_split2};
5
6/// Decomposes the authority into `(userinfo, host, port)`.
7///
8/// The leading `:` is truncated.
9///
10/// # Precondition
11///
12/// The given string must be a valid IRI reference.
13#[inline]
14#[must_use]
15pub(crate) fn decompose_authority(authority: &str) -> AuthorityComponents<'_> {
16    let i = authority;
17    let (i, host_start) = match find_split_hole(i, b'@') {
18        Some((userinfo, rest)) => (rest, userinfo.len() + 1),
19        None => (authority, 0),
20    };
21    let colon_port_len = match rfind_split2(i, b':', b']') {
22        Some((_, suffix)) if suffix.starts_with(':') => suffix.len(),
23        _ => 0,
24    };
25    let host_end = authority.len() - colon_port_len;
26
27    AuthorityComponents {
28        authority,
29        host_start,
30        host_end,
31    }
32}
33
34/// Returns `true` if the given valid host matches [`reg-name`] (or [`ireg-name`]) production rule.
35///
36/// # Precondition
37///
38/// This function expects that the parameter `host` is a valid `host` (or
39/// `ihost`). When this is not satisfied, the function may panic or return a
40/// meaningless value. However, it won't cause any undefined behavior, so this
41/// function is safe.
42///
43/// [`reg-name`]: https://www.rfc-editor.org/rfc/rfc3986.html#page-21
44/// [`ireg-name`]: https://www.rfc-editor.org/rfc/rfc3987.html#page-8
45#[must_use]
46pub(crate) fn is_host_reg_name(host: &str) -> bool {
47    if host.starts_with('[') {
48        // `IP-literal`.
49        debug_assert!(host.ends_with(']'), "the host {host:?} must be IP-literal");
50        return false;
51    }
52
53    /// Minimum length of an IPv4 address.
54    const IPV4_MIN_LEN: usize = "0.0.0.0".len();
55    /// Maximum length of an IPv4 address.
56    const IPV4_MAX_LEN: usize = "255.255.255.255".len();
57
58    if !(IPV4_MIN_LEN..=IPV4_MAX_LEN).contains(&host.len()) {
59        // Not a IPv4 address, and already confirmed not to be `IP-literal`.
60        return true;
61    }
62
63    let mut rest = host;
64    let mut octets: [&str; 4] = Default::default();
65
66    (octets[0], rest) = match find_split_hole(rest, b'.') {
67        Some(v) => v,
68        None => return true,
69    };
70    (octets[1], rest) = match find_split_hole(rest, b'.') {
71        Some(v) => v,
72        None => return true,
73    };
74    (octets[2], octets[3]) = match find_split_hole(rest, b'.') {
75        Some(v) => v,
76        None => return true,
77    };
78
79    let is_ipv4_addr = octets
80        .into_iter()
81        .all(|s| is_decimal_repr_of_octet(s.as_bytes()));
82    !is_ipv4_addr
83}
84
/// Checks whether `digits` is the canonical decimal spelling of a value in
/// `0..=255`.
///
/// Only one to three ASCII digits without a redundant leading zero are
/// accepted, so `b"0"` and `b"10"` match while `b"00"`, `b"01"`, `b"256"`,
/// and the empty slice do not.
#[must_use]
const fn is_decimal_repr_of_octet(digits: &[u8]) -> bool {
    match *digits {
        // 0..=9
        [ones] => ones.is_ascii_digit(),
        // 10..=99 (a leading zero is not allowed)
        [tens, ones] => matches!(tens, b'1'..=b'9') && ones.is_ascii_digit(),
        // 100..=199
        [b'1', tens, ones] => tens.is_ascii_digit() && ones.is_ascii_digit(),
        // 250..=255
        [b'2', b'5', ones] => matches!(ones, b'0'..=b'5'),
        // 200..=249
        [b'2', tens, ones] => matches!(tens, b'0'..=b'4') && ones.is_ascii_digit(),
        // Empty, too long, out of range, or non-digit input.
        _ => false,
    }
}
97
#[cfg(test)]
mod tests {
    /// Checks that hosts are classified correctly as `reg-name` or not.
    #[test]
    fn is_host_reg_name() {
        // Hosts that must be classified as `reg-name`.
        const REG_NAMES: &[&str] = &[
            "www.example.com",
            "example.com",
            "localhost",
            "a",
            "",
            // The last octet is out of range.
            "255.255.255.256",
            // Octets with a redundant leading zero.
            "127.0.00.1",
            "127.0.0.01",
            // Wrong number of dot-separated parts.
            "1.2.3.4.5",
            "1.2.3",
        ];
        // Hosts that are IPv4 addresses or IP literals, not `reg-name`.
        const NON_REG_NAMES: &[&str] = &[
            "0.0.0.0",
            "127.0.0.1",
            "255.255.255.255",
            "[::1]",
            "[v999.zzz:zzz:zzz:zzz]",
        ];

        // Use `assert!` rather than `debug_assert!`: the latter is compiled
        // out when debug assertions are disabled (e.g. `cargo test --release`),
        // which would make this test pass without checking anything.
        for reg_name in REG_NAMES {
            assert!(super::is_host_reg_name(reg_name), "reg_name={reg_name:?}");
        }
        for non_reg_name in NON_REG_NAMES {
            assert!(
                !super::is_host_reg_name(non_reg_name),
                "non_reg_name={non_reg_name:?}"
            );
        }
    }
}