iri_string/parser/validate/
authority.rs

1//! Parsers for authority.
2
3use core::mem;
4
5use crate::parser::char;
6use crate::parser::str::{
7    find_split_hole, get_wrapped_inner, rfind_split_hole, satisfy_chars_with_pct_encoded,
8    strip_ascii_char_prefix,
9};
10use crate::parser::trusted as trusted_parser;
11use crate::spec::Spec;
12use crate::validate::{Error, ErrorKind};
13
14/// Returns `Ok(_)` if the string matches `userinfo` or `iuserinfo`.
15pub(crate) fn validate_userinfo<S: Spec>(i: &str) -> Result<(), Error> {
16    let is_valid = satisfy_chars_with_pct_encoded(
17        i,
18        char::is_ascii_userinfo_ipvfutureaddr,
19        char::is_nonascii_userinfo::<S>,
20    );
21    if is_valid {
22        Ok(())
23    } else {
24        Err(Error::with_kind(ErrorKind::InvalidUserInfo))
25    }
26}
27
/// Tests whether the string matches `dec-octet`.
///
/// That is, the string must be the decimal notation of an integer from
/// "0" to "255", written without any leading zero.
#[must_use]
fn is_dec_octet(i: &str) -> bool {
    match i.as_bytes() {
        // "0" to "9".
        &[only] => only.is_ascii_digit(),
        // "10" to "99": the first digit must not be zero.
        &[tens, ones] => matches!(tens, b'1'..=b'9') && ones.is_ascii_digit(),
        // "100" to "199".
        &[b'1', tens, ones] => tens.is_ascii_digit() && ones.is_ascii_digit(),
        // "200" to "255".
        &[b'2', tens, ones] => match tens {
            b'0'..=b'4' => ones.is_ascii_digit(),
            b'5' => matches!(ones, b'0'..=b'5'),
            _ => false,
        },
        // Empty, too long, or starting with an unacceptable byte.
        _ => false,
    }
}
42
43/// Returns `Ok(_)` if the string matches `IPv4address`.
44fn validate_ipv4address(i: &str) -> Result<(), Error> {
45    /// Returns `Ok(_)` if the string matches `IPv4address`, or `Err(())` if not.
46    fn validate_ipv4address_impl(i: &str) -> Result<(), ()> {
47        let (first, rest) = find_split_hole(i, b'.').ok_or(())?;
48        if !is_dec_octet(first) {
49            return Err(());
50        }
51        let (second, rest) = find_split_hole(rest, b'.').ok_or(())?;
52        if !is_dec_octet(second) {
53            return Err(());
54        }
55        let (third, fourth) = find_split_hole(rest, b'.').ok_or(())?;
56        if is_dec_octet(third) && is_dec_octet(fourth) {
57            Ok(())
58        } else {
59            Err(())
60        }
61    }
62
63    validate_ipv4address_impl(i).map_err(|_| Error::with_kind(ErrorKind::InvalidHost))
64}
65
/// Kind of a single component split off the front of an IPv6 address.
#[derive(Clone, Copy)]
enum V6AddrPart {
    /// One to four hex digits followed by a double colon
    /// (`[0-9a-fA-F]{1,4}::`).
    H16Omit,
    /// One to four hex digits followed by a single colon
    /// (`[0-9a-fA-F]{1,4}:`).
    H16Cont,
    /// One to four hex digits with nothing after them
    /// (`[0-9a-fA-F]{1,4}`).
    H16End,
    /// An IPv4 address.
    V4,
    /// A double colon with no hex digits before it (`::`).
    Omit,
}
80
81/// Splits the IPv6 address string into the next component and the rest substring.
82fn split_v6_addr_part(i: &str) -> Result<(&str, V6AddrPart), Error> {
83    debug_assert!(!i.is_empty());
84    match find_split_hole(i, b':') {
85        Some((prefix, rest)) => {
86            if prefix.len() >= 5 {
87                return Err(Error::with_kind(ErrorKind::InvalidHost));
88            }
89
90            if prefix.is_empty() {
91                return match strip_ascii_char_prefix(rest, b':') {
92                    Some(rest) => Ok((rest, V6AddrPart::Omit)),
93                    None => Err(Error::with_kind(ErrorKind::InvalidHost)),
94                };
95            }
96
97            // Should be `h16`.
98            debug_assert!((1..=4).contains(&prefix.len()));
99            if !prefix.bytes().all(|b| b.is_ascii_hexdigit()) {
100                return Err(Error::with_kind(ErrorKind::InvalidHost));
101            }
102            match strip_ascii_char_prefix(rest, b':') {
103                Some(rest) => Ok((rest, V6AddrPart::H16Omit)),
104                None => Ok((rest, V6AddrPart::H16Cont)),
105            }
106        }
107        None => {
108            if i.len() >= 5 {
109                // Possibly `IPv4address`.
110                validate_ipv4address(i)?;
111                return Ok(("", V6AddrPart::V4));
112            }
113            if i.bytes().all(|b| b.is_ascii_hexdigit()) {
114                Ok(("", V6AddrPart::H16End))
115            } else {
116                Err(Error::with_kind(ErrorKind::InvalidHost))
117            }
118        }
119    }
120}
121
122/// Returns `Ok(_)` if the string matches `IPv6address`.
123fn validate_ipv6address(mut i: &str) -> Result<(), Error> {
124    let mut h16_count = 0;
125    let mut is_omitted = false;
126    while !i.is_empty() {
127        let (rest, part) = split_v6_addr_part(i)?;
128        match part {
129            V6AddrPart::H16Omit => {
130                h16_count += 1;
131                if mem::replace(&mut is_omitted, true) {
132                    // Omitted twice.
133                    return Err(Error::with_kind(ErrorKind::InvalidHost));
134                }
135            }
136            V6AddrPart::H16Cont => {
137                h16_count += 1;
138                if rest.is_empty() {
139                    // `H16Cont` cannot be the last part of an IPv6 address.
140                    return Err(Error::with_kind(ErrorKind::InvalidHost));
141                }
142            }
143            V6AddrPart::H16End => {
144                h16_count += 1;
145                break;
146            }
147            V6AddrPart::V4 => {
148                debug_assert!(rest.is_empty());
149                h16_count += 2;
150                break;
151            }
152            V6AddrPart::Omit => {
153                if mem::replace(&mut is_omitted, true) {
154                    // Omitted twice.
155                    return Err(Error::with_kind(ErrorKind::InvalidHost));
156                }
157            }
158        }
159        if h16_count > 8 {
160            return Err(Error::with_kind(ErrorKind::InvalidHost));
161        }
162        i = rest;
163    }
164    let is_valid = if is_omitted {
165        h16_count < 8
166    } else {
167        h16_count == 8
168    };
169    if is_valid {
170        Ok(())
171    } else {
172        Err(Error::with_kind(ErrorKind::InvalidHost))
173    }
174}
175
176/// Returns `Ok(_)` if the string matches `authority` or `iauthority`.
177pub(crate) fn validate_authority<S: Spec>(i: &str) -> Result<(), Error> {
178    // Strip and validate `userinfo`.
179    let (i, _userinfo) = match find_split_hole(i, b'@') {
180        Some((maybe_userinfo, i)) => {
181            validate_userinfo::<S>(maybe_userinfo)?;
182            (i, Some(maybe_userinfo))
183        }
184        None => (i, None),
185    };
186    // `host` can contain colons, but `port` cannot.
187    // Strip and validate `port`.
188    let (maybe_host, _port) = match rfind_split_hole(i, b':') {
189        Some((maybe_host, maybe_port)) => {
190            if maybe_port.bytes().all(|b| b.is_ascii_digit()) {
191                (maybe_host, Some(maybe_port))
192            } else {
193                (i, None)
194            }
195        }
196        None => (i, None),
197    };
198    // Validate `host`.
199    validate_host::<S>(maybe_host)
200}
201
202/// Returns `Ok(_)` if the string matches `host` or `ihost`.
203pub(crate) fn validate_host<S: Spec>(i: &str) -> Result<(), Error> {
204    match get_wrapped_inner(i, b'[', b']') {
205        Some(maybe_addr) => {
206            // `IP-literal`.
207            // Note that `v` here is case insensitive. See RFC 3987 section 3.2.2.
208            if let Some(maybe_addr_rest) = strip_ascii_char_prefix(maybe_addr, b'v')
209                .or_else(|| strip_ascii_char_prefix(maybe_addr, b'V'))
210            {
211                // `IPvFuture`.
212                let (maybe_ver, maybe_addr) = find_split_hole(maybe_addr_rest, b'.')
213                    .ok_or(Error::with_kind(ErrorKind::InvalidHost))?;
214                // Validate version.
215                if maybe_ver.is_empty() || !maybe_ver.bytes().all(|b| b.is_ascii_hexdigit()) {
216                    return Err(Error::with_kind(ErrorKind::InvalidHost));
217                }
218                // Validate address.
219                if !maybe_addr.is_empty()
220                    && maybe_addr.is_ascii()
221                    && maybe_addr
222                        .bytes()
223                        .all(char::is_ascii_userinfo_ipvfutureaddr)
224                {
225                    Ok(())
226                } else {
227                    Err(Error::with_kind(ErrorKind::InvalidHost))
228                }
229            } else {
230                // `IPv6address`.
231                validate_ipv6address(maybe_addr)
232            }
233        }
234        None => {
235            // `IPv4address` or `reg-name`. No need to distinguish them here
236            // because `IPv4address` is also syntactically valid as `reg-name`.
237            let is_valid = satisfy_chars_with_pct_encoded(
238                i,
239                char::is_ascii_regname,
240                char::is_nonascii_regname::<S>,
241            );
242            if is_valid {
243                Ok(())
244            } else {
245                Err(Error::with_kind(ErrorKind::InvalidHost))
246            }
247        }
248    }
249}
250
251/// Returns `Ok(_)` if the string matches `reg-name` or `ireg-name`.
252pub(crate) fn validate_reg_name<S: Spec>(i: &str) -> Result<(), Error> {
253    let is_valid_ipv4addr_or_reg_name =
254        satisfy_chars_with_pct_encoded(i, char::is_ascii_regname, char::is_nonascii_regname::<S>);
255
256    // Note that if `is_valid_ipv4addr_or_reg_name` is true, then `i` is
257    // guaranteed to be a valid IPv4Address or a valid reg-name.
258    if is_valid_ipv4addr_or_reg_name && trusted_parser::authority::is_host_reg_name(i) {
259        Ok(())
260    } else {
261        Err(Error::with_kind(ErrorKind::InvalidRegName))
262    }
263}
264
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    use super::*;

    use alloc::format;

    /// Asserts that `$parser` returns `Ok(())` for each of the given inputs.
    macro_rules! assert_validate {
        ($parser:expr, $($input:expr),* $(,)?) => {{
            $({
                let input = $input;
                let input: &str = input.as_ref();
                assert_eq!($parser(input), Ok(()), "input={:?}", input);
            })*
        }};
    }

    #[test]
    fn test_ipv6address() {
        // Hand-picked addresses: mixed-case hex digits of every length, a
        // full eight-group address, an IPv4 tail, and a `::` elision.
        assert_validate!(
            validate_ipv6address,
            "a:bB:cCc:dDdD:e:F:a:B",
            "1:1:1:1:1:1:1:1",
            "1:1:1:1:1:1:1.1.1.1",
            "2001:db8::7",
        );

        // Builds `n` copies of the group `1` joined by colons (empty for `n == 0`).
        let groups = |n: usize| {
            let mut joined = "1:".repeat(n);
            joined.pop();
            joined
        };

        // Generate IPv6 addresses with `::` for every split of at most seven
        // groups into a prefix and a non-empty suffix.
        for head_len in 0..=7 {
            let head = groups(head_len);
            for tail_len in 1..=(7 - head_len) {
                assert_validate!(
                    validate_ipv6address,
                    &format!("{}::{}", head, groups(tail_len))
                );
                // Same address with the last two groups written as an IPv4 address.
                if tail_len > 2 {
                    assert_validate!(
                        validate_ipv6address,
                        &format!("{}::{}:1.1.1.1", head, groups(tail_len - 2))
                    );
                } else if tail_len == 2 {
                    assert_validate!(validate_ipv6address, &format!("{}::1.1.1.1", head));
                }
            }
        }
    }
}