iri_string/parser/
validate.rs

1//! Validating parsers for non-trusted (possibly invalid) input.
2
3mod authority;
4mod path;
5
6use crate::parser::char;
7use crate::parser::str::{
8    find_split, find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded,
9};
10use crate::spec::Spec;
11use crate::validate::{Error, ErrorKind};
12
13pub(crate) use self::authority::{validate_authority, validate_host, validate_userinfo};
14pub(crate) use self::path::{validate_path, validate_path_segment};
15use self::path::{
16    validate_path_abempty, validate_path_absolute_authority_absent,
17    validate_path_relative_authority_absent,
18};
19
20/// Returns `Ok(_)` if the string matches `scheme`.
21pub(crate) fn validate_scheme(i: &str) -> Result<(), Error> {
22    let bytes = i.as_bytes();
23    if !i.is_empty()
24        && bytes[0].is_ascii_alphabetic()
25        && bytes[1..]
26            .iter()
27            .all(|&b| b.is_ascii() && char::is_ascii_scheme_continue(b))
28    {
29        Ok(())
30    } else {
31        Err(Error::with_kind(ErrorKind::InvalidScheme))
32    }
33}
34
35/// Returns `Ok(_)` if the string matches `query` or `iquery`.
36pub(crate) fn validate_query<S: Spec>(i: &str) -> Result<(), Error> {
37    let is_valid =
38        satisfy_chars_with_pct_encoded(i, char::is_ascii_frag_query, char::is_nonascii_query::<S>);
39    if is_valid {
40        Ok(())
41    } else {
42        Err(Error::with_kind(ErrorKind::InvalidQuery))
43    }
44}
45
46/// Returns `Ok(_)` if the string matches `authority path-abempty` rule sequence.
47fn validate_authority_path_abempty<S: Spec>(i: &str) -> Result<(), Error> {
48    let (maybe_authority, maybe_path) = match find_split(i, b'/') {
49        Some(v) => v,
50        None => (i, ""),
51    };
52    validate_authority::<S>(maybe_authority)?;
53    validate_path_abempty::<S>(maybe_path)
54}
55
56/// Returns `Ok(_)` if the string matches `URI`/`IRI` rules.
57#[inline]
58pub(crate) fn validate_uri<S: Spec>(i: &str) -> Result<(), Error> {
59    validate_uri_reference_common::<S>(i, UriReferenceRule::Absolute)
60}
61
62/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules.
63#[inline]
64pub(crate) fn validate_uri_reference<S: Spec>(i: &str) -> Result<(), Error> {
65    validate_uri_reference_common::<S>(i, UriReferenceRule::Any)
66}
67
68/// Returns `Ok(_)` if the string matches `absolute-URI`/`absolute-IRI` rules.
69#[inline]
70pub(crate) fn validate_absolute_uri<S: Spec>(i: &str) -> Result<(), Error> {
71    validate_uri_reference_common::<S>(i, UriReferenceRule::AbsoluteWithoutFragment)
72}
73
/// Syntax rule selecting which kind of URI/IRI reference is being validated.
///
/// The variant decides whether a relative reference and/or a fragment part
/// is acceptable (see `is_relative_allowed` and `is_fragment_allowed`).
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum UriReferenceRule {
    /// `URI` and `IRI`.
    ///
    /// This must not be relative, and can have a fragment.
    Absolute,
    /// `absolute-URI` and `absolute-IRI`.
    ///
    /// This must not be relative, and cannot have a fragment.
    AbsoluteWithoutFragment,
    /// `URI-reference` and `IRI-reference`.
    ///
    /// This can be relative, and can have a fragment.
    Any,
}
90
91impl UriReferenceRule {
92    /// Returns `true` is the relative reference is allowed.
93    #[inline]
94    #[must_use]
95    fn is_relative_allowed(self) -> bool {
96        self == Self::Any
97    }
98
99    /// Returns `true` is the fragment part is allowed.
100    #[inline]
101    #[must_use]
102    fn is_fragment_allowed(self) -> bool {
103        matches!(self, Self::Absolute | Self::Any)
104    }
105}
106
107/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules.
108fn validate_uri_reference_common<S: Spec>(
109    i: &str,
110    ref_rule: UriReferenceRule,
111) -> Result<(), Error> {
112    // Validate `scheme ":"`.
113    let (i, _scheme) = match find_split_hole(i, b':') {
114        None => {
115            if ref_rule.is_relative_allowed() {
116                return validate_relative_ref::<S>(i);
117            } else {
118                return Err(Error::with_kind(ErrorKind::UnexpectedRelative));
119            }
120        }
121        Some((maybe_scheme, rest)) => {
122            if validate_scheme(maybe_scheme).is_err() {
123                // The string before the first colon is not a scheme.
124                // Falling back to `relative-ref` parsing.
125                if ref_rule.is_relative_allowed() {
126                    return validate_relative_ref::<S>(i);
127                } else {
128                    return Err(Error::with_kind(ErrorKind::InvalidScheme));
129                }
130            }
131            (rest, maybe_scheme)
132        }
133    };
134
135    // Validate `hier-part`.
136    let after_path = match i.strip_prefix("//") {
137        Some(i) => {
138            let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') {
139                Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))),
140                None => (i, None),
141            };
142            validate_authority_path_abempty::<S>(maybe_authority_path)?;
143            after_path
144        }
145        None => {
146            let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') {
147                Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))),
148                None => (i, None),
149            };
150            // Authority is absent.
151            validate_path_absolute_authority_absent::<S>(maybe_path)?;
152            after_path
153        }
154    };
155
156    // Validate `[ "?" query ] [ "#" fragment ]`.
157    if let Some((first, rest)) = after_path {
158        validate_after_path::<S>(first, rest, ref_rule.is_fragment_allowed())?;
159    }
160    Ok(())
161}
162
163/// Returns `Ok(_)` if the string matches `relative-ref`/`irelative-ref` rules.
164pub(crate) fn validate_relative_ref<S: Spec>(i: &str) -> Result<(), Error> {
165    // Validate `relative-part`.
166    let after_path = match i.strip_prefix("//") {
167        Some(i) => {
168            let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') {
169                Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))),
170                None => (i, None),
171            };
172            validate_authority_path_abempty::<S>(maybe_authority_path)?;
173            after_path
174        }
175        None => {
176            let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') {
177                Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))),
178                None => (i, None),
179            };
180            // Authority is absent.
181            validate_path_relative_authority_absent::<S>(maybe_path)?;
182            after_path
183        }
184    };
185
186    // Validate `[ "?" query ] [ "#" fragment ]`.
187    if let Some((first, rest)) = after_path {
188        validate_after_path::<S>(first, rest, true)?;
189    }
190    Ok(())
191}
192
193/// Returns `Ok(_)` if the string matches `[ "?" query ] [ "#" fragment ]` (or IRI version).
194fn validate_after_path<S: Spec>(first: u8, rest: &str, accept_fragment: bool) -> Result<(), Error> {
195    let (maybe_query, maybe_fragment) = if first == b'?' {
196        match find_split_hole(rest, b'#') {
197            Some(v) => v,
198            None => (rest, ""),
199        }
200    } else {
201        debug_assert_eq!(first, b'#');
202        ("", rest)
203    };
204    validate_query::<S>(maybe_query)?;
205    if !accept_fragment && !maybe_fragment.is_empty() {
206        return Err(Error::with_kind(ErrorKind::UnexpectedFragment));
207    }
208    validate_fragment::<S>(maybe_fragment)
209}
210
211/// Returns `Ok(_)` if the string matches `fragment`/`ifragment` rules.
212pub(crate) fn validate_fragment<S: Spec>(i: &str) -> Result<(), Error> {
213    let is_valid = satisfy_chars_with_pct_encoded(
214        i,
215        char::is_ascii_frag_query,
216        char::is_nonascii_fragment::<S>,
217    );
218    if is_valid {
219        Ok(())
220    } else {
221        Err(Error::with_kind(ErrorKind::InvalidFragment))
222    }
223}