mediatype/
parse.rs

1use super::{error::*, name::*};
2use std::{num::NonZeroU8, ops::Range};
3
/// Compact record of where each component of a parsed media type lives in
/// the source string.
///
/// Only lengths are stored for the type, subtype and suffix; the accessor
/// methods turn them into byte ranges.
#[derive(Clone, Debug)]
pub struct Indices {
    /// Length in bytes of the top-level type, which starts at offset 0.
    ty: NonZeroU8,
    /// Length in bytes of the subtype, which starts right after the `/`.
    subty: NonZeroU8,
    /// Length in bytes of the suffix, not counting the `+`; `0` means no suffix.
    suffix: u8,
    /// One `[name_start, name_end, value_start, value_end]` entry per parameter.
    params: Box<[[usize; 4]]>,
}
11
12impl Indices {
13    pub const fn ty(&self) -> Range<usize> {
14        0..self.ty.get() as _
15    }
16
17    pub const fn subty(&self) -> Range<usize> {
18        let start = self.ty.get() as usize + 1;
19        let end = start + self.subty.get() as usize;
20        start..end
21    }
22
23    pub const fn suffix(&self) -> Option<Range<usize>> {
24        let start = self.ty.get() as usize + 1 + self.subty.get() as usize + 1;
25        let end = start + self.suffix as usize;
26        if start < end {
27            Some(start..end)
28        } else {
29            None
30        }
31    }
32
33    pub const fn params(&self) -> &[[usize; 4]] {
34        &self.params
35    }
36
37    pub fn parse(s: &str) -> Result<(Self, usize), MediaTypeError> {
38        let (ty, right) = match s.bytes().take(Name::MAX_LENGTH + 1).position(|b| b == b'/') {
39            Some(slash) => (&s[..slash], &s[slash + 1..]),
40            None => {
41                return Err(MediaTypeError::InvalidTypeName);
42            }
43        };
44
45        if !is_restricted_name(ty) {
46            return Err(MediaTypeError::InvalidTypeName);
47        }
48
49        let suffix_end = right
50            .find(|c: char| !is_restricted_char(c))
51            .unwrap_or(right.len());
52        let suffix_start = right[..suffix_end].rfind('+');
53
54        let (subty, suffix) = suffix_start.map_or_else(
55            || (&right[..suffix_end], ""),
56            |suffix_start| (&right[..suffix_start], &right[suffix_start + 1..suffix_end]),
57        );
58
59        if !is_restricted_name(subty) {
60            return Err(MediaTypeError::InvalidSubtypeName);
61        }
62
63        if !suffix.is_empty() && !is_restricted_name(&suffix[1..]) {
64            return Err(MediaTypeError::InvalidSuffix);
65        }
66
67        let sub_end = ty.len() + 1 + subty.len();
68        let params_start = sub_end
69            + if suffix.is_empty() {
70                0
71            } else {
72                suffix.len() + 1
73            };
74
75        let (mut params, params_len) = parse_params(&s[params_start..])?;
76        for elem in &mut params {
77            for v in elem.iter_mut() {
78                *v += params_start;
79            }
80        }
81
82        Ok((
83            Self {
84                ty: NonZeroU8::new(ty.len().try_into().unwrap()).unwrap(),
85                subty: NonZeroU8::new(subty.len().try_into().unwrap()).unwrap(),
86                suffix: suffix.len().try_into().unwrap(),
87                params: params.into_boxed_slice(),
88            },
89            params_start + params_len,
90        ))
91    }
92}
93
/// Test helper: parses `s` and re-renders it from the resulting indices in a
/// normalized `type/subtype[+suffix][; name=value]...` form.
#[cfg(test)]
fn parse_to_string(s: &str) -> Result<String, MediaTypeError> {
    use std::fmt::Write;

    let (indices, _) = Indices::parse(s)?;

    let mut rendered = String::new();
    write!(rendered, "{}/{}", &s[indices.ty()], &s[indices.subty()]).unwrap();

    if let Some(range) = indices.suffix() {
        write!(rendered, "+{}", &s[range]).unwrap();
    }

    for &[name_start, name_end, value_start, value_end] in indices.params() {
        let name = &s[name_start..name_end];
        let value = &s[value_start..value_end];
        write!(rendered, "; {}={}", name, value).unwrap();
    }

    Ok(rendered)
}
117
118pub fn is_restricted_name(s: &str) -> bool {
119    s.len() <= Name::MAX_LENGTH
120        && s.starts_with(|c: char| c.is_ascii_alphanumeric() || c == '*')
121        && is_restricted_str(s)
122}
123
124pub fn is_restricted_str(s: &str) -> bool {
125    s.chars().all(is_restricted_char)
126}
127
/// Returns `true` if `c` may appear in a restricted name: an ASCII letter or
/// digit, or one of `! # $ & - ^ _ . + % * '`.
pub fn is_restricted_char(c: char) -> bool {
    const PUNCTUATION: [char; 12] = [
        '!', '#', '$', '&', '-', '^', '_', '.', '+', '%', '*', '\'',
    ];

    c.is_ascii_alphanumeric() || PUNCTUATION.contains(&c)
}
135
/// Returns `true` if `c` is optional whitespace: a space or a horizontal tab.
pub const fn is_ows(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
139
140fn parse_params(s: &str) -> Result<(Vec<[usize; 4]>, usize), MediaTypeError> {
141    let mut vec = Vec::new();
142    let mut offset = 0;
143    let mut len = 0;
144
145    while let Some((name, value)) = parse_param(&s[offset..])? {
146        vec.push([
147            offset + name.start,
148            offset + name.end,
149            offset + value.start,
150            offset + value.end,
151        ]);
152        len = offset + value.end;
153        offset += value.end;
154    }
155
156    Ok((vec, len))
157}
158
159type ParamRange = (Range<usize>, Range<usize>);
160
161fn parse_param(s: &str) -> Result<Option<ParamRange>, MediaTypeError> {
162    let (ows, right) = match s.split_once(';') {
163        Some((ows, right)) if ows.chars().all(is_ows) && right.chars().all(is_ows) => {
164            return Ok(None)
165        }
166        Some((ows, right)) if ows.chars().all(is_ows) => (ows, right),
167        _ if s.chars().all(is_ows) => return Ok(None),
168        _ => return Err(MediaTypeError::InvalidParams),
169    };
170
171    let (name, value) = match right.split_once('=') {
172        Some(pair) => pair,
173        _ => return Err(MediaTypeError::InvalidParams),
174    };
175
176    let key_trimmed = name.trim_start_matches(is_ows).len();
177    let key_start = ows.len() + 1 + name.len() - key_trimmed;
178    let key_range = key_start..key_start + key_trimmed;
179    if !is_restricted_name(&s[key_range.clone()]) {
180        return Err(MediaTypeError::InvalidParamName);
181    }
182
183    let value_start = key_range.end + 1;
184    if let Some(value) = value.strip_prefix('\"') {
185        let value_end = value_start + parse_quoted_value(value)? + 1;
186        let value_range = value_start..value_end;
187        Ok(Some((key_range, value_range)))
188    } else {
189        let value_end = value_start
190            + value
191                .chars()
192                .take_while(|&c| is_restricted_char(c))
193                .map(char::len_utf8)
194                .sum::<usize>();
195        let value_range = value_start..value_end;
196        Ok(Some((key_range, value_range)))
197    }
198}
199
200pub fn parse_quoted_value(s: &str) -> Result<usize, MediaTypeError> {
201    let mut len = 0;
202    let mut escaped = false;
203    for c in s.chars() {
204        len += c.len_utf8();
205        match c {
206            _ if escaped => {
207                escaped = false;
208            }
209            '\\' => {
210                escaped = true;
211            }
212            '"' => return Ok(len),
213            '\n' => return Err(MediaTypeError::InvalidParamValue),
214            _ => (),
215        }
216    }
217    Err(MediaTypeError::InvalidParamValue)
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` parses and renders back as `expected`.
    #[track_caller]
    fn assert_renders(input: &str, expected: &str) {
        assert_eq!(parse_to_string(input), Ok(expected.to_owned()));
    }

    /// Asserts that parsing `input` fails with `expected`.
    #[track_caller]
    fn assert_rejected(input: &str, expected: MediaTypeError) {
        assert_eq!(parse_to_string(input), Err(expected));
    }

    #[test]
    fn parse() {
        // Bare types, with and without a suffix or a trailing semicolon.
        assert_renders("*/*", "*/*");
        assert_renders("text/plain", "text/plain");
        assert_renders("text/plain;", "text/plain");
        assert_renders("image/svg+xml", "image/svg+xml");
        assert_renders("image/svg+xml;", "image/svg+xml");

        // Parameters, including repeated names and a trailing semicolon.
        assert_renders(
            "image/svg+xml; charset=UTF-8",
            "image/svg+xml; charset=UTF-8",
        );
        assert_renders(
            "image/svg+xml; charset=UTF-8;",
            "image/svg+xml; charset=UTF-8",
        );
        assert_renders(
            "image/svg+xml; charset=US-ASCII; charset=UTF-8;",
            "image/svg+xml; charset=US-ASCII; charset=UTF-8",
        );
        assert_renders(
            "image/svg+xml; charset=US-ASCII; hello=WORLD; charset=UTF-8;",
            "image/svg+xml; charset=US-ASCII; hello=WORLD; charset=UTF-8",
        );

        // Optional whitespace around the separator and at the end.
        assert_renders(
            "image/svg+xml    ; charset=UTF-8   ",
            "image/svg+xml; charset=UTF-8",
        );

        // Quoted values, with and without an escaped quote.
        assert_renders(
            "image/svg+xml; charset=\"UTF-8\"",
            "image/svg+xml; charset=\"UTF-8\"",
        );
        assert_renders(
            "image/svg+xml; charset=\"UT\\\"F-8\"",
            "image/svg+xml; charset=\"UT\\\"F-8\"",
        );

        assert_renders(
            "multipart/form-data ; boundary=--boundary13234",
            "multipart/form-data; boundary=--boundary13234",
        );

        // A very long run of trailing whitespace is still accepted.
        let s = "text/plain";
        let padded = format!("{};{}", s, " ".repeat(u16::MAX as usize - 2 - s.len()));
        assert_renders(&padded, "text/plain");

        // Every component at its maximum length.
        let long_name = "a".repeat(Name::MAX_LENGTH);
        let maximal = format!("{}/{}+{}", long_name, long_name, long_name);
        assert_renders(&maximal, &maximal);
    }

    #[test]
    fn parse_error() {
        assert_rejected("", MediaTypeError::InvalidTypeName);
        assert_rejected("textplain", MediaTypeError::InvalidTypeName);
        assert_rejected("text//plain", MediaTypeError::InvalidSubtypeName);
        assert_rejected(" text/plain", MediaTypeError::InvalidTypeName);
        assert_rejected("text/plain; charsetUTF-8", MediaTypeError::InvalidParams);
        assert_rejected("text/plain;;", MediaTypeError::InvalidParams);
        assert_rejected("text/plain;;;", MediaTypeError::InvalidParams);
        assert_rejected(
            "text/plain; charset=\"UTF-8",
            MediaTypeError::InvalidParamValue,
        );
        assert_rejected("text/plain; charset==UTF-8", MediaTypeError::InvalidParams);
        assert_rejected(
            "text/plain; \r\n charset=UTF-8;",
            MediaTypeError::InvalidParamName,
        );

        // Oversized and multi-byte input must fail cleanly rather than panic.
        let oversized = format!("{}/plain", "t".repeat(u16::MAX as usize));
        assert_rejected(&oversized, MediaTypeError::InvalidTypeName);

        let multibyte = "a\u{FFFF}".repeat(Name::MAX_LENGTH);
        assert_rejected(&multibyte, MediaTypeError::InvalidTypeName);

        // Non-ASCII names are rejected.
        assert_rejected("текст/plain", MediaTypeError::InvalidTypeName);
        assert_rejected(
            "text/plain; кодування=UTF-8",
            MediaTypeError::InvalidParamName,
        );
    }
}
336}