url/
host.rs

1// Copyright 2013-2016 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use crate::net::{Ipv4Addr, Ipv6Addr};
10use alloc::borrow::Cow;
11use alloc::borrow::ToOwned;
12use alloc::string::String;
13use alloc::vec::Vec;
14use core::cmp;
15use core::fmt::{self, Formatter};
16
17use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
18#[cfg(feature = "serde")]
19use serde::{Deserialize, Serialize};
20
21use crate::parser::{ParseError, ParseResult};
22
/// Compact, `Copy` classification of a host.
///
/// IP addresses are stored inline; for a domain only the fact that there is
/// one is recorded, not its text.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum HostInternal {
    /// No host; this is what an empty `Host::Domain` converts to.
    None,
    /// A non-empty domain (the domain text itself is not stored here).
    Domain,
    /// An IPv4 address.
    Ipv4(Ipv4Addr),
    /// An IPv6 address.
    Ipv6(Ipv6Addr),
}
31
32impl From<Host<Cow<'_, str>>> for HostInternal {
33    fn from(host: Host<Cow<'_, str>>) -> Self {
34        match host {
35            Host::Domain(ref s) if s.is_empty() => Self::None,
36            Host::Domain(_) => Self::Domain,
37            Host::Ipv4(address) => Self::Ipv4(address),
38            Host::Ipv6(address) => Self::Ipv6(address),
39        }
40    }
41}
42
/// The host name of a URL.
///
/// The type parameter `S` is the string type used to hold a domain name;
/// it defaults to an owned `String`.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
pub enum Host<S = String> {
    /// A DNS domain name, as '.' dot-separated labels.
    /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
    /// a special URL, or percent encoded for non-special URLs. Hosts for
    /// non-special URLs are also called opaque hosts.
    Domain(S),

    /// An IPv4 address.
    /// `Url::host_str` returns the serialization of this address,
    /// as four decimal integers separated by `.` dots.
    Ipv4(Ipv4Addr),

    /// An IPv6 address.
    /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
    /// in the format per [RFC 5952 *A Recommendation
    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
    /// lowercase hexadecimal with maximal `::` compression.
    Ipv6(Ipv6Addr),
}
65
66impl Host<&str> {
67    /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
68    pub fn to_owned(&self) -> Host<String> {
69        match *self {
70            Host::Domain(domain) => Host::Domain(domain.to_owned()),
71            Host::Ipv4(address) => Host::Ipv4(address),
72            Host::Ipv6(address) => Host::Ipv6(address),
73        }
74    }
75}
76
77impl Host<String> {
78    /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
79    ///
80    /// <https://url.spec.whatwg.org/#host-parsing>
81    pub fn parse(input: &str) -> Result<Self, ParseError> {
82        Host::<Cow<str>>::parse_cow(input.into()).map(|i| i.into_owned())
83    }
84
85    /// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
86    pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
87        Host::<Cow<str>>::parse_opaque_cow(input.into()).map(|i| i.into_owned())
88    }
89}
90
91impl<'a> Host<Cow<'a, str>> {
92    pub(crate) fn parse_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
93        if input.starts_with('[') {
94            if !input.ends_with(']') {
95                return Err(ParseError::InvalidIpv6Address);
96            }
97            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
98        }
99        let domain: Cow<'_, [u8]> = percent_decode(input.as_bytes()).into();
100        let domain: Cow<'a, [u8]> = match domain {
101            Cow::Owned(v) => Cow::Owned(v),
102            // if borrowed then we can use the original cow
103            Cow::Borrowed(_) => match input {
104                Cow::Borrowed(input) => Cow::Borrowed(input.as_bytes()),
105                Cow::Owned(input) => Cow::Owned(input.into_bytes()),
106            },
107        };
108
109        let domain = idna::domain_to_ascii_from_cow(domain, idna::AsciiDenyList::URL)?;
110
111        if domain.is_empty() {
112            return Err(ParseError::EmptyHost);
113        }
114
115        if ends_in_a_number(&domain) {
116            let address = parse_ipv4addr(&domain)?;
117            Ok(Host::Ipv4(address))
118        } else {
119            Ok(Host::Domain(domain))
120        }
121    }
122
123    pub(crate) fn parse_opaque_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
124        if input.starts_with('[') {
125            if !input.ends_with(']') {
126                return Err(ParseError::InvalidIpv6Address);
127            }
128            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
129        }
130
131        let is_invalid_host_char = |c| {
132            matches!(
133                c,
134                '\0' | '\t'
135                    | '\n'
136                    | '\r'
137                    | ' '
138                    | '#'
139                    | '/'
140                    | ':'
141                    | '<'
142                    | '>'
143                    | '?'
144                    | '@'
145                    | '['
146                    | '\\'
147                    | ']'
148                    | '^'
149                    | '|'
150            )
151        };
152
153        if input.find(is_invalid_host_char).is_some() {
154            Err(ParseError::InvalidDomainCharacter)
155        } else {
156            Ok(Host::Domain(
157                match utf8_percent_encode(&input, CONTROLS).into() {
158                    Cow::Owned(v) => Cow::Owned(v),
159                    // if we're borrowing, then we can return the original Cow
160                    Cow::Borrowed(_) => input,
161                },
162            ))
163        }
164    }
165
166    pub(crate) fn into_owned(self) -> Host<String> {
167        match self {
168            Host::Domain(s) => Host::Domain(s.into_owned()),
169            Host::Ipv4(ip) => Host::Ipv4(ip),
170            Host::Ipv6(ip) => Host::Ipv6(ip),
171        }
172    }
173}
174
175impl<S: AsRef<str>> fmt::Display for Host<S> {
176    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
177        match *self {
178            Self::Domain(ref domain) => domain.as_ref().fmt(f),
179            Self::Ipv4(ref addr) => addr.fmt(f),
180            Self::Ipv6(ref addr) => {
181                f.write_str("[")?;
182                write_ipv6(addr, f)?;
183                f.write_str("]")
184            }
185        }
186    }
187}
188
189impl<S, T> PartialEq<Host<T>> for Host<S>
190where
191    S: PartialEq<T>,
192{
193    fn eq(&self, other: &Host<T>) -> bool {
194        match (self, other) {
195            (Self::Domain(a), Host::Domain(b)) => a == b,
196            (Self::Ipv4(a), Host::Ipv4(b)) => a == b,
197            (Self::Ipv6(a), Host::Ipv6(b)) => a == b,
198            (_, _) => false,
199        }
200    }
201}
202
203fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
204    let segments = addr.segments();
205    let (compress_start, compress_end) = longest_zero_sequence(&segments);
206    let mut i = 0;
207    while i < 8 {
208        if i == compress_start {
209            f.write_str(":")?;
210            if i == 0 {
211                f.write_str(":")?;
212            }
213            if compress_end < 8 {
214                i = compress_end;
215            } else {
216                break;
217            }
218        }
219        write!(f, "{:x}", segments[i as usize])?;
220        if i < 7 {
221            f.write_str(":")?;
222        }
223        i += 1;
224    }
225    Ok(())
226}
227
// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
//
// Returns `(start, end)` (end exclusive) of the first longest run of zero
// pieces, or `(-1, -2)` when no run is at least two pieces long.
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    // Best run found so far, as (start, length).
    let mut best: Option<(isize, isize)> = None;
    // Start index of the run of zeroes currently being scanned, if any.
    let mut current: Option<isize> = None;

    // Index 8 is one past the end; it acts as a non-zero sentinel so that a
    // run touching the end of the array gets closed like any other.
    for index in 0..=8isize {
        let is_zero = index < 8 && pieces[index as usize] == 0;
        match (is_zero, current) {
            (true, None) => current = Some(index),
            (false, Some(begin)) => {
                let length = index - begin;
                // Strictly longer only: on ties the earliest run wins.
                if best.map_or(true, |(_, best_length)| length > best_length) {
                    best = Some((begin, length));
                }
                current = None;
            }
            (true, Some(_)) | (false, None) => {}
        }
    }

    // https://url.spec.whatwg.org/#concept-ipv6-serializer
    // step 3: ignore lone zeroes
    match best {
        Some((begin, length)) if length >= 2 => (begin, begin + length),
        _ => (-1, -2),
    }
}
263
264/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
265fn ends_in_a_number(input: &str) -> bool {
266    let mut parts = input.rsplit('.');
267    let last = parts.next().unwrap();
268    let last = if last.is_empty() {
269        if let Some(last) = parts.next() {
270            last
271        } else {
272            return false;
273        }
274    } else {
275        last
276    };
277    if !last.is_empty() && last.as_bytes().iter().all(|c| c.is_ascii_digit()) {
278        return true;
279    }
280
281    parse_ipv4number(last).is_ok()
282}
283
/// Parses one part of an IPv4 address as decimal, octal (leading `0`) or
/// hexadecimal (leading `0x` / `0X`).
///
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
///
/// * `Err(())` — the input is empty or contains a character that is not a
///   digit of the detected radix.
/// * `Ok(None)` — the input is a valid number, but it overflows a `u32`.
/// * `Ok(Some(n))` — the parsed value; a bare radix prefix parses as `0`.
fn parse_ipv4number(input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    // Detect the radix from the prefix and strip that prefix off.
    let (digits, radix) = match input.as_bytes() {
        [b'0', b'x' | b'X', ..] => (&input[2..], 16),
        [b'0', _, ..] => (&input[1..], 8),
        _ => (input, 10),
    };

    if digits.is_empty() {
        return Ok(Some(0));
    }

    // Validate up front: `from_str_radix` alone would also accept a sign,
    // and this leaves overflow as its only possible failure.
    if !digits.bytes().all(|byte| char::from(byte).is_digit(radix)) {
        return Err(());
    }

    Ok(u32::from_str_radix(digits, radix).ok())
}
320
321/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
322fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
323    let mut parts: Vec<&str> = input.split('.').collect();
324    if parts.last() == Some(&"") {
325        parts.pop();
326    }
327    if parts.len() > 4 {
328        return Err(ParseError::InvalidIpv4Address);
329    }
330    let mut numbers: Vec<u32> = Vec::new();
331    for part in parts {
332        match parse_ipv4number(part) {
333            Ok(Some(n)) => numbers.push(n),
334            Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
335            Err(()) => return Err(ParseError::InvalidIpv4Address),
336        };
337    }
338    let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
339    // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
340    if ipv4 > u32::MAX >> (8 * numbers.len() as u32) {
341        return Err(ParseError::InvalidIpv4Address);
342    }
343    if numbers.iter().any(|x| *x > 255) {
344        return Err(ParseError::InvalidIpv4Address);
345    }
346    for (counter, n) in numbers.iter().enumerate() {
347        ipv4 += n << (8 * (3 - counter as u32))
348    }
349    Ok(Ipv4Addr::from(ipv4))
350}
351
/// Parses the textual form of an IPv6 address. Callers in this file pass the
/// text between the `[` and `]` brackets, without the brackets themselves.
///
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
///
/// Accepts hexadecimal pieces separated by `:`, at most one `::`
/// compression, and an optional trailing dotted-decimal IPv4 address that
/// fills the last two pieces. Every rejection is reported as
/// `ParseError::InvalidIpv6Address`.
fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
    let input = input.as_bytes();
    let len = input.len();
    // Set once a `.` shows that the remaining input is an embedded IPv4 address.
    let mut is_ip_v4 = false;
    // The eight 16-bit pieces of the address being built.
    let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
    // Index into `pieces` of the next piece to fill.
    let mut piece_pointer = 0;
    // Piece index at which a `::` compression was seen, if any.
    let mut compress_pointer = None;
    // Byte offset of the read position in `input`.
    let mut i = 0;

    // Inputs shorter than two bytes are rejected outright.
    if len < 2 {
        return Err(ParseError::InvalidIpv6Address);
    }

    // A leading ':' is only valid as the first half of a leading "::".
    if input[0] == b':' {
        if input[1] != b':' {
            return Err(ParseError::InvalidIpv6Address);
        }
        i = 2;
        piece_pointer = 1;
        compress_pointer = Some(1);
    }

    while i < len {
        // All eight pieces are filled, yet input remains.
        if piece_pointer == 8 {
            return Err(ParseError::InvalidIpv6Address);
        }
        // A ':' where a piece should start is the second colon of a "::".
        if input[i] == b':' {
            // Only one compression is allowed.
            if compress_pointer.is_some() {
                return Err(ParseError::InvalidIpv6Address);
            }
            i += 1;
            piece_pointer += 1;
            compress_pointer = Some(piece_pointer);
            continue;
        }
        // Read up to four hexadecimal digits into `value`.
        let start = i;
        let end = cmp::min(len, start + 4);
        let mut value = 0u16;
        while i < end {
            match (input[i] as char).to_digit(16) {
                Some(digit) => {
                    value = value * 0x10 + digit as u16;
                    i += 1;
                }
                None => break,
            }
        }
        if i < len {
            match input[i] {
                b'.' => {
                    // The digits just read begin a dotted-decimal IPv4
                    // address: there must be at least one digit before the
                    // dot, and the read position is rewound so the IPv4 loop
                    // below re-reads them as decimal.
                    if i == start {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    i = start;
                    // The IPv4 address occupies two pieces.
                    if piece_pointer > 6 {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    is_ip_v4 = true;
                }
                b':' => {
                    // Piece separator; it must not be the last byte of the input.
                    i += 1;
                    if i == len {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                }
                _ => return Err(ParseError::InvalidIpv6Address),
            }
        }
        if is_ip_v4 {
            // `value` is discarded; the IPv4 loop recomputes the pieces.
            break;
        }
        pieces[piece_pointer] = value;
        piece_pointer += 1;
    }

    if is_ip_v4 {
        if piece_pointer > 6 {
            return Err(ParseError::InvalidIpv6Address);
        }
        // Count of decimal numbers read so far; exactly four are required.
        let mut numbers_seen = 0;
        while i < len {
            // Every number after the first must be preceded by a '.'.
            if numbers_seen > 0 {
                if numbers_seen < 4 && (i < len && input[i] == b'.') {
                    i += 1
                } else {
                    return Err(ParseError::InvalidIpv6Address);
                }
            }

            // Read one decimal number in the range 0..=255.
            let mut ipv4_piece = None;
            while i < len {
                let digit = match input[i] {
                    c @ b'0'..=b'9' => c - b'0',
                    _ => break,
                };
                match ipv4_piece {
                    None => ipv4_piece = Some(digit as u16),
                    Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
                    Some(ref mut v) => {
                        *v = *v * 10 + digit as u16;
                        if *v > 255 {
                            return Err(ParseError::InvalidIpv6Address);
                        }
                    }
                }
                i += 1;
            }

            // Two consecutive numbers are packed into one 16-bit piece, the
            // first in the high byte. A missing number is an error.
            pieces[piece_pointer] = if let Some(v) = ipv4_piece {
                pieces[piece_pointer] * 0x100 + v
            } else {
                return Err(ParseError::InvalidIpv6Address);
            };
            numbers_seen += 1;

            // Move on to the next piece after every second number.
            if numbers_seen == 2 || numbers_seen == 4 {
                piece_pointer += 1;
            }
        }

        if numbers_seen != 4 {
            return Err(ParseError::InvalidIpv6Address);
        }
    }

    // Any leftover input is an error.
    if i < len {
        return Err(ParseError::InvalidIpv6Address);
    }

    match compress_pointer {
        Some(compress_pointer) => {
            // `swaps` is the number of pieces parsed after the "::". They are
            // moved, last one first, to the end of the array, which leaves
            // zeroes in the compressed gap.
            let mut swaps = piece_pointer - compress_pointer;
            piece_pointer = 7;
            while swaps > 0 {
                pieces.swap(piece_pointer, compress_pointer + swaps - 1);
                swaps -= 1;
                piece_pointer -= 1;
            }
        }
        _ => {
            // Without compression all eight pieces must have been given.
            if piece_pointer != 8 {
                return Err(ParseError::InvalidIpv6Address);
            }
        }
    }
    Ok(Ipv6Addr::new(
        pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
    ))
}