url/lib.rs
1// Copyright 2013-2015 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9/*!
10
11rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
12for the [Rust](http://rust-lang.org/) programming language.
13
14
15# URL parsing and data structures
16
17First, URL parsing may fail for various reasons and therefore returns a `Result`.
18
19```
20use url::{Url, ParseError};
21
22assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23```
24
25Let’s parse a valid URL and look at its components.
26
27```
28use url::{Url, Host, Position};
29# use url::ParseError;
30# fn run() -> Result<(), ParseError> {
31let issue_list_url = Url::parse(
32 "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33)?;
34
35
36assert!(issue_list_url.scheme() == "https");
37assert!(issue_list_url.username() == "");
38assert!(issue_list_url.password() == None);
39assert!(issue_list_url.host_str() == Some("github.com"));
40assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41assert!(issue_list_url.port() == None);
42assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44 Some(vec!["rust-lang", "rust", "issues"]));
45assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47assert!(issue_list_url.fragment() == None);
48assert!(!issue_list_url.cannot_be_a_base());
49# Ok(())
50# }
51# run().unwrap();
52```
53
54Some URLs are said to be *cannot-be-a-base*:
55they don’t have a username, password, host, or port,
56and their "path" is an arbitrary string rather than slash-separated segments:
57
58```
59use url::Url;
60# use url::ParseError;
61
62# fn run() -> Result<(), ParseError> {
63let data_url = Url::parse("data:text/plain,Hello?World#")?;
64
65assert!(data_url.cannot_be_a_base());
66assert!(data_url.scheme() == "data");
67assert!(data_url.path() == "text/plain,Hello");
68assert!(data_url.path_segments().is_none());
69assert!(data_url.query() == Some("World"));
70assert!(data_url.fragment() == Some(""));
71# Ok(())
72# }
73# run().unwrap();
74```
75
76## Default Features
77
78Versions `<= 2.5.2` of the crate have no default features. Versions `> 2.5.2` have the default feature 'std'.
79If you are upgrading across this boundary and you have specified `default-features = false`, then
80you will need to add the 'std' feature or the 'alloc' feature to your dependency.
81The 'std' feature has the same behavior as the previous versions. The 'alloc' feature
82provides no_std support.
83
84## Serde
85
86Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
87
88# Base URL
89
90Many contexts allow URL *references* that can be relative to a *base URL*:
91
92```html
93<link rel="stylesheet" href="../main.css">
94```
95
96Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
97
98```
99use url::{Url, ParseError};
100
101assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
102```
103
104Use the `join` method on an `Url` to use it as a base URL:
105
106```
107use url::Url;
108# use url::ParseError;
109
110# fn run() -> Result<(), ParseError> {
111let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
112let css_url = this_document.join("../main.css")?;
113assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
114# Ok(())
115# }
116# run().unwrap();
117```
118
119# Feature: `serde`
120
121If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
122[`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
123[`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
124See [serde documentation](https://serde.rs) for more information.
125
126```toml
127url = { version = "2", features = ["serde"] }
128```
129
130# Feature: `debugger_visualizer`
131
132If you enable the `debugger_visualizer` feature, the `url` crate will include
133a [natvis file](https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects)
134for [Visual Studio](https://www.visualstudio.com/) that allows you to view
135[`Url`](struct.Url.html) objects in the debugger.
136
137This feature requires Rust 1.71 or later.
138
139```toml
140url = { version = "2", features = ["debugger_visualizer"] }
141```
142
143*/
144
145#![no_std]
146#![doc(html_root_url = "https://docs.rs/url/2.5.7")]
147#![cfg_attr(
148 feature = "debugger_visualizer",
149 debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")
150)]
151
152pub use form_urlencoded;
153
154// For forwards compatibility
155#[cfg(feature = "std")]
156extern crate std;
157
158#[macro_use]
159extern crate alloc;
160
161#[cfg(feature = "serde")]
162extern crate serde;
163
164use crate::host::HostInternal;
165
166use crate::net::IpAddr;
167#[cfg(feature = "std")]
168#[cfg(any(
169 unix,
170 windows,
171 target_os = "redox",
172 target_os = "wasi",
173 target_os = "hermit"
174))]
175use crate::net::{SocketAddr, ToSocketAddrs};
176use crate::parser::{to_u32, Context, Parser, SchemeType, USERINFO};
177use alloc::borrow::Cow;
178use alloc::borrow::ToOwned;
179use alloc::str;
180use alloc::string::{String, ToString};
181use core::borrow::Borrow;
182use core::convert::TryFrom;
183use core::fmt::Write;
184use core::ops::{Range, RangeFrom, RangeTo};
185use core::{cmp, fmt, hash, mem};
186use percent_encoding::utf8_percent_encode;
187#[cfg(feature = "std")]
188#[cfg(any(
189 unix,
190 windows,
191 target_os = "redox",
192 target_os = "wasi",
193 target_os = "hermit"
194))]
195use std::io;
196#[cfg(feature = "std")]
197use std::path::{Path, PathBuf};
198
/// `std` version of `net`: re-exports everything from `std::net` so the rest
/// of the crate can name networking types as `crate::net::…` regardless of
/// which feature set is active.
#[cfg(feature = "std")]
pub(crate) mod net {
    pub use std::net::*;
}
/// `no_std` nightly version of `net`: re-exports everything from `core::net`
/// when the `std` feature is disabled.
// NOTE(review): the "nightly" wording may be stale — `core::net` looks to be
// stable in recent toolchains; confirm against the crate's MSRV.
#[cfg(not(feature = "std"))]
pub(crate) mod net {
    pub use core::net::*;
}
209
210pub use crate::host::Host;
211pub use crate::origin::{OpaqueOrigin, Origin};
212pub use crate::parser::{ParseError, SyntaxViolation};
213pub use crate::path_segments::PathSegmentsMut;
214pub use crate::slicing::Position;
215pub use form_urlencoded::EncodingOverride;
216
217mod host;
218mod origin;
219mod parser;
220mod path_segments;
221mod slicing;
222
223#[doc(hidden)]
224pub mod quirks;
225
/// A parsed URL record.
///
/// The whole URL is kept once, in serialized form, in `serialization`.
/// The other fields record where each component sits inside that string
/// (plus the parsed host kind and port), which lets accessors such as
/// `scheme()` hand out borrowed `&str` slices.
#[derive(Clone)]
pub struct Url {
    /// Syntax in pseudo-BNF:
    ///
    /// url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    /// non-hierarchical = non-hierarchical-path
    /// non-hierarchical-path = /* Does not start with "/" */
    /// hierarchical = authority? hierarchical-path
    /// authority = "//" userinfo? host [ ":" port ]?
    /// userinfo = username [ ":" password ]? "@"
    /// hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components
    //
    // The `u32` fields below are positions within `serialization`;
    // `check_invariants` spells out the relationships they must satisfy.
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    // Start of the host text within `serialization`.
    host_start: u32,
    // End of the host text (exclusive); followed by ':' when a port is serialized.
    host_end: u32,
    // Kind of host: none, domain, IPv4 address or IPv6 address.
    host: HostInternal,
    // Port number matching the port text between `host_end` and `path_start`, if any.
    port: Option<u16>,
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
251
/// Full configuration for the URL parser.
///
/// Obtained from [`Url::options`], adjusted through the by-value setter
/// methods, and finally consumed by [`ParseOptions::parse`].
#[derive(Copy, Clone)]
#[must_use]
pub struct ParseOptions<'a> {
    // Base URL handed to the parser; `None` means no base is available.
    base_url: Option<&'a Url>,
    // Optional override for the character encoding of query strings.
    encoding_override: EncodingOverride<'a>,
    // Optional callback for non-fatal `SyntaxViolation`s met while parsing.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
260
261impl<'a> ParseOptions<'a> {
262 /// Change the base URL
263 ///
264 /// See the notes of [`Url::join`] for more details about how this base is considered
265 /// when parsing.
266 pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
267 self.base_url = new;
268 self
269 }
270
271 /// Override the character encoding of query strings.
272 /// This is a legacy concept only relevant for HTML.
273 pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
274 self.encoding_override = new;
275 self
276 }
277
278 /// Call the provided function or closure for a non-fatal `SyntaxViolation`
279 /// when it occurs during parsing. Note that since the provided function is
280 /// `Fn`, the caller might need to utilize _interior mutability_, such as with
281 /// a `RefCell`, to collect the violations.
282 ///
283 /// ## Example
284 /// ```
285 /// use std::cell::RefCell;
286 /// use url::{Url, SyntaxViolation};
287 /// # use url::ParseError;
288 /// # fn run() -> Result<(), url::ParseError> {
289 /// let violations = RefCell::new(Vec::new());
290 /// let url = Url::options()
291 /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
292 /// .parse("https:////example.com")?;
293 /// assert_eq!(url.as_str(), "https://example.com/");
294 /// assert_eq!(violations.into_inner(),
295 /// vec!(SyntaxViolation::ExpectedDoubleSlash));
296 /// # Ok(())
297 /// # }
298 /// # run().unwrap();
299 /// ```
300 pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
301 self.violation_fn = new;
302 self
303 }
304
305 /// Parse an URL string with the configuration so far.
306 pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
307 Parser {
308 serialization: String::with_capacity(input.len()),
309 base_url: self.base_url,
310 query_encoding_override: self.encoding_override,
311 violation_fn: self.violation_fn,
312 context: Context::UrlParser,
313 }
314 .parse_url(input)
315 }
316}
317
318impl Url {
319 /// Parse an absolute URL from a string.
320 ///
321 /// # Examples
322 ///
323 /// ```rust
324 /// use url::Url;
325 /// # use url::ParseError;
326 ///
327 /// # fn run() -> Result<(), ParseError> {
328 /// let url = Url::parse("https://example.net")?;
329 /// # Ok(())
330 /// # }
331 /// # run().unwrap();
332 /// ```
333 ///
334 /// # Errors
335 ///
336 /// If the function can not parse an absolute URL from the given string,
337 /// a [`ParseError`] variant will be returned.
338 ///
339 /// [`ParseError`]: enum.ParseError.html
340 #[inline]
341 pub fn parse(input: &str) -> Result<Self, crate::ParseError> {
342 Self::options().parse(input)
343 }
344
345 /// Parse an absolute URL from a string and add params to its query string.
346 ///
347 /// Existing params are not removed.
348 ///
349 /// # Examples
350 ///
351 /// ```rust
352 /// use url::Url;
353 /// # use url::ParseError;
354 ///
355 /// # fn run() -> Result<(), ParseError> {
356 /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
357 /// &[("lang", "rust"), ("browser", "servo")])?;
358 /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
359 /// # Ok(())
360 /// # }
361 /// # run().unwrap();
362 /// ```
363 ///
364 /// # Errors
365 ///
366 /// If the function can not parse an absolute URL from the given string,
367 /// a [`ParseError`] variant will be returned.
368 ///
369 /// [`ParseError`]: enum.ParseError.html
370 #[inline]
371 pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Self, crate::ParseError>
372 where
373 I: IntoIterator,
374 I::Item: Borrow<(K, V)>,
375 K: AsRef<str>,
376 V: AsRef<str>,
377 {
378 let mut url = Self::options().parse(input);
379
380 if let Ok(ref mut url) = url {
381 url.query_pairs_mut().extend_pairs(iter);
382 }
383
384 url
385 }
386
387 /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
388 fn strip_trailing_spaces_from_opaque_path(&mut self) {
389 if !self.cannot_be_a_base() {
390 return;
391 }
392
393 if self.fragment_start.is_some() {
394 return;
395 }
396
397 if self.query_start.is_some() {
398 return;
399 }
400
401 let trailing_space_count = self
402 .serialization
403 .chars()
404 .rev()
405 .take_while(|c| *c == ' ')
406 .count();
407
408 let start = self.serialization.len() - trailing_space_count;
409
410 self.serialization.truncate(start);
411 }
412
413 /// Parse a string as an URL, with this URL as the base URL.
414 ///
415 /// The inverse of this is [`make_relative`].
416 ///
417 /// # Notes
418 ///
419 /// - A trailing slash is significant.
420 /// Without it, the last path component is considered to be a “file” name
421 /// to be removed to get at the “directory” that is used as the base.
422 /// - A [scheme relative special URL](https://url.spec.whatwg.org/#scheme-relative-special-url-string)
423 /// as input replaces everything in the base URL after the scheme.
424 /// - An absolute URL (with a scheme) as input replaces the whole base URL (even the scheme).
425 ///
426 /// # Examples
427 ///
428 /// ```rust
429 /// use url::Url;
430 /// # use url::ParseError;
431 ///
432 /// // Base without a trailing slash
433 /// # fn run() -> Result<(), ParseError> {
434 /// let base = Url::parse("https://example.net/a/b.html")?;
435 /// let url = base.join("c.png")?;
436 /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png
437 ///
438 /// // Base with a trailing slash
439 /// let base = Url::parse("https://example.net/a/b/")?;
440 /// let url = base.join("c.png")?;
441 /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
442 ///
443 /// // Input as scheme relative special URL
444 /// let base = Url::parse("https://alice.com/a")?;
445 /// let url = base.join("//eve.com/b")?;
446 /// assert_eq!(url.as_str(), "https://eve.com/b");
447 ///
448 /// // Input as base url relative special URL
449 /// let base = Url::parse("https://alice.com/a")?;
450 /// let url = base.join("/v1/meta")?;
451 /// assert_eq!(url.as_str(), "https://alice.com/v1/meta");
452 ///
453 /// // Input as absolute URL
454 /// let base = Url::parse("https://alice.com/a")?;
455 /// let url = base.join("http://eve.com/b")?;
456 /// assert_eq!(url.as_str(), "http://eve.com/b"); // http instead of https
457 ///
458 /// # Ok(())
459 /// # }
460 /// # run().unwrap();
461 /// ```
462 ///
463 /// # Errors
464 ///
465 /// If the function can not parse an URL from the given string
466 /// with this URL as the base URL, a [`ParseError`] variant will be returned.
467 ///
468 /// [`ParseError`]: enum.ParseError.html
469 /// [`make_relative`]: #method.make_relative
470 #[inline]
471 pub fn join(&self, input: &str) -> Result<Self, crate::ParseError> {
472 Self::options().base_url(Some(self)).parse(input)
473 }
474
475 /// Creates a relative URL if possible, with this URL as the base URL.
476 ///
477 /// This is the inverse of [`join`].
478 ///
479 /// # Examples
480 ///
481 /// ```rust
482 /// use url::Url;
483 /// # use url::ParseError;
484 ///
485 /// # fn run() -> Result<(), ParseError> {
486 /// let base = Url::parse("https://example.net/a/b.html")?;
487 /// let url = Url::parse("https://example.net/a/c.png")?;
488 /// let relative = base.make_relative(&url);
489 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
490 ///
491 /// let base = Url::parse("https://example.net/a/b/")?;
492 /// let url = Url::parse("https://example.net/a/b/c.png")?;
493 /// let relative = base.make_relative(&url);
494 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
495 ///
496 /// let base = Url::parse("https://example.net/a/b/")?;
497 /// let url = Url::parse("https://example.net/a/d/c.png")?;
498 /// let relative = base.make_relative(&url);
499 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
500 ///
501 /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
502 /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
503 /// let relative = base.make_relative(&url);
504 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
505 /// # Ok(())
506 /// # }
507 /// # run().unwrap();
508 /// ```
509 ///
510 /// # Errors
511 ///
512 /// If this URL can't be a base for the given URL, `None` is returned.
513 /// This is for example the case if the scheme, host or port are not the same.
514 ///
515 /// [`join`]: #method.join
516 pub fn make_relative(&self, url: &Self) -> Option<String> {
517 if self.cannot_be_a_base() {
518 return None;
519 }
520
521 // Scheme, host and port need to be the same
522 if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
523 return None;
524 }
525
526 // We ignore username/password at this point
527
528 // The path has to be transformed
529 let mut relative = String::new();
530
531 // Extract the filename of both URIs, these need to be handled separately
532 fn extract_path_filename(s: &str) -> (&str, &str) {
533 let last_slash_idx = s.rfind('/').unwrap_or(0);
534 let (path, filename) = s.split_at(last_slash_idx);
535 if filename.is_empty() {
536 (path, "")
537 } else {
538 (path, &filename[1..])
539 }
540 }
541
542 let (base_path, base_filename) = extract_path_filename(self.path());
543 let (url_path, url_filename) = extract_path_filename(url.path());
544
545 let mut base_path = base_path.split('/').peekable();
546 let mut url_path = url_path.split('/').peekable();
547
548 // Skip over the common prefix
549 while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
550 base_path.next();
551 url_path.next();
552 }
553
554 // Add `..` segments for the remainder of the base path
555 for base_path_segment in base_path {
556 // Skip empty last segments
557 if base_path_segment.is_empty() {
558 break;
559 }
560
561 if !relative.is_empty() {
562 relative.push('/');
563 }
564
565 relative.push_str("..");
566 }
567
568 // Append the remainder of the other URI
569 for url_path_segment in url_path {
570 if !relative.is_empty() {
571 relative.push('/');
572 }
573
574 relative.push_str(url_path_segment);
575 }
576
577 // Add the filename if they are not the same
578 if !relative.is_empty() || base_filename != url_filename {
579 // If the URIs filename is empty this means that it was a directory
580 // so we'll have to append a '/'.
581 //
582 // Otherwise append it directly as the new filename.
583 if url_filename.is_empty() {
584 relative.push('/');
585 } else {
586 if !relative.is_empty() {
587 relative.push('/');
588 }
589 relative.push_str(url_filename);
590 }
591 }
592
593 // Query and fragment are only taken from the other URI
594 if let Some(query) = url.query() {
595 relative.push('?');
596 relative.push_str(query);
597 }
598
599 if let Some(fragment) = url.fragment() {
600 relative.push('#');
601 relative.push_str(fragment);
602 }
603
604 Some(relative)
605 }
606
607 /// Return a default `ParseOptions` that can fully configure the URL parser.
608 ///
609 /// # Examples
610 ///
611 /// Get default `ParseOptions`, then change base url
612 ///
613 /// ```rust
614 /// use url::Url;
615 /// # use url::ParseError;
616 /// # fn run() -> Result<(), ParseError> {
617 /// let options = Url::options();
618 /// let api = Url::parse("https://api.example.com")?;
619 /// let base_url = options.base_url(Some(&api));
620 /// let version_url = base_url.parse("version.json")?;
621 /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
622 /// # Ok(())
623 /// # }
624 /// # run().unwrap();
625 /// ```
626 pub fn options<'a>() -> ParseOptions<'a> {
627 ParseOptions {
628 base_url: None,
629 encoding_override: None,
630 violation_fn: None,
631 }
632 }
633
634 /// Return the serialization of this URL.
635 ///
636 /// This is fast since that serialization is already stored in the `Url` struct.
637 ///
638 /// # Examples
639 ///
640 /// ```rust
641 /// use url::Url;
642 /// # use url::ParseError;
643 ///
644 /// # fn run() -> Result<(), ParseError> {
645 /// let url_str = "https://example.net/";
646 /// let url = Url::parse(url_str)?;
647 /// assert_eq!(url.as_str(), url_str);
648 /// # Ok(())
649 /// # }
650 /// # run().unwrap();
651 /// ```
652 #[inline]
653 pub fn as_str(&self) -> &str {
654 &self.serialization
655 }
656
657 /// Return the serialization of this URL.
658 ///
659 /// This consumes the `Url` and takes ownership of the `String` stored in it.
660 ///
661 /// # Examples
662 ///
663 /// ```rust
664 /// use url::Url;
665 /// # use url::ParseError;
666 ///
667 /// # fn run() -> Result<(), ParseError> {
668 /// let url_str = "https://example.net/";
669 /// let url = Url::parse(url_str)?;
670 /// assert_eq!(String::from(url), url_str);
671 /// # Ok(())
672 /// # }
673 /// # run().unwrap();
674 /// ```
675 #[inline]
676 #[deprecated(since = "2.3.0", note = "use Into<String>")]
677 pub fn into_string(self) -> String {
678 self.into()
679 }
680
    /// For internal testing, not part of the public API.
    ///
    /// Methods of the `Url` struct assume a number of invariants.
    /// This checks each of these invariants and returns an `Err` describing
    /// the first one that is not met; `Ok(())` means every check passed.
    /// This is for testing rust-url itself.
    ///
    /// # Panics
    ///
    /// Two conditions panic rather than returning `Err`: the port text in the
    /// serialization failing to parse as a `u16`, and the serialization
    /// failing to parse back into a `Url`.
    #[doc(hidden)]
    pub fn check_invariants(&self) -> Result<(), String> {
        // Local `assert!`/`assert_eq!` shadow the standard macros for the rest
        // of this function: a failed check returns `Err` with a message naming
        // the failed expression and the URL, instead of panicking.
        macro_rules! assert {
            ($x: expr) => {
                if !$x {
                    return Err(format!(
                        "!( {} ) for URL {:?}",
                        stringify!($x),
                        self.serialization
                    ));
                }
            };
        }

        macro_rules! assert_eq {
            ($a: expr, $b: expr) => {
                {
                    let a = $a;
                    let b = $b;
                    if a != b {
                        return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                                           a, b, stringify!($a), stringify!($b),
                                           self.serialization))
                    }
                }
            }
        }

        // Scheme: non-empty, starts with an ASCII letter, continues with
        // letters, digits, '+', '-' or '.', and is terminated by ':'.
        assert!(self.scheme_end >= 1);
        assert!(self.byte_at(0).is_ascii_alphabetic());
        assert!(self
            .slice(1..self.scheme_end)
            .chars()
            .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
        assert_eq!(self.byte_at(self.scheme_end), b':');

        if self.slice(self.scheme_end + 1..).starts_with("//") {
            // URL with authority
            if self.username_end != self.serialization.len() as u32 {
                match self.byte_at(self.username_end) {
                    // ':' introduces a password, which must end with '@' right before the host.
                    b':' => {
                        assert!(self.host_start >= self.username_end + 2);
                        assert_eq!(self.byte_at(self.host_start - 1), b'@');
                    }
                    // '@' directly after the username: no password, host follows.
                    b'@' => assert!(self.host_start == self.username_end + 1),
                    // Any other byte: `username_end` must sit right after "://".
                    _ => assert_eq!(self.username_end, self.scheme_end + 3),
                }
            }
            assert!(self.host_start >= self.username_end);
            assert!(self.host_end >= self.host_start);
            // The serialized host text must agree with the parsed host kind.
            let host_str = self.slice(self.host_start..self.host_end);
            match self.host {
                HostInternal::None => assert_eq!(host_str, ""),
                HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
                HostInternal::Ipv6(address) => {
                    let h: Host<String> = Host::Ipv6(address);
                    assert_eq!(host_str, h.to_string())
                }
                HostInternal::Domain => {
                    // Only special schemes are required to have a non-empty domain.
                    if SchemeType::from(self.scheme()).is_special() {
                        assert!(!host_str.is_empty())
                    }
                }
            }
            if self.path_start == self.host_end {
                // Nothing between host and path: no port may be recorded.
                assert_eq!(self.port, None);
            } else {
                // Otherwise the gap must be ":<port>" and match the stored port.
                assert_eq!(self.byte_at(self.host_end), b':');
                let port_str = self.slice(self.host_end + 1..self.path_start);
                assert_eq!(
                    self.port,
                    Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
                );
            }
            // The path is either empty at end of string, or what follows
            // the authority starts with '/', '#' or '?'.
            assert!(
                self.path_start as usize == self.serialization.len()
                    || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
            );
        } else {
            // Anarchist URL (no authority)
            // All authority offsets collapse to the position right after ':'.
            assert_eq!(self.username_end, self.scheme_end + 1);
            assert_eq!(self.host_start, self.scheme_end + 1);
            assert_eq!(self.host_end, self.scheme_end + 1);
            assert_eq!(self.host, HostInternal::None);
            assert_eq!(self.port, None);
            if self.path().starts_with("//") {
                // special case when first path segment is empty
                // The serialization then has "/." between the ':' and the path.
                assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
                assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
                assert_eq!(self.path_start, self.scheme_end + 3);
            } else {
                assert_eq!(self.path_start, self.scheme_end + 1);
            }
        }
        // Query and fragment offsets point at their '?' / '#' delimiter,
        // not before the path, and the fragment comes after the query.
        if let Some(start) = self.query_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'?');
        }
        if let Some(start) = self.fragment_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'#');
        }
        if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
            assert!(fragment_start > query_start);
        }

        // Round trip: re-parsing the serialization must reproduce every field.
        let other = Self::parse(self.as_str()).expect("Failed to parse myself?");
        assert_eq!(&self.serialization, &other.serialization);
        assert_eq!(self.scheme_end, other.scheme_end);
        assert_eq!(self.username_end, other.username_end);
        assert_eq!(self.host_start, other.host_start);
        assert_eq!(self.host_end, other.host_end);
        assert!(
            self.host == other.host ||
                // XXX No host round-trips to empty host.
                // See https://github.com/whatwg/url/issues/79
                (self.host_str(), other.host_str()) == (None, Some(""))
        );
        assert_eq!(self.port, other.port);
        assert_eq!(self.path_start, other.path_start);
        assert_eq!(self.query_start, other.query_start);
        assert_eq!(self.fragment_start, other.fragment_start);
        Ok(())
    }
810
811 /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
812 ///
813 /// Note: this returns an opaque origin for `file:` URLs, which causes
814 /// `url.origin() != url.origin()`.
815 ///
816 /// # Examples
817 ///
818 /// URL with `ftp` scheme:
819 ///
820 /// ```rust
821 /// use url::{Host, Origin, Url};
822 /// # use url::ParseError;
823 ///
824 /// # fn run() -> Result<(), ParseError> {
825 /// let url = Url::parse("ftp://example.com/foo")?;
826 /// assert_eq!(url.origin(),
827 /// Origin::Tuple("ftp".into(),
828 /// Host::Domain("example.com".into()),
829 /// 21));
830 /// # Ok(())
831 /// # }
832 /// # run().unwrap();
833 /// ```
834 ///
835 /// URL with `blob` scheme:
836 ///
837 /// ```rust
838 /// use url::{Host, Origin, Url};
839 /// # use url::ParseError;
840 ///
841 /// # fn run() -> Result<(), ParseError> {
842 /// let url = Url::parse("blob:https://example.com/foo")?;
843 /// assert_eq!(url.origin(),
844 /// Origin::Tuple("https".into(),
845 /// Host::Domain("example.com".into()),
846 /// 443));
847 /// # Ok(())
848 /// # }
849 /// # run().unwrap();
850 /// ```
851 ///
852 /// URL with `file` scheme:
853 ///
854 /// ```rust
855 /// use url::{Host, Origin, Url};
856 /// # use url::ParseError;
857 ///
858 /// # fn run() -> Result<(), ParseError> {
859 /// let url = Url::parse("file:///tmp/foo")?;
860 /// assert!(!url.origin().is_tuple());
861 ///
862 /// let other_url = Url::parse("file:///tmp/foo")?;
863 /// assert!(url.origin() != other_url.origin());
864 /// # Ok(())
865 /// # }
866 /// # run().unwrap();
867 /// ```
868 ///
869 /// URL with other scheme:
870 ///
871 /// ```rust
872 /// use url::{Host, Origin, Url};
873 /// # use url::ParseError;
874 ///
875 /// # fn run() -> Result<(), ParseError> {
876 /// let url = Url::parse("foo:bar")?;
877 /// assert!(!url.origin().is_tuple());
878 /// # Ok(())
879 /// # }
880 /// # run().unwrap();
881 /// ```
882 #[inline]
883 pub fn origin(&self) -> Origin {
884 origin::url_origin(self)
885 }
886
887 /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
888 ///
889 /// # Examples
890 ///
891 /// ```
892 /// use url::Url;
893 /// # use url::ParseError;
894 ///
895 /// # fn run() -> Result<(), ParseError> {
896 /// let url = Url::parse("file:///tmp/foo")?;
897 /// assert_eq!(url.scheme(), "file");
898 /// # Ok(())
899 /// # }
900 /// # run().unwrap();
901 /// ```
902 #[inline]
903 pub fn scheme(&self) -> &str {
904 self.slice(..self.scheme_end)
905 }
906
907 /// Return whether the URL is special (has a special scheme)
908 ///
909 /// # Examples
910 ///
911 /// ```
912 /// use url::Url;
913 /// # use url::ParseError;
914 ///
915 /// # fn run() -> Result<(), ParseError> {
916 /// assert!(Url::parse("http:///tmp/foo")?.is_special());
917 /// assert!(Url::parse("file:///tmp/foo")?.is_special());
918 /// assert!(!Url::parse("moz:///tmp/foo")?.is_special());
919 /// # Ok(())
920 /// # }
921 /// # run().unwrap();
922 /// ```
923 pub fn is_special(&self) -> bool {
924 let scheme_type = SchemeType::from(self.scheme());
925 scheme_type.is_special()
926 }
927
928 /// Return whether the URL has an 'authority',
929 /// which can contain a username, password, host, and port number.
930 ///
931 /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
932 /// or cannot-be-a-base like `data:text/plain,Stuff`.
933 ///
934 /// See also the `authority` method.
935 ///
936 /// # Examples
937 ///
938 /// ```
939 /// use url::Url;
940 /// # use url::ParseError;
941 ///
942 /// # fn run() -> Result<(), ParseError> {
943 /// let url = Url::parse("ftp://rms@example.com")?;
944 /// assert!(url.has_authority());
945 ///
946 /// let url = Url::parse("unix:/run/foo.socket")?;
947 /// assert!(!url.has_authority());
948 ///
949 /// let url = Url::parse("data:text/plain,Stuff")?;
950 /// assert!(!url.has_authority());
951 /// # Ok(())
952 /// # }
953 /// # run().unwrap();
954 /// ```
955 #[inline]
956 pub fn has_authority(&self) -> bool {
957 debug_assert!(self.byte_at(self.scheme_end) == b':');
958 self.slice(self.scheme_end..).starts_with("://")
959 }
960
961 /// Return the authority of this URL as an ASCII string.
962 ///
963 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
964 /// of a special URL, or percent encoded for non-special URLs.
965 /// IPv6 addresses are given between `[` and `]` brackets.
966 /// Ports are omitted if they match the well known port of a special URL.
967 ///
968 /// Username and password are percent-encoded.
969 ///
970 /// See also the `has_authority` method.
971 ///
972 /// # Examples
973 ///
974 /// ```
975 /// use url::Url;
976 /// # use url::ParseError;
977 ///
978 /// # fn run() -> Result<(), ParseError> {
979 /// let url = Url::parse("unix:/run/foo.socket")?;
980 /// assert_eq!(url.authority(), "");
981 /// let url = Url::parse("file:///tmp/foo")?;
982 /// assert_eq!(url.authority(), "");
983 /// let url = Url::parse("https://user:password@example.com/tmp/foo")?;
984 /// assert_eq!(url.authority(), "user:password@example.com");
985 /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo")?;
986 /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667");
987 /// let url = Url::parse("http://àlex.рф.example.com:80/foo")?;
988 /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com");
989 /// # Ok(())
990 /// # }
991 /// # run().unwrap();
992 /// ```
993 pub fn authority(&self) -> &str {
994 let scheme_separator_len = "://".len() as u32;
995 if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len {
996 self.slice(self.scheme_end + scheme_separator_len..self.path_start)
997 } else {
998 ""
999 }
1000 }
1001
1002 /// Return whether this URL is a cannot-be-a-base URL,
1003 /// meaning that parsing a relative URL string with this URL as the base will return an error.
1004 ///
1005 /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
1006 /// as is typically the case of `data:` and `mailto:` URLs.
1007 ///
1008 /// # Examples
1009 ///
1010 /// ```
1011 /// use url::Url;
1012 /// # use url::ParseError;
1013 ///
1014 /// # fn run() -> Result<(), ParseError> {
1015 /// let url = Url::parse("ftp://rms@example.com")?;
1016 /// assert!(!url.cannot_be_a_base());
1017 ///
1018 /// let url = Url::parse("unix:/run/foo.socket")?;
1019 /// assert!(!url.cannot_be_a_base());
1020 ///
1021 /// let url = Url::parse("data:text/plain,Stuff")?;
1022 /// assert!(url.cannot_be_a_base());
1023 /// # Ok(())
1024 /// # }
1025 /// # run().unwrap();
1026 /// ```
1027 #[inline]
1028 pub fn cannot_be_a_base(&self) -> bool {
1029 !self.slice(self.scheme_end + 1..).starts_with('/')
1030 }
1031
1032 /// Return the username for this URL (typically the empty string)
1033 /// as a percent-encoded ASCII string.
1034 ///
1035 /// # Examples
1036 ///
1037 /// ```
1038 /// use url::Url;
1039 /// # use url::ParseError;
1040 ///
1041 /// # fn run() -> Result<(), ParseError> {
1042 /// let url = Url::parse("ftp://rms@example.com")?;
1043 /// assert_eq!(url.username(), "rms");
1044 ///
1045 /// let url = Url::parse("ftp://:secret123@example.com")?;
1046 /// assert_eq!(url.username(), "");
1047 ///
1048 /// let url = Url::parse("https://example.com")?;
1049 /// assert_eq!(url.username(), "");
1050 /// # Ok(())
1051 /// # }
1052 /// # run().unwrap();
1053 /// ```
1054 pub fn username(&self) -> &str {
1055 let scheme_separator_len = "://".len() as u32;
1056 if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
1057 self.slice(self.scheme_end + scheme_separator_len..self.username_end)
1058 } else {
1059 ""
1060 }
1061 }
1062
1063 /// Return the password for this URL, if any, as a percent-encoded ASCII string.
1064 ///
1065 /// # Examples
1066 ///
1067 /// ```
1068 /// use url::Url;
1069 /// # use url::ParseError;
1070 ///
1071 /// # fn run() -> Result<(), ParseError> {
1072 /// let url = Url::parse("ftp://rms:secret123@example.com")?;
1073 /// assert_eq!(url.password(), Some("secret123"));
1074 ///
1075 /// let url = Url::parse("ftp://:secret123@example.com")?;
1076 /// assert_eq!(url.password(), Some("secret123"));
1077 ///
1078 /// let url = Url::parse("ftp://rms@example.com")?;
1079 /// assert_eq!(url.password(), None);
1080 ///
1081 /// let url = Url::parse("https://example.com")?;
1082 /// assert_eq!(url.password(), None);
1083 /// # Ok(())
1084 /// # }
1085 /// # run().unwrap();
1086 /// ```
1087 pub fn password(&self) -> Option<&str> {
1088 // This ':' is not the one marking a port number since a host can not be empty.
1089 // (Except for file: URLs, which do not have port numbers.)
1090 if self.has_authority()
1091 && self.username_end != self.serialization.len() as u32
1092 && self.byte_at(self.username_end) == b':'
1093 {
1094 debug_assert!(self.byte_at(self.host_start - 1) == b'@');
1095 Some(self.slice(self.username_end + 1..self.host_start - 1))
1096 } else {
1097 None
1098 }
1099 }
1100
1101 /// Equivalent to `url.host().is_some()`.
1102 ///
1103 /// # Examples
1104 ///
1105 /// ```
1106 /// use url::Url;
1107 /// # use url::ParseError;
1108 ///
1109 /// # fn run() -> Result<(), ParseError> {
1110 /// let url = Url::parse("ftp://rms@example.com")?;
1111 /// assert!(url.has_host());
1112 ///
1113 /// let url = Url::parse("unix:/run/foo.socket")?;
1114 /// assert!(!url.has_host());
1115 ///
1116 /// let url = Url::parse("data:text/plain,Stuff")?;
1117 /// assert!(!url.has_host());
1118 /// # Ok(())
1119 /// # }
1120 /// # run().unwrap();
1121 /// ```
1122 pub fn has_host(&self) -> bool {
1123 !matches!(self.host, HostInternal::None)
1124 }
1125
1126 /// Return the string representation of the host (domain or IP address) for this URL, if any.
1127 ///
1128 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1129 /// of a special URL, or percent encoded for non-special URLs.
1130 /// IPv6 addresses are given between `[` and `]` brackets.
1131 ///
1132 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1133 /// don’t have a host.
1134 ///
1135 /// See also the `host` method.
1136 ///
1137 /// # Examples
1138 ///
1139 /// ```
1140 /// use url::Url;
1141 /// # use url::ParseError;
1142 ///
1143 /// # fn run() -> Result<(), ParseError> {
1144 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1145 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1146 ///
1147 /// let url = Url::parse("https://subdomain.example.com")?;
1148 /// assert_eq!(url.host_str(), Some("subdomain.example.com"));
1149 ///
1150 /// let url = Url::parse("ftp://rms@example.com")?;
1151 /// assert_eq!(url.host_str(), Some("example.com"));
1152 ///
1153 /// let url = Url::parse("unix:/run/foo.socket")?;
1154 /// assert_eq!(url.host_str(), None);
1155 ///
1156 /// let url = Url::parse("data:text/plain,Stuff")?;
1157 /// assert_eq!(url.host_str(), None);
1158 /// # Ok(())
1159 /// # }
1160 /// # run().unwrap();
1161 /// ```
1162 pub fn host_str(&self) -> Option<&str> {
1163 if self.has_host() {
1164 Some(self.slice(self.host_start..self.host_end))
1165 } else {
1166 None
1167 }
1168 }
1169
1170 /// Return the parsed representation of the host for this URL.
1171 /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
1172 /// of a special URL, or percent encoded for non-special URLs.
1173 ///
1174 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1175 /// don’t have a host.
1176 ///
1177 /// See also the `host_str` method.
1178 ///
1179 /// # Examples
1180 ///
1181 /// ```
1182 /// use url::Url;
1183 /// # use url::ParseError;
1184 ///
1185 /// # fn run() -> Result<(), ParseError> {
1186 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1187 /// assert!(url.host().is_some());
1188 ///
1189 /// let url = Url::parse("ftp://rms@example.com")?;
1190 /// assert!(url.host().is_some());
1191 ///
1192 /// let url = Url::parse("unix:/run/foo.socket")?;
1193 /// assert!(url.host().is_none());
1194 ///
1195 /// let url = Url::parse("data:text/plain,Stuff")?;
1196 /// assert!(url.host().is_none());
1197 /// # Ok(())
1198 /// # }
1199 /// # run().unwrap();
1200 /// ```
1201 pub fn host(&self) -> Option<Host<&str>> {
1202 match self.host {
1203 HostInternal::None => None,
1204 HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1205 HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1206 HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1207 }
1208 }
1209
1210 /// If this URL has a host and it is a domain name (not an IP address), return it.
1211 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1212 /// of a special URL, or percent encoded for non-special URLs.
1213 ///
1214 /// # Examples
1215 ///
1216 /// ```
1217 /// use url::Url;
1218 /// # use url::ParseError;
1219 ///
1220 /// # fn run() -> Result<(), ParseError> {
1221 /// let url = Url::parse("https://127.0.0.1/")?;
1222 /// assert_eq!(url.domain(), None);
1223 ///
1224 /// let url = Url::parse("mailto:rms@example.net")?;
1225 /// assert_eq!(url.domain(), None);
1226 ///
1227 /// let url = Url::parse("https://example.com/")?;
1228 /// assert_eq!(url.domain(), Some("example.com"));
1229 ///
1230 /// let url = Url::parse("https://subdomain.example.com/")?;
1231 /// assert_eq!(url.domain(), Some("subdomain.example.com"));
1232 ///
1233 /// # Ok(())
1234 /// # }
1235 /// # run().unwrap();
1236 /// ```
1237 pub fn domain(&self) -> Option<&str> {
1238 match self.host {
1239 HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1240 _ => None,
1241 }
1242 }
1243
    /// Return the port number for this URL, if any.
    ///
    /// Default port numbers are never reflected by the serialization, so a URL
    /// written with its scheme's default port (such as `https://example.com:443/`)
    /// reports `None` here. Use the `port_or_known_default()` method if you want
    /// a default port number returned.
    ///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("https://example.com")?;
    /// assert_eq!(url.port(), None);
    ///
    /// let url = Url::parse("https://example.com:443/")?;
    /// assert_eq!(url.port(), None);
    ///
    /// let url = Url::parse("ssh://example.com:22")?;
    /// assert_eq!(url.port(), Some(22));
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    #[inline]
    pub fn port(&self) -> Option<u16> {
        self.port
    }
1272
1273 /// Return the port number for this URL, or the default port number if it is known.
1274 ///
1275 /// This method only knows the default port number
1276 /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1277 ///
1278 /// For URLs in these schemes, this method always returns `Some(_)`.
1279 /// For other schemes, it is the same as `Url::port()`.
1280 ///
1281 /// # Examples
1282 ///
1283 /// ```
1284 /// use url::Url;
1285 /// # use url::ParseError;
1286 ///
1287 /// # fn run() -> Result<(), ParseError> {
1288 /// let url = Url::parse("foo://example.com")?;
1289 /// assert_eq!(url.port_or_known_default(), None);
1290 ///
1291 /// let url = Url::parse("foo://example.com:1456")?;
1292 /// assert_eq!(url.port_or_known_default(), Some(1456));
1293 ///
1294 /// let url = Url::parse("https://example.com")?;
1295 /// assert_eq!(url.port_or_known_default(), Some(443));
1296 /// # Ok(())
1297 /// # }
1298 /// # run().unwrap();
1299 /// ```
1300 #[inline]
1301 pub fn port_or_known_default(&self) -> Option<u16> {
1302 self.port.or_else(|| parser::default_port(self.scheme()))
1303 }
1304
1305 /// Resolve a URL’s host and port number to `SocketAddr`.
1306 ///
1307 /// If the URL has the default port number of a scheme that is unknown to this library,
1308 /// `default_port_number` provides an opportunity to provide the actual port number.
1309 /// In non-example code this should be implemented either simply as `|| None`,
1310 /// or by matching on the URL’s `.scheme()`.
1311 ///
1312 /// If the host is a domain, it is resolved using the standard library’s DNS support.
1313 ///
1314 /// # Examples
1315 ///
1316 /// ```no_run
1317 /// let url = url::Url::parse("https://example.net/").unwrap();
1318 /// let addrs = url.socket_addrs(|| None).unwrap();
1319 /// std::net::TcpStream::connect(&*addrs)
1320 /// # ;
1321 /// ```
1322 ///
1323 /// ```
1324 /// /// With application-specific known default port numbers
1325 /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
1326 /// url.socket_addrs(|| match url.scheme() {
1327 /// "socks5" | "socks5h" => Some(1080),
1328 /// _ => None,
1329 /// })
1330 /// }
1331 /// ```
1332 #[cfg(feature = "std")]
1333 #[cfg(any(
1334 unix,
1335 windows,
1336 target_os = "redox",
1337 target_os = "wasi",
1338 target_os = "hermit"
1339 ))]
1340 pub fn socket_addrs(
1341 &self,
1342 default_port_number: impl Fn() -> Option<u16>,
1343 ) -> io::Result<alloc::vec::Vec<SocketAddr>> {
1344 // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1345 // causes borrowck issues because the return value borrows `default_port_number`:
1346 //
1347 // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1348 //
1349 // > This RFC proposes that *all* type parameters are considered in scope
1350 // > for `impl Trait` in return position
1351
1352 fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1353 opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1354 }
1355
1356 let host = io_result(self.host(), "No host name in the URL")?;
1357 let port = io_result(
1358 self.port_or_known_default().or_else(default_port_number),
1359 "No port number in the URL",
1360 )?;
1361 Ok(match host {
1362 Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1363 Host::Ipv4(ip) => vec![(ip, port).into()],
1364 Host::Ipv6(ip) => vec![(ip, port).into()],
1365 })
1366 }
1367
1368 /// Return the path for this URL, as a percent-encoded ASCII string.
1369 /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1370 /// For other URLs, this starts with a '/' slash
1371 /// and continues with slash-separated path segments.
1372 ///
1373 /// # Examples
1374 ///
1375 /// ```rust
1376 /// use url::{Url, ParseError};
1377 ///
1378 /// # fn run() -> Result<(), ParseError> {
1379 /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1380 /// assert_eq!(url.path(), "/api/versions");
1381 ///
1382 /// let url = Url::parse("https://example.com")?;
1383 /// assert_eq!(url.path(), "/");
1384 ///
1385 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1386 /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1387 /// # Ok(())
1388 /// # }
1389 /// # run().unwrap();
1390 /// ```
1391 pub fn path(&self) -> &str {
1392 match (self.query_start, self.fragment_start) {
1393 (None, None) => self.slice(self.path_start..),
1394 (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1395 self.slice(self.path_start..next_component_start)
1396 }
1397 }
1398 }
1399
1400 /// Unless this URL is cannot-be-a-base,
1401 /// return an iterator of '/' slash-separated path segments,
1402 /// each as a percent-encoded ASCII string.
1403 ///
1404 /// Return `None` for cannot-be-a-base URLs.
1405 ///
1406 /// When `Some` is returned, the iterator always contains at least one string
1407 /// (which may be empty).
1408 ///
1409 /// # Examples
1410 ///
1411 /// ```
1412 /// use url::Url;
1413 ///
1414 /// # #[cfg(feature = "std")]
1415 /// # use std::error::Error;
1416 /// # #[cfg(not(feature = "std"))]
1417 /// # use core::error::Error;
1418 ///
1419 /// # fn run() -> Result<(), Box<dyn Error>> {
1420 /// let url = Url::parse("https://example.com/foo/bar")?;
1421 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1422 /// assert_eq!(path_segments.next(), Some("foo"));
1423 /// assert_eq!(path_segments.next(), Some("bar"));
1424 /// assert_eq!(path_segments.next(), None);
1425 ///
1426 /// let url = Url::parse("https://example.com")?;
1427 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1428 /// assert_eq!(path_segments.next(), Some(""));
1429 /// assert_eq!(path_segments.next(), None);
1430 ///
1431 /// let url = Url::parse("data:text/plain,HelloWorld")?;
1432 /// assert!(url.path_segments().is_none());
1433 ///
1434 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1435 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1436 /// assert_eq!(path_segments.next(), Some("countries"));
1437 /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1438 /// # Ok(())
1439 /// # }
1440 /// # run().unwrap();
1441 /// ```
1442 pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1443 let path = self.path();
1444 path.strip_prefix('/').map(|remainder| remainder.split('/'))
1445 }
1446
1447 /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1448 ///
1449 /// # Examples
1450 ///
1451 /// ```rust
1452 /// use url::Url;
1453 /// # use url::ParseError;
1454 ///
1455 /// fn run() -> Result<(), ParseError> {
1456 /// let url = Url::parse("https://example.com/products?page=2")?;
1457 /// let query = url.query();
1458 /// assert_eq!(query, Some("page=2"));
1459 ///
1460 /// let url = Url::parse("https://example.com/products")?;
1461 /// let query = url.query();
1462 /// assert!(query.is_none());
1463 ///
1464 /// let url = Url::parse("https://example.com/?country=español")?;
1465 /// let query = url.query();
1466 /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1467 /// # Ok(())
1468 /// # }
1469 /// # run().unwrap();
1470 /// ```
1471 pub fn query(&self) -> Option<&str> {
1472 match (self.query_start, self.fragment_start) {
1473 (None, _) => None,
1474 (Some(query_start), None) => {
1475 debug_assert!(self.byte_at(query_start) == b'?');
1476 Some(self.slice(query_start + 1..))
1477 }
1478 (Some(query_start), Some(fragment_start)) => {
1479 debug_assert!(self.byte_at(query_start) == b'?');
1480 Some(self.slice(query_start + 1..fragment_start))
1481 }
1482 }
1483 }
1484
1485 /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1486 /// and return an iterator of (key, value) pairs.
1487 ///
1488 /// # Examples
1489 ///
1490 /// ```rust
1491 /// use std::borrow::Cow;
1492 ///
1493 /// use url::Url;
1494 /// # use url::ParseError;
1495 ///
1496 /// # fn run() -> Result<(), ParseError> {
1497 /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1498 /// let mut pairs = url.query_pairs();
1499 ///
1500 /// assert_eq!(pairs.count(), 2);
1501 ///
1502 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1503 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1504 /// # Ok(())
1505 /// # }
1506 /// # run().unwrap();
1507 /// ```
1508 #[inline]
1509 pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1510 form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1511 }
1512
1513 /// Return this URL’s fragment identifier, if any.
1514 ///
1515 /// A fragment is the part of the URL after the `#` symbol.
1516 /// The fragment is optional and, if present, contains a fragment identifier
1517 /// that identifies a secondary resource, such as a section heading
1518 /// of a document.
1519 ///
1520 /// In HTML, the fragment identifier is usually the id attribute of a an element
1521 /// that is scrolled to on load. Browsers typically will not send the fragment portion
1522 /// of a URL to the server.
1523 ///
1524 /// **Note:** the parser did *not* percent-encode this component,
1525 /// but the input may have been percent-encoded already.
1526 ///
1527 /// # Examples
1528 ///
1529 /// ```rust
1530 /// use url::Url;
1531 /// # use url::ParseError;
1532 ///
1533 /// # fn run() -> Result<(), ParseError> {
1534 /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1535 ///
1536 /// assert_eq!(url.fragment(), Some("row=4"));
1537 ///
1538 /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1539 ///
1540 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1541 /// # Ok(())
1542 /// # }
1543 /// # run().unwrap();
1544 /// ```
1545 pub fn fragment(&self) -> Option<&str> {
1546 self.fragment_start.map(|start| {
1547 debug_assert!(self.byte_at(start) == b'#');
1548 self.slice(start + 1..)
1549 })
1550 }
1551
1552 fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1553 let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
1554 let result = f(&mut parser);
1555 self.serialization = parser.serialization;
1556 result
1557 }
1558
1559 /// Change this URL’s fragment identifier.
1560 ///
1561 /// # Examples
1562 ///
1563 /// ```rust
1564 /// use url::Url;
1565 /// # use url::ParseError;
1566 ///
1567 /// # fn run() -> Result<(), ParseError> {
1568 /// let mut url = Url::parse("https://example.com/data.csv")?;
1569 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1570 ///
1571 /// url.set_fragment(Some("cell=4,1-6,2"));
1572 /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1573 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1574 ///
1575 /// url.set_fragment(None);
1576 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1577 /// assert!(url.fragment().is_none());
1578 /// # Ok(())
1579 /// # }
1580 /// # run().unwrap();
1581 /// ```
1582 pub fn set_fragment(&mut self, fragment: Option<&str>) {
1583 // Remove any previous fragment
1584 if let Some(start) = self.fragment_start {
1585 debug_assert!(self.byte_at(start) == b'#');
1586 self.serialization.truncate(start as usize);
1587 }
1588 // Write the new one
1589 if let Some(input) = fragment {
1590 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1591 self.serialization.push('#');
1592 self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input)))
1593 } else {
1594 self.fragment_start = None;
1595 self.strip_trailing_spaces_from_opaque_path();
1596 }
1597 }
1598
1599 fn take_fragment(&mut self) -> Option<String> {
1600 self.fragment_start.take().map(|start| {
1601 debug_assert!(self.byte_at(start) == b'#');
1602 let fragment = self.slice(start + 1..).to_owned();
1603 self.serialization.truncate(start as usize);
1604 fragment
1605 })
1606 }
1607
1608 fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1609 if let Some(ref fragment) = fragment {
1610 assert!(self.fragment_start.is_none());
1611 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1612 self.serialization.push('#');
1613 self.serialization.push_str(fragment);
1614 }
1615 }
1616
1617 /// Change this URL’s query string. If `query` is `None`, this URL's
1618 /// query string will be cleared.
1619 ///
1620 /// # Examples
1621 ///
1622 /// ```rust
1623 /// use url::Url;
1624 /// # use url::ParseError;
1625 ///
1626 /// # fn run() -> Result<(), ParseError> {
1627 /// let mut url = Url::parse("https://example.com/products")?;
1628 /// assert_eq!(url.as_str(), "https://example.com/products");
1629 ///
1630 /// url.set_query(Some("page=2"));
1631 /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1632 /// assert_eq!(url.query(), Some("page=2"));
1633 /// # Ok(())
1634 /// # }
1635 /// # run().unwrap();
1636 /// ```
1637 pub fn set_query(&mut self, query: Option<&str>) {
1638 let fragment = self.take_fragment();
1639
1640 // Remove any previous query
1641 if let Some(start) = self.query_start.take() {
1642 debug_assert!(self.byte_at(start) == b'?');
1643 self.serialization.truncate(start as usize);
1644 }
1645 // Write the new query, if any
1646 if let Some(input) = query {
1647 self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1648 self.serialization.push('?');
1649 let scheme_type = SchemeType::from(self.scheme());
1650 let scheme_end = self.scheme_end;
1651 self.mutate(|parser| {
1652 let vfn = parser.violation_fn;
1653 parser.parse_query(
1654 scheme_type,
1655 scheme_end,
1656 parser::Input::new_trim_tab_and_newlines(input, vfn),
1657 )
1658 });
1659 } else {
1660 self.query_start = None;
1661 if fragment.is_none() {
1662 self.strip_trailing_spaces_from_opaque_path();
1663 }
1664 }
1665
1666 self.restore_already_parsed_fragment(fragment);
1667 }
1668
1669 /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1670 /// in `application/x-www-form-urlencoded` syntax.
1671 ///
1672 /// The return value has a method-chaining API:
1673 ///
1674 /// ```rust
1675 /// # use url::{Url, ParseError};
1676 ///
1677 /// # fn run() -> Result<(), ParseError> {
1678 /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1679 /// assert_eq!(url.query(), Some("lang=fr"));
1680 ///
1681 /// url.query_pairs_mut().append_pair("foo", "bar");
1682 /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1683 /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1684 ///
1685 /// url.query_pairs_mut()
1686 /// .clear()
1687 /// .append_pair("foo", "bar & baz")
1688 /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1689 /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1690 /// assert_eq!(url.as_str(),
1691 /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1692 /// # Ok(())
1693 /// # }
1694 /// # run().unwrap();
1695 /// ```
1696 ///
1697 /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1698 /// not `url.set_query(None)`.
1699 ///
1700 /// The state of `Url` is unspecified if this return value is leaked without being dropped.
1701 pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1702 let fragment = self.take_fragment();
1703
1704 let query_start;
1705 if let Some(start) = self.query_start {
1706 debug_assert!(self.byte_at(start) == b'?');
1707 query_start = start as usize;
1708 } else {
1709 query_start = self.serialization.len();
1710 self.query_start = Some(to_u32(query_start).unwrap());
1711 self.serialization.push('?');
1712 }
1713
1714 let query = UrlQuery {
1715 url: Some(self),
1716 fragment,
1717 };
1718 form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1719 }
1720
1721 fn take_after_path(&mut self) -> String {
1722 match (self.query_start, self.fragment_start) {
1723 (Some(i), _) | (None, Some(i)) => {
1724 let after_path = self.slice(i..).to_owned();
1725 self.serialization.truncate(i as usize);
1726 after_path
1727 }
1728 (None, None) => String::new(),
1729 }
1730 }
1731
1732 /// Change this URL’s path.
1733 ///
1734 /// # Examples
1735 ///
1736 /// ```rust
1737 /// use url::Url;
1738 /// # use url::ParseError;
1739 ///
1740 /// # fn run() -> Result<(), ParseError> {
1741 /// let mut url = Url::parse("https://example.com")?;
1742 /// url.set_path("api/comments");
1743 /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1744 /// assert_eq!(url.path(), "/api/comments");
1745 ///
1746 /// let mut url = Url::parse("https://example.com/api")?;
1747 /// url.set_path("data/report.csv");
1748 /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1749 /// assert_eq!(url.path(), "/data/report.csv");
1750 ///
1751 /// // `set_path` percent-encodes the given string if it's not already percent-encoded.
1752 /// let mut url = Url::parse("https://example.com")?;
1753 /// url.set_path("api/some comments");
1754 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1755 /// assert_eq!(url.path(), "/api/some%20comments");
1756 ///
1757 /// // `set_path` will not double percent-encode the string if it's already percent-encoded.
1758 /// let mut url = Url::parse("https://example.com")?;
1759 /// url.set_path("api/some%20comments");
1760 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1761 /// assert_eq!(url.path(), "/api/some%20comments");
1762 ///
1763 /// # Ok(())
1764 /// # }
1765 /// # run().unwrap();
1766 /// ```
1767 pub fn set_path(&mut self, mut path: &str) {
1768 let after_path = self.take_after_path();
1769 let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1770 let cannot_be_a_base = self.cannot_be_a_base();
1771 let scheme_type = SchemeType::from(self.scheme());
1772 self.serialization.truncate(self.path_start as usize);
1773 self.mutate(|parser| {
1774 if cannot_be_a_base {
1775 if path.starts_with('/') {
1776 parser.serialization.push_str("%2F");
1777 path = &path[1..];
1778 }
1779 parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
1780 } else {
1781 let mut has_host = true; // FIXME
1782 parser.parse_path_start(
1783 scheme_type,
1784 &mut has_host,
1785 parser::Input::new_no_trim(path),
1786 );
1787 }
1788 });
1789 self.restore_after_path(old_after_path_pos, &after_path);
1790 }
1791
1792 /// Return an object with methods to manipulate this URL’s path segments.
1793 ///
1794 /// Return `Err(())` if this URL is cannot-be-a-base.
1795 #[allow(clippy::result_unit_err)]
1796 pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1797 if self.cannot_be_a_base() {
1798 Err(())
1799 } else {
1800 Ok(path_segments::new(self))
1801 }
1802 }
1803
1804 fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1805 let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1806 let adjust = |index: &mut u32| {
1807 *index -= old_after_path_position;
1808 *index += new_after_path_position;
1809 };
1810 if let Some(ref mut index) = self.query_start {
1811 adjust(index)
1812 }
1813 if let Some(ref mut index) = self.fragment_start {
1814 adjust(index)
1815 }
1816 self.serialization.push_str(after_path)
1817 }
1818
1819 /// Change this URL’s port number.
1820 ///
1821 /// Note that default port numbers are not reflected in the serialization.
1822 ///
1823 /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme;
1824 /// do nothing and return `Err`.
1825 ///
1826 /// # Examples
1827 ///
1828 /// ```
1829 /// use url::Url;
1830 ///
1831 /// # #[cfg(feature = "std")]
1832 /// # use std::error::Error;
1833 /// # #[cfg(not(feature = "std"))]
1834 /// # use core::error::Error;
1835 ///
1836 /// # fn run() -> Result<(), Box<dyn Error>> {
1837 /// let mut url = Url::parse("ssh://example.net:2048/")?;
1838 ///
1839 /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1840 /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1841 ///
1842 /// url.set_port(None).map_err(|_| "cannot be base")?;
1843 /// assert_eq!(url.as_str(), "ssh://example.net/");
1844 /// # Ok(())
1845 /// # }
1846 /// # run().unwrap();
1847 /// ```
1848 ///
1849 /// Known default port numbers are not reflected:
1850 ///
1851 /// ```rust
1852 /// use url::Url;
1853 ///
1854 /// # #[cfg(feature = "std")]
1855 /// # use std::error::Error;
1856 /// # #[cfg(not(feature = "std"))]
1857 /// # use core::error::Error;
1858 ///
1859 /// # fn run() -> Result<(), Box<dyn Error>> {
1860 /// let mut url = Url::parse("https://example.org/")?;
1861 ///
1862 /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1863 /// assert!(url.port().is_none());
1864 /// # Ok(())
1865 /// # }
1866 /// # run().unwrap();
1867 /// ```
1868 ///
1869 /// Cannot set port for cannot-be-a-base URLs:
1870 ///
1871 /// ```
1872 /// use url::Url;
1873 /// # use url::ParseError;
1874 ///
1875 /// # fn run() -> Result<(), ParseError> {
1876 /// let mut url = Url::parse("mailto:rms@example.net")?;
1877 ///
1878 /// let result = url.set_port(Some(80));
1879 /// assert!(result.is_err());
1880 ///
1881 /// let result = url.set_port(None);
1882 /// assert!(result.is_err());
1883 /// # Ok(())
1884 /// # }
1885 /// # run().unwrap();
1886 /// ```
1887 #[allow(clippy::result_unit_err)]
1888 pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1889 // has_host implies !cannot_be_a_base
1890 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1891 return Err(());
1892 }
1893 if port.is_some() && port == parser::default_port(self.scheme()) {
1894 port = None
1895 }
1896 self.set_port_internal(port);
1897 Ok(())
1898 }
1899
    /// Rewrite the port part of the serialization to match `port` and store `port`
    /// in `self.port`.
    ///
    /// No validation and no default-port normalization happens here.
    /// `path_start`, `query_start` and `fragment_start` are shifted to account
    /// for the text that was removed or inserted between the host and the path.
    fn set_port_internal(&mut self, port: Option<u16>) {
        match (self.port, port) {
            // No port before, none requested: the serialization is already right.
            (None, None) => {}
            // Removing a port: delete the text between the end of the host and the
            // start of the path, then pull the later offsets back by its length.
            (Some(_), None) => {
                self.serialization
                    .drain(self.host_end as usize..self.path_start as usize);
                let offset = self.path_start - self.host_end;
                self.path_start = self.host_end;
                if let Some(ref mut index) = self.query_start {
                    *index -= offset
                }
                if let Some(ref mut index) = self.fragment_start {
                    *index -= offset
                }
            }
            // Same port as before: nothing to rewrite.
            (Some(old), Some(new)) if old == new => {}
            // Adding or changing a port: set the path and everything after it aside,
            // write ":<port>" right after the host, then put the tail back.
            (_, Some(new)) => {
                let path_and_after = self.slice(self.path_start..).to_owned();
                self.serialization.truncate(self.host_end as usize);
                write!(&mut self.serialization, ":{new}").unwrap();
                let old_path_start = self.path_start;
                let new_path_start = to_u32(self.serialization.len()).unwrap();
                self.path_start = new_path_start;
                // Move an offset by the distance the path start has moved.
                let adjust = |index: &mut u32| {
                    *index -= old_path_start;
                    *index += new_path_start;
                };
                if let Some(ref mut index) = self.query_start {
                    adjust(index)
                }
                if let Some(ref mut index) = self.fragment_start {
                    adjust(index)
                }
                self.serialization.push_str(&path_and_after);
            }
        }
        self.port = port;
    }
1938
1939 /// Change this URL’s host.
1940 ///
1941 /// Removing the host (calling this with `None`)
1942 /// will also remove any username, password, and port number.
1943 ///
1944 /// # Examples
1945 ///
1946 /// Change host:
1947 ///
1948 /// ```
1949 /// use url::Url;
1950 /// # use url::ParseError;
1951 ///
1952 /// # fn run() -> Result<(), ParseError> {
1953 /// let mut url = Url::parse("https://example.net")?;
1954 /// let result = url.set_host(Some("rust-lang.org"));
1955 /// assert!(result.is_ok());
1956 /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1957 /// # Ok(())
1958 /// # }
1959 /// # run().unwrap();
1960 /// ```
1961 ///
1962 /// Remove host:
1963 ///
1964 /// ```
1965 /// use url::Url;
1966 /// # use url::ParseError;
1967 ///
1968 /// # fn run() -> Result<(), ParseError> {
1969 /// let mut url = Url::parse("foo://example.net")?;
1970 /// let result = url.set_host(None);
1971 /// assert!(result.is_ok());
1972 /// assert_eq!(url.as_str(), "foo:/");
1973 /// # Ok(())
1974 /// # }
1975 /// # run().unwrap();
1976 /// ```
1977 ///
1978 /// Cannot remove host for 'special' schemes (e.g. `http`):
1979 ///
1980 /// ```
1981 /// use url::Url;
1982 /// # use url::ParseError;
1983 ///
1984 /// # fn run() -> Result<(), ParseError> {
1985 /// let mut url = Url::parse("https://example.net")?;
1986 /// let result = url.set_host(None);
1987 /// assert!(result.is_err());
1988 /// assert_eq!(url.as_str(), "https://example.net/");
1989 /// # Ok(())
1990 /// # }
1991 /// # run().unwrap();
1992 /// ```
1993 ///
1994 /// Cannot change or remove host for cannot-be-a-base URLs:
1995 ///
1996 /// ```
1997 /// use url::Url;
1998 /// # use url::ParseError;
1999 ///
2000 /// # fn run() -> Result<(), ParseError> {
2001 /// let mut url = Url::parse("mailto:rms@example.net")?;
2002 ///
2003 /// let result = url.set_host(Some("rust-lang.org"));
2004 /// assert!(result.is_err());
2005 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2006 ///
2007 /// let result = url.set_host(None);
2008 /// assert!(result.is_err());
2009 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2010 /// # Ok(())
2011 /// # }
2012 /// # run().unwrap();
2013 /// ```
2014 ///
2015 /// # Errors
2016 ///
2017 /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
2018 /// a [`ParseError`] variant will be returned.
2019 ///
2020 /// [`ParseError`]: enum.ParseError.html
2021 pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
2022 if self.cannot_be_a_base() {
2023 return Err(ParseError::SetHostOnCannotBeABaseUrl);
2024 }
2025
2026 let scheme_type = SchemeType::from(self.scheme());
2027
2028 if let Some(host) = host {
2029 if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
2030 return Err(ParseError::EmptyHost);
2031 }
2032 let mut host_substr = host;
2033 // Otherwise, if c is U+003A (:) and the [] flag is unset, then
2034 if !host.starts_with('[') || !host.ends_with(']') {
2035 match host.find(':') {
2036 Some(0) => {
2037 // If buffer is the empty string, validation error, return failure.
2038 return Err(ParseError::InvalidDomainCharacter);
2039 }
2040 // Let host be the result of host parsing buffer
2041 Some(colon_index) => {
2042 host_substr = &host[..colon_index];
2043 }
2044 None => {}
2045 }
2046 }
2047 if SchemeType::from(self.scheme()).is_special() {
2048 self.set_host_internal(Host::parse_cow(host_substr.into())?, None);
2049 } else {
2050 self.set_host_internal(Host::parse_opaque_cow(host_substr.into())?, None);
2051 }
2052 } else if self.has_host() {
2053 if scheme_type.is_special() && !scheme_type.is_file() {
2054 return Err(ParseError::EmptyHost);
2055 } else if self.serialization.len() == self.path_start as usize {
2056 self.serialization.push('/');
2057 }
2058 debug_assert!(self.byte_at(self.scheme_end) == b':');
2059 debug_assert!(self.byte_at(self.path_start) == b'/');
2060
2061 let new_path_start = if scheme_type.is_file() {
2062 self.scheme_end + 3
2063 } else {
2064 self.scheme_end + 1
2065 };
2066
2067 self.serialization
2068 .drain(new_path_start as usize..self.path_start as usize);
2069 let offset = self.path_start - new_path_start;
2070 self.path_start = new_path_start;
2071 self.username_end = new_path_start;
2072 self.host_start = new_path_start;
2073 self.host_end = new_path_start;
2074 self.port = None;
2075 if let Some(ref mut index) = self.query_start {
2076 *index -= offset
2077 }
2078 if let Some(ref mut index) = self.fragment_start {
2079 *index -= offset
2080 }
2081 }
2082 Ok(())
2083 }
2084
2085 /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
2086 fn set_host_internal(&mut self, host: Host<Cow<'_, str>>, opt_new_port: Option<Option<u16>>) {
2087 let old_suffix_pos = if opt_new_port.is_some() {
2088 self.path_start
2089 } else {
2090 self.host_end
2091 };
2092 let suffix = self.slice(old_suffix_pos..).to_owned();
2093 self.serialization.truncate(self.host_start as usize);
2094 if !self.has_authority() {
2095 debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
2096 debug_assert!(self.username_end == self.host_start);
2097 self.serialization.push('/');
2098 self.serialization.push('/');
2099 self.username_end += 2;
2100 self.host_start += 2;
2101 }
2102 write!(&mut self.serialization, "{host}").unwrap();
2103 self.host_end = to_u32(self.serialization.len()).unwrap();
2104 self.host = host.into();
2105
2106 if let Some(new_port) = opt_new_port {
2107 self.port = new_port;
2108 if let Some(port) = new_port {
2109 write!(&mut self.serialization, ":{port}").unwrap();
2110 }
2111 }
2112 let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
2113 self.serialization.push_str(&suffix);
2114
2115 let adjust = |index: &mut u32| {
2116 *index -= old_suffix_pos;
2117 *index += new_suffix_pos;
2118 };
2119 adjust(&mut self.path_start);
2120 if let Some(ref mut index) = self.query_start {
2121 adjust(index)
2122 }
2123 if let Some(ref mut index) = self.fragment_start {
2124 adjust(index)
2125 }
2126 }
2127
2128 /// Change this URL’s host to the given IP address.
2129 ///
2130 /// If this URL is cannot-be-a-base, do nothing and return `Err`.
2131 ///
2132 /// Compared to `Url::set_host`, this skips the host parser.
2133 ///
2134 /// # Examples
2135 ///
2136 /// ```rust
2137 /// use url::{Url, ParseError};
2138 ///
2139 /// # fn run() -> Result<(), ParseError> {
2140 /// let mut url = Url::parse("http://example.com")?;
2141 /// url.set_ip_host("127.0.0.1".parse().unwrap());
2142 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
2143 /// assert_eq!(url.as_str(), "http://127.0.0.1/");
2144 /// # Ok(())
2145 /// # }
2146 /// # run().unwrap();
2147 /// ```
2148 ///
/// Cannot change the host of a `mailto` (cannot-be-a-base) URL to an IP address:
2150 ///
2151 /// ```rust
2152 /// use url::{Url, ParseError};
2153 ///
2154 /// # fn run() -> Result<(), ParseError> {
2155 /// let mut url = Url::parse("mailto:rms@example.com")?;
2156 /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
2157 ///
2158 /// assert_eq!(url.as_str(), "mailto:rms@example.com");
2159 /// assert!(result.is_err());
2160 /// # Ok(())
2161 /// # }
2162 /// # run().unwrap();
2163 /// ```
2164 ///
2165 #[allow(clippy::result_unit_err)]
2166 pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
2167 if self.cannot_be_a_base() {
2168 return Err(());
2169 }
2170
2171 let address = match address {
2172 IpAddr::V4(address) => Host::Ipv4(address),
2173 IpAddr::V6(address) => Host::Ipv6(address),
2174 };
2175 self.set_host_internal(address, None);
2176 Ok(())
2177 }
2178
2179 /// Change this URL’s password.
2180 ///
2181 /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2182 ///
2183 /// # Examples
2184 ///
2185 /// ```rust
2186 /// use url::{Url, ParseError};
2187 ///
2188 /// # fn run() -> Result<(), ParseError> {
2189 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2190 /// let result = url.set_password(Some("secret_password"));
2191 /// assert!(result.is_err());
2192 ///
2193 /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
2194 /// let result = url.set_password(Some("secret_password"));
2195 /// assert_eq!(url.password(), Some("secret_password"));
2196 ///
2197 /// let mut url = Url::parse("ftp://user2:@example.com")?;
2198 /// let result = url.set_password(Some("secret2"));
2199 /// assert!(result.is_ok());
2200 /// assert_eq!(url.password(), Some("secret2"));
2201 /// # Ok(())
2202 /// # }
2203 /// # run().unwrap();
2204 /// ```
2205 #[allow(clippy::result_unit_err)]
2206 pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
2207 // has_host implies !cannot_be_a_base
2208 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2209 return Err(());
2210 }
2211 let password = password.unwrap_or_default();
2212 if !password.is_empty() {
2213 let host_and_after = self.slice(self.host_start..).to_owned();
2214 self.serialization.truncate(self.username_end as usize);
2215 self.serialization.push(':');
2216 self.serialization
2217 .extend(utf8_percent_encode(password, USERINFO));
2218 self.serialization.push('@');
2219
2220 let old_host_start = self.host_start;
2221 let new_host_start = to_u32(self.serialization.len()).unwrap();
2222 let adjust = |index: &mut u32| {
2223 *index -= old_host_start;
2224 *index += new_host_start;
2225 };
2226 self.host_start = new_host_start;
2227 adjust(&mut self.host_end);
2228 adjust(&mut self.path_start);
2229 if let Some(ref mut index) = self.query_start {
2230 adjust(index)
2231 }
2232 if let Some(ref mut index) = self.fragment_start {
2233 adjust(index)
2234 }
2235
2236 self.serialization.push_str(&host_and_after);
2237 } else if self.byte_at(self.username_end) == b':' {
2238 // If there is a password to remove
2239 let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
2240 debug_assert!(has_username_or_password);
2241 let username_start = self.scheme_end + 3;
2242 let empty_username = username_start == self.username_end;
2243 let start = self.username_end; // Remove the ':'
2244 let end = if empty_username {
2245 self.host_start // Remove the '@' as well
2246 } else {
2247 self.host_start - 1 // Keep the '@' to separate the username from the host
2248 };
2249 self.serialization.drain(start as usize..end as usize);
2250 let offset = end - start;
2251 self.host_start -= offset;
2252 self.host_end -= offset;
2253 self.path_start -= offset;
2254 if let Some(ref mut index) = self.query_start {
2255 *index -= offset
2256 }
2257 if let Some(ref mut index) = self.fragment_start {
2258 *index -= offset
2259 }
2260 }
2261 Ok(())
2262 }
2263
2264 /// Change this URL’s username.
2265 ///
2266 /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2267 /// # Examples
2268 ///
/// Cannot set a username on a `mailto` (cannot-be-a-base) URL:
2270 ///
2271 /// ```rust
2272 /// use url::{Url, ParseError};
2273 ///
2274 /// # fn run() -> Result<(), ParseError> {
2275 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2276 /// let result = url.set_username("user1");
2277 /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
2278 /// assert!(result.is_err());
2279 /// # Ok(())
2280 /// # }
2281 /// # run().unwrap();
2282 /// ```
2283 ///
/// Set the username to `user1`:
2285 ///
2286 /// ```rust
2287 /// use url::{Url, ParseError};
2288 ///
2289 /// # fn run() -> Result<(), ParseError> {
2290 /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
2291 /// let result = url.set_username("user1");
2292 /// assert!(result.is_ok());
2293 /// assert_eq!(url.username(), "user1");
2294 /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
2295 /// # Ok(())
2296 /// # }
2297 /// # run().unwrap();
2298 /// ```
2299 #[allow(clippy::result_unit_err)]
2300 pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
2301 // has_host implies !cannot_be_a_base
2302 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2303 return Err(());
2304 }
2305 let username_start = self.scheme_end + 3;
2306 debug_assert!(self.slice(self.scheme_end..username_start) == "://");
2307 if self.slice(username_start..self.username_end) == username {
2308 return Ok(());
2309 }
2310 let after_username = self.slice(self.username_end..).to_owned();
2311 self.serialization.truncate(username_start as usize);
2312 self.serialization
2313 .extend(utf8_percent_encode(username, USERINFO));
2314
2315 let mut removed_bytes = self.username_end;
2316 self.username_end = to_u32(self.serialization.len()).unwrap();
2317 let mut added_bytes = self.username_end;
2318
2319 let new_username_is_empty = self.username_end == username_start;
2320 match (new_username_is_empty, after_username.chars().next()) {
2321 (true, Some('@')) => {
2322 removed_bytes += 1;
2323 self.serialization.push_str(&after_username[1..]);
2324 }
2325 (false, Some('@')) | (_, Some(':')) | (true, _) => {
2326 self.serialization.push_str(&after_username);
2327 }
2328 (false, _) => {
2329 added_bytes += 1;
2330 self.serialization.push('@');
2331 self.serialization.push_str(&after_username);
2332 }
2333 }
2334
2335 let adjust = |index: &mut u32| {
2336 *index -= removed_bytes;
2337 *index += added_bytes;
2338 };
2339 adjust(&mut self.host_start);
2340 adjust(&mut self.host_end);
2341 adjust(&mut self.path_start);
2342 if let Some(ref mut index) = self.query_start {
2343 adjust(index)
2344 }
2345 if let Some(ref mut index) = self.fragment_start {
2346 adjust(index)
2347 }
2348 Ok(())
2349 }
2350
2351 /// Change this URL’s scheme.
2352 ///
2353 /// Do nothing and return `Err` under the following circumstances:
2354 ///
2355 /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
2356 /// * If this URL is cannot-be-a-base and the new scheme is one of
2357 /// `http`, `https`, `ws`, `wss` or `ftp`
2358 /// * If either the old or new scheme is `http`, `https`, `ws`,
2359 /// `wss` or `ftp` and the other is not one of these
2360 /// * If the new scheme is `file` and this URL includes credentials
2361 /// or has a non-null port
2362 /// * If this URL's scheme is `file` and its host is empty or null
2363 ///
2364 /// See also [the URL specification's section on legal scheme state
2365 /// overrides](https://url.spec.whatwg.org/#scheme-state).
2366 ///
2367 /// # Examples
2368 ///
2369 /// Change the URL’s scheme from `https` to `http`:
2370 ///
2371 /// ```
2372 /// use url::Url;
2373 /// # use url::ParseError;
2374 ///
2375 /// # fn run() -> Result<(), ParseError> {
2376 /// let mut url = Url::parse("https://example.net")?;
2377 /// let result = url.set_scheme("http");
2378 /// assert_eq!(url.as_str(), "http://example.net/");
2379 /// assert!(result.is_ok());
2380 /// # Ok(())
2381 /// # }
2382 /// # run().unwrap();
2383 /// ```
2384 /// Change the URL’s scheme from `foo` to `bar`:
2385 ///
2386 /// ```
2387 /// use url::Url;
2388 /// # use url::ParseError;
2389 ///
2390 /// # fn run() -> Result<(), ParseError> {
2391 /// let mut url = Url::parse("foo://example.net")?;
2392 /// let result = url.set_scheme("bar");
2393 /// assert_eq!(url.as_str(), "bar://example.net");
2394 /// assert!(result.is_ok());
2395 /// # Ok(())
2396 /// # }
2397 /// # run().unwrap();
2398 /// ```
2399 ///
2400 /// Cannot change URL’s scheme from `https` to `foõ`:
2401 ///
2402 /// ```
2403 /// use url::Url;
2404 /// # use url::ParseError;
2405 ///
2406 /// # fn run() -> Result<(), ParseError> {
2407 /// let mut url = Url::parse("https://example.net")?;
2408 /// let result = url.set_scheme("foõ");
2409 /// assert_eq!(url.as_str(), "https://example.net/");
2410 /// assert!(result.is_err());
2411 /// # Ok(())
2412 /// # }
2413 /// # run().unwrap();
2414 /// ```
2415 ///
2416 /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2417 ///
2418 /// ```
2419 /// use url::Url;
2420 /// # use url::ParseError;
2421 ///
2422 /// # fn run() -> Result<(), ParseError> {
2423 /// let mut url = Url::parse("mailto:rms@example.net")?;
2424 /// let result = url.set_scheme("https");
2425 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2426 /// assert!(result.is_err());
2427 /// # Ok(())
2428 /// # }
2429 /// # run().unwrap();
2430 /// ```
2431 /// Cannot change the URL’s scheme from `foo` to `https`:
2432 ///
2433 /// ```
2434 /// use url::Url;
2435 /// # use url::ParseError;
2436 ///
2437 /// # fn run() -> Result<(), ParseError> {
2438 /// let mut url = Url::parse("foo://example.net")?;
2439 /// let result = url.set_scheme("https");
2440 /// assert_eq!(url.as_str(), "foo://example.net");
2441 /// assert!(result.is_err());
2442 /// # Ok(())
2443 /// # }
2444 /// # run().unwrap();
2445 /// ```
2446 /// Cannot change the URL’s scheme from `http` to `foo`:
2447 ///
2448 /// ```
2449 /// use url::Url;
2450 /// # use url::ParseError;
2451 ///
2452 /// # fn run() -> Result<(), ParseError> {
2453 /// let mut url = Url::parse("http://example.net")?;
2454 /// let result = url.set_scheme("foo");
2455 /// assert_eq!(url.as_str(), "http://example.net/");
2456 /// assert!(result.is_err());
2457 /// # Ok(())
2458 /// # }
2459 /// # run().unwrap();
2460 /// ```
2461 #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
2462 pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
2463 let mut parser = Parser::for_setter(String::new());
2464 let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?;
2465 let new_scheme_type = SchemeType::from(&parser.serialization);
2466 let old_scheme_type = SchemeType::from(self.scheme());
2467 // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2468 if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2469 // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2470 (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2471 // If url includes credentials or has a non-null port, and buffer is "file", then return.
2472 // If url’s scheme is "file" and its host is an empty host or null, then return.
2473 (new_scheme_type.is_file() && self.has_authority())
2474 {
2475 return Err(());
2476 }
2477
2478 if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2479 return Err(());
2480 }
2481 let old_scheme_end = self.scheme_end;
2482 let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
2483 let adjust = |index: &mut u32| {
2484 *index -= old_scheme_end;
2485 *index += new_scheme_end;
2486 };
2487
2488 self.scheme_end = new_scheme_end;
2489 adjust(&mut self.username_end);
2490 adjust(&mut self.host_start);
2491 adjust(&mut self.host_end);
2492 adjust(&mut self.path_start);
2493 if let Some(ref mut index) = self.query_start {
2494 adjust(index)
2495 }
2496 if let Some(ref mut index) = self.fragment_start {
2497 adjust(index)
2498 }
2499
2500 parser.serialization.push_str(self.slice(old_scheme_end..));
2501 self.serialization = parser.serialization;
2502
2503 // Update the port so it can be removed
2504 // If it is the scheme's default
2505 // we don't mind it silently failing
2506 // if there was no port in the first place
2507 let previous_port = self.port();
2508 let _ = self.set_port(previous_port);
2509
2510 Ok(())
2511 }
2512
2513 /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2514 ///
2515 /// This returns `Err` if the given path is not absolute or,
2516 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2517 ///
2518 /// # Examples
2519 ///
2520 /// On Unix-like platforms:
2521 ///
2522 /// ```
2523 /// # if cfg!(unix) {
2524 /// use url::Url;
2525 ///
2526 /// # fn run() -> Result<(), ()> {
2527 /// let url = Url::from_file_path("/tmp/foo.txt")?;
2528 /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2529 ///
2530 /// let url = Url::from_file_path("../foo.txt");
2531 /// assert!(url.is_err());
2532 ///
2533 /// let url = Url::from_file_path("https://google.com/");
2534 /// assert!(url.is_err());
2535 /// # Ok(())
2536 /// # }
2537 /// # run().unwrap();
2538 /// # }
2539 /// ```
2540 ///
2541 /// This method is only available if the `std` Cargo feature is enabled.
2542 #[cfg(all(
2543 feature = "std",
2544 any(
2545 unix,
2546 windows,
2547 target_os = "redox",
2548 target_os = "wasi",
2549 target_os = "hermit"
2550 )
2551 ))]
2552 #[allow(clippy::result_unit_err)]
2553 pub fn from_file_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self, ()> {
2554 let mut serialization = "file://".to_owned();
2555 let host_start = serialization.len() as u32;
2556 let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2557 Ok(Self {
2558 serialization,
2559 scheme_end: "file".len() as u32,
2560 username_end: host_start,
2561 host_start,
2562 host_end,
2563 host,
2564 port: None,
2565 path_start: host_end,
2566 query_start: None,
2567 fragment_start: None,
2568 })
2569 }
2570
2571 /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2572 ///
2573 /// This returns `Err` if the given path is not absolute or,
2574 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2575 ///
/// Compared to `from_file_path`, this ensures that the URL’s path has a trailing slash
2577 /// so that the entire path is considered when using this URL as a base URL.
2578 ///
2579 /// For example:
2580 ///
2581 /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2582 /// as the base URL is `file:///var/www/index.html`
2583 /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2584 /// as the base URL is `file:///var/index.html`, which might not be what was intended.
2585 ///
2586 /// Note that `std::path` does not consider trailing slashes significant
2587 /// and usually does not include them (e.g. in `Path::parent()`).
2588 ///
2589 /// This method is only available if the `std` Cargo feature is enabled.
2590 #[cfg(all(
2591 feature = "std",
2592 any(
2593 unix,
2594 windows,
2595 target_os = "redox",
2596 target_os = "wasi",
2597 target_os = "hermit"
2598 )
2599 ))]
2600 #[allow(clippy::result_unit_err)]
2601 pub fn from_directory_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self, ()> {
2602 let mut url = Self::from_file_path(path)?;
2603 if !url.serialization.ends_with('/') {
2604 url.serialization.push('/')
2605 }
2606 Ok(url)
2607 }
2608
2609 /// Serialize with Serde using the internal representation of the `Url` struct.
2610 ///
2611 /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2612 /// for speed, compared to the `Deserialize` trait impl.
2613 ///
2614 /// This method is only available if the `serde` Cargo feature is enabled.
2615 #[cfg(feature = "serde")]
2616 #[deny(unused)]
2617 pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
2618 where
2619 S: serde::Serializer,
2620 {
2621 use serde::Serialize;
2622 // Destructuring first lets us ensure that adding or removing fields forces this method
2623 // to be updated
2624 let Url {
2625 ref serialization,
2626 ref scheme_end,
2627 ref username_end,
2628 ref host_start,
2629 ref host_end,
2630 ref host,
2631 ref port,
2632 ref path_start,
2633 ref query_start,
2634 ref fragment_start,
2635 } = *self;
2636 (
2637 serialization,
2638 scheme_end,
2639 username_end,
2640 host_start,
2641 host_end,
2642 host,
2643 port,
2644 path_start,
2645 query_start,
2646 fragment_start,
2647 )
2648 .serialize(serializer)
2649 }
2650
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This method sacrifices some invariant-checking for speed,
/// compared to the `Deserialize` trait impl.
2655 ///
2656 /// This method is only available if the `serde` Cargo feature is enabled.
2657 #[cfg(feature = "serde")]
2658 #[deny(unused)]
2659 pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
2660 where
2661 D: serde::Deserializer<'de>,
2662 {
2663 use serde::de::{Deserialize, Error};
2664 let (
2665 serialization,
2666 scheme_end,
2667 username_end,
2668 host_start,
2669 host_end,
2670 host,
2671 port,
2672 path_start,
2673 query_start,
2674 fragment_start,
2675 ) = Deserialize::deserialize(deserializer)?;
2676 let url = Url {
2677 serialization,
2678 scheme_end,
2679 username_end,
2680 host_start,
2681 host_end,
2682 host,
2683 port,
2684 path_start,
2685 query_start,
2686 fragment_start,
2687 };
2688 if cfg!(debug_assertions) {
2689 url.check_invariants().map_err(Error::custom)?
2690 }
2691 Ok(url)
2692 }
2693
2694 /// Assuming the URL is in the `file` scheme or similar,
2695 /// convert its path to an absolute `std::path::Path`.
2696 ///
2697 /// **Note:** This does not actually check the URL’s `scheme`,
2698 /// and may give nonsensical results for other schemes.
2699 /// It is the user’s responsibility to check the URL’s scheme before calling this.
2700 ///
2701 /// ```
2702 /// # use url::Url;
2703 /// # let url = Url::parse("file:///etc/passwd").unwrap();
2704 /// let path = url.to_file_path();
2705 /// ```
2706 ///
2707 /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2708 /// `file:` URLs may have a non-local host),
2709 /// or if `Path::new_opt()` returns `None`.
2710 /// (That is, if the percent-decoded path contains a NUL byte or,
2711 /// for a Windows path, is not UTF-8.)
2712 ///
2713 /// This method is only available if the `std` Cargo feature is enabled.
2714 #[inline]
2715 #[cfg(all(
2716 feature = "std",
2717 any(
2718 unix,
2719 windows,
2720 target_os = "redox",
2721 target_os = "wasi",
2722 target_os = "hermit"
2723 )
2724 ))]
2725 #[allow(clippy::result_unit_err)]
2726 pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2727 if let Some(segments) = self.path_segments() {
2728 let host = match self.host() {
2729 None | Some(Host::Domain("localhost")) => None,
2730 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2731 Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2732 }
2733 _ => return Err(()),
2734 };
2735
2736 let str_len = self.as_str().len();
2737 let estimated_capacity = if cfg!(target_os = "redox") {
2738 let scheme_len = self.scheme().len();
2739 let file_scheme_len = "file".len();
2740 // remove only // because it still has file:
2741 if scheme_len < file_scheme_len {
2742 let scheme_diff = file_scheme_len - scheme_len;
2743 (str_len + scheme_diff).saturating_sub(2)
2744 } else {
2745 let scheme_diff = scheme_len - file_scheme_len;
2746 str_len.saturating_sub(scheme_diff + 2)
2747 }
2748 } else if cfg!(windows) {
2749 // remove scheme: - has posssible \\ for hostname
2750 str_len.saturating_sub(self.scheme().len() + 1)
2751 } else {
2752 // remove scheme://
2753 str_len.saturating_sub(self.scheme().len() + 3)
2754 };
2755 return file_url_segments_to_pathbuf(estimated_capacity, host, segments);
2756 }
2757 Err(())
2758 }
2759
2760 // Private helper methods:
2761
2762 #[inline]
2763 fn slice<R>(&self, range: R) -> &str
2764 where
2765 R: RangeArg,
2766 {
2767 range.slice_of(&self.serialization)
2768 }
2769
2770 #[inline]
2771 fn byte_at(&self, i: u32) -> u8 {
2772 self.serialization.as_bytes()[i as usize]
2773 }
2774}
2775
2776/// Parse a string as an URL, without a base URL or encoding override.
2777impl str::FromStr for Url {
2778 type Err = ParseError;
2779
2780 #[inline]
2781 fn from_str(input: &str) -> Result<Self, crate::ParseError> {
2782 Self::parse(input)
2783 }
2784}
2785
2786impl<'a> TryFrom<&'a str> for Url {
2787 type Error = ParseError;
2788
2789 fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2790 Self::parse(s)
2791 }
2792}
2793
2794/// Display the serialization of this URL.
2795impl fmt::Display for Url {
2796 #[inline]
2797 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2798 fmt::Display::fmt(&self.serialization, formatter)
2799 }
2800}
2801
2802/// String conversion.
2803impl From<Url> for String {
2804 fn from(value: Url) -> Self {
2805 value.serialization
2806 }
2807}
2808
2809/// Debug the serialization of this URL.
2810impl fmt::Debug for Url {
2811 #[inline]
2812 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2813 formatter
2814 .debug_struct("Url")
2815 .field("scheme", &self.scheme())
2816 .field("cannot_be_a_base", &self.cannot_be_a_base())
2817 .field("username", &self.username())
2818 .field("password", &self.password())
2819 .field("host", &self.host())
2820 .field("port", &self.port())
2821 .field("path", &self.path())
2822 .field("query", &self.query())
2823 .field("fragment", &self.fragment())
2824 .finish()
2825 }
2826}
2827
/// URLs compare like their serialization.
///
/// `Eq` is a marker trait with no methods; the comparison itself is the
/// `PartialEq` impl for `Url`.
impl Eq for Url {}
2830
2831/// URLs compare like their serialization.
2832impl PartialEq for Url {
2833 #[inline]
2834 fn eq(&self, other: &Self) -> bool {
2835 self.serialization == other.serialization
2836 }
2837}
2838
2839/// URLs compare like their serialization.
2840impl Ord for Url {
2841 #[inline]
2842 fn cmp(&self, other: &Self) -> cmp::Ordering {
2843 self.serialization.cmp(&other.serialization)
2844 }
2845}
2846
2847/// URLs compare like their serialization.
2848impl PartialOrd for Url {
2849 #[inline]
2850 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2851 Some(self.cmp(other))
2852 }
2853}
2854
2855/// URLs hash like their serialization.
2856impl hash::Hash for Url {
2857 #[inline]
2858 fn hash<H>(&self, state: &mut H)
2859 where
2860 H: hash::Hasher,
2861 {
2862 hash::Hash::hash(&self.serialization, state)
2863 }
2864}
2865
2866/// Return the serialization of this URL.
2867impl AsRef<str> for Url {
2868 #[inline]
2869 fn as_ref(&self) -> &str {
2870 &self.serialization
2871 }
2872}
2873
/// A range of `u32` byte offsets that can be used to slice a `&str`.
///
/// Slicing panics under the same conditions as indexing a `str` with the
/// equivalent `usize` range.
trait RangeArg {
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

/// `start..`
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

/// `..end`
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2898
2899/// Serializes this URL into a `serde` stream.
2900///
2901/// This implementation is only available if the `serde` Cargo feature is enabled.
2902#[cfg(feature = "serde")]
2903impl serde::Serialize for Url {
2904 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
2905 where
2906 S: serde::Serializer,
2907 {
2908 serializer.serialize_str(self.as_str())
2909 }
2910}
2911
2912/// Deserializes this URL from a `serde` stream.
2913///
2914/// This implementation is only available if the `serde` Cargo feature is enabled.
2915#[cfg(feature = "serde")]
2916impl<'de> serde::Deserialize<'de> for Url {
2917 fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
2918 where
2919 D: serde::Deserializer<'de>,
2920 {
2921 use serde::de::{Error, Visitor};
2922
2923 struct UrlVisitor;
2924
2925 impl Visitor<'_> for UrlVisitor {
2926 type Value = Url;
2927
2928 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2929 formatter.write_str("a string representing an URL")
2930 }
2931
2932 fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
2933 where
2934 E: Error,
2935 {
2936 Url::parse(s).map_err(|err| Error::custom(format!("{err}: {s:?}")))
2937 }
2938 }
2939
2940 deserializer.deserialize_str(UrlVisitor)
2941 }
2942}
2943
2944#[cfg(all(
2945 feature = "std",
2946 any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
2947))]
2948fn path_to_file_url_segments(
2949 path: &Path,
2950 serialization: &mut String,
2951) -> Result<(u32, HostInternal), ()> {
2952 use parser::SPECIAL_PATH_SEGMENT;
2953 use percent_encoding::percent_encode;
2954 #[cfg(target_os = "hermit")]
2955 use std::os::hermit::ffi::OsStrExt;
2956 #[cfg(any(unix, target_os = "redox"))]
2957 use std::os::unix::prelude::OsStrExt;
2958 if !path.is_absolute() {
2959 return Err(());
2960 }
2961 let host_end = to_u32(serialization.len()).unwrap();
2962 let mut empty = true;
2963 // skip the root component
2964 for component in path.components().skip(1) {
2965 empty = false;
2966 serialization.push('/');
2967 #[cfg(not(target_os = "wasi"))]
2968 serialization.extend(percent_encode(
2969 component.as_os_str().as_bytes(),
2970 SPECIAL_PATH_SEGMENT,
2971 ));
2972 #[cfg(target_os = "wasi")]
2973 serialization.extend(percent_encode(
2974 component.as_os_str().to_string_lossy().as_bytes(),
2975 SPECIAL_PATH_SEGMENT,
2976 ));
2977 }
2978 if empty {
2979 // An URL’s path must not be empty.
2980 serialization.push('/');
2981 }
2982 Ok((host_end, HostInternal::None))
2983}
2984
2985#[cfg(all(feature = "std", windows))]
// Windows build of `path_to_file_url_segments`: forwards both arguments to
// `path_to_file_url_segments_windows` and returns its result unchanged.
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2992
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Appends a Windows-style absolute `path` to `serialization` as a host (for UNC
/// paths) followed by percent-encoded path segments.
///
/// Only paths whose first component is a disk or UNC prefix (plain or verbatim)
/// are accepted. `Err(())` is returned when the path is not absolute, starts
/// with any other component, has a UNC server name that is not valid Unicode or
/// is rejected by `Host::parse_cow`, or contains a non-Unicode share or component.
///
/// On success, returns the serialization length at the point where the host
/// ends (converted with `to_u32`) and the host in its internal representation
/// (`HostInternal::None` for disk paths).
///
/// Panics if `to_u32` reports a failure for the serialization length.
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn path_to_file_url_segments_windows(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    use crate::parser::PATH_SEGMENT;
    use percent_encoding::percent_encode;
    use std::path::{Component, Prefix};

    if !path.is_absolute() {
        return Err(());
    }

    let mut components = path.components();

    // Index one past the current end; for a disk path this is where the drive
    // letter lands, right after the '/' pushed below.
    let host_start = serialization.len() + 1;

    let prefix = match components.next() {
        Some(Component::Prefix(prefix)) => prefix.kind(),
        _ => return Err(()),
    };

    let (host_end, host_internal): (u32, HostInternal) = match prefix {
        Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
            // No host: the drive becomes the first path segment, e.g. "/C:".
            let end = to_u32(serialization.len()).unwrap();
            serialization.push('/');
            serialization.push(letter as char);
            serialization.push(':');
            (end, HostInternal::None)
        }
        Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
            // The server becomes the URL host, the share the first path segment.
            let server = server.to_str().ok_or(())?;
            let host = Host::parse_cow(server.into()).map_err(|_| ())?;
            write!(serialization, "{host}").unwrap();
            let end = to_u32(serialization.len()).unwrap();
            let internal: HostInternal = host.into();
            serialization.push('/');
            let share = share.to_str().ok_or(())?;
            serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
            (end, internal)
        }
        _ => return Err(()),
    };

    let mut path_only_has_prefix = true;
    // The root directory carries no segment of its own; every other component does.
    for component in components.filter(|c| *c != Component::RootDir) {
        path_only_has_prefix = false;
        // FIXME: somehow work with non-unicode?
        let segment = component.as_os_str().to_str().ok_or(())?;

        serialization.push('/');
        serialization.extend(percent_encode(segment.as_bytes(), PATH_SEGMENT));
    }

    // A windows drive letter must end with a slash.
    let needs_trailing_slash = serialization.len() > host_start
        && parser::is_windows_drive_letter(&serialization[host_start..])
        && path_only_has_prefix;
    if needs_trailing_slash {
        serialization.push('/');
    }

    Ok((host_end, host_internal))
}
3059
/// Rebuilds a filesystem path from the path segments of a `file:` URL.
///
/// Each segment is percent-decoded and prefixed with `/`. On Redox the result is
/// additionally prefixed with `file:`. Returns `Err(())` when a host is present,
/// when reserving `estimated_capacity` bytes fails, or (WASI only) when the
/// decoded bytes are not valid UTF-8.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn file_url_segments_to_pathbuf(
    estimated_capacity: usize,
    host: Option<&str>,
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    use alloc::vec::Vec;
    use percent_encoding::percent_decode;
    #[cfg(not(target_os = "wasi"))]
    use std::ffi::OsStr;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;

    // URLs with a host are rejected by this implementation.
    if host.is_some() {
        return Err(());
    }

    let mut raw: Vec<u8> = Vec::new();
    raw.try_reserve(estimated_capacity).map_err(|_| ())?;
    if cfg!(target_os = "redox") {
        raw.extend_from_slice(b"file:");
    }

    for segment in segments {
        raw.push(b'/');
        raw.extend(percent_decode(segment.as_bytes()));
    }

    // A windows drive letter must end with a slash.
    // (At least three bytes, ending in an ASCII letter followed by ':' or '|'.)
    let ends_with_drive_letter = matches!(
        raw.as_slice(),
        [_, .., letter, b':' | b'|'] if letter.is_ascii_alphabetic()
    );
    if ends_with_drive_letter {
        raw.push(b'/');
    }

    #[cfg(not(target_os = "wasi"))]
    let path = PathBuf::from(OsStr::from_bytes(&raw));
    #[cfg(target_os = "wasi")]
    let path = String::from_utf8(raw).map(PathBuf::from).map_err(|_| ())?;

    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );

    Ok(path)
}
3115
/// Windows build of `file_url_segments_to_pathbuf`.
///
/// Forwards all arguments unchanged to `file_url_segments_to_pathbuf_windows`
/// and returns its result as-is.
#[cfg(all(feature = "std", windows))]
fn file_url_segments_to_pathbuf(
    estimated_capacity: usize,
    host: Option<&str>,
    // The lifetime is spelled `'_` to match the sibling functions' signatures.
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(estimated_capacity, host, segments)
}
3124
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Rebuilds a Windows path string from the host and path segments of a `file:` URL.
///
/// With a host, the result starts with `\\` followed by the host. Without one,
/// the first segment must be a drive: a character accepted by
/// `parser::ascii_alpha` followed by either `:` or the percent-encoded form
/// `%3a` / `%3A`. Every remaining segment is percent-decoded as UTF-8 and
/// appended after a `\`.
///
/// Returns `Err(())` when reserving `estimated_capacity` fails, when the drive
/// segment is missing or malformed, or when a segment does not decode to UTF-8.
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn file_url_segments_to_pathbuf_windows(
    estimated_capacity: usize,
    host: Option<&str>,
    mut segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    use percent_encoding::percent_decode_str;

    let mut windows_path = String::new();
    windows_path.try_reserve(estimated_capacity).map_err(|_| ())?;

    match host {
        Some(host) => {
            windows_path.push_str(r"\\");
            windows_path.push_str(host);
        }
        None => {
            let drive = segments.next().ok_or(())?;
            if !drive.starts_with(parser::ascii_alpha) {
                return Err(());
            }

            match drive.as_bytes() {
                // Plain drive, e.g. "C:".
                [_, b':'] => windows_path.push_str(drive),
                // Drive whose colon is percent-encoded, e.g. "C%3A".
                [_, b'%', b'3', b'a' | b'A'] => {
                    windows_path.push_str(&drive[..1]);
                    windows_path.push(':');
                }
                _ => return Err(()),
            }
        }
    }

    for segment in segments {
        windows_path.push('\\');

        // Currently non-unicode windows paths cannot be represented
        let decoded = percent_decode_str(segment).decode_utf8().map_err(|_| ())?;
        windows_path.push_str(&decoded);
    }

    // ensure our estimated capacity was good
    if cfg!(test) {
        debug_assert!(
            windows_path.len() <= estimated_capacity,
            "len: {}, capacity: {}",
            windows_path.len(),
            estimated_capacity
        );
    }

    let path = PathBuf::from(windows_path);
    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );
    Ok(path)
}
3193
3194/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
3195#[derive(Debug)]
3196pub struct UrlQuery<'a> {
3197 url: Option<&'a mut Url>,
3198 fragment: Option<String>,
3199}
3200
// `as_mut_string` here exposes the internal serialization of an `Url`,
// which should not be exposed to users.
// We achieve that by not giving users direct access to `UrlQuery`:
// * Its fields are private
//   (and so it cannot be constructed with struct literal syntax outside of this crate),
// * It has no constructor
// * It is only visible (on the type level) to users in the return type of
//   `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
// * `Serializer` keeps its target in a private field
// * Unlike in other `Target` impls, `UrlQuery::finish` does not return `Self`.
3211impl<'a> form_urlencoded::Target for UrlQuery<'a> {
3212 fn as_mut_string(&mut self) -> &mut String {
3213 &mut self.url.as_mut().unwrap().serialization
3214 }
3215
3216 fn finish(mut self) -> &'a mut Url {
3217 let url = self.url.take().unwrap();
3218 url.restore_already_parsed_fragment(self.fragment.take());
3219 url
3220 }
3221
3222 type Finished = &'a mut Url;
3223}
3224
3225impl Drop for UrlQuery<'_> {
3226 fn drop(&mut self) {
3227 if let Some(url) = self.url.take() {
3228 url.restore_already_parsed_fragment(self.fragment.take())
3229 }
3230 }
3231}