iri_string/types/generic.rs
1//! Generic resource identifier types.
2//!
3//! ```text
4//! IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]
5//! IRI-reference = IRI / irelative-ref
6//! absolute-IRI = scheme ":" ihier-part [ "?" iquery ]
7//! irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ]
//! (`irelative-part` is roughly the same as `ihier-part`.)
9//! ```
10//!
11//! Hierarchy:
12//!
13//! ```text
14//! RiReferenceStr
15//! |-- RiStr
16//! | `-- RiAbsoluteStr
17//! `-- RiRelativeStr
18//! ```
19//!
20//! Therefore, the conversions below are safe and cheap:
21//!
22//! * `RiStr -> RiReferenceStr`
23//! * `RiAbsoluteStr -> RiStr`
24//! * `RiAbsoluteStr -> RiReferenceStr`
25//! * `RiRelativeStr -> RiReferenceStr`
26//!
27//! For safely convertible types (consider `FooStr -> BarStr` is safe), traits
28//! below are implemented:
29//!
30//! * `AsRef<BarStr> for FooStr`
31//! * `AsRef<BarStr> for FooString`
32//! * `From<FooString> for BarString`
33//! * `PartialEq<FooStr> for BarStr` and lots of impls like that
//! + `PartialEq` and `PartialOrd`.
35//! + Slice, owned, `Cow`, reference, etc...
36//!
37//! # IDNA encoding
38//!
//! This crate does not have a built-in IDNA converter, but the user can provide
//! such a conversion function and replace the domain part of IRIs.
41//!
42//! ## Slice IRI types
43//!
44//! 1. Get host by `authority_components()?.host()`.
45//! 2. Process the name.
46//! 3. Create a builder by `Builder::from(&...)`.
47//! 4. Overwrite the domain by `.host(...)`.
48//! 5. Build the new IRI by `.build()`.
49//!
50//! ```
51//! # #[cfg(feature = "alloc")] extern crate alloc;
52//! # #[cfg(feature = "alloc")] use alloc::string::ToString;
53//! use iri_string::build::Builder;
54//! use iri_string::types::{IriStr, UriStr};
55//!
56//! struct IdnaEncodedDomain<'a> {
57//! /* ... */
58//! # raw: &'a str,
59//! }
60//! impl IdnaEncodedDomain<'_> {
61//! pub fn as_str(&self) -> &str {
62//! /* ... */
63//! # match self.raw {
64//! # "alpha.\u{03B1}.example.com" => "alpha.xn--mxa.example.com",
65//! # _ => unimplemented!(),
66//! # }
67//! }
68//! }
69//! // Usually IDNA conversion requires dynamic memory allocation, but
70//! // `iri-string` itself does not require or assume that. It is enough if the
71//! // conversion result can be retrieved as `&str`, so users can do whatever
//! // such as limiting the possible input and/or using a statically allocated buffer.
73//! fn apply_idna(s: &str) -> IdnaEncodedDomain<'_> {
74//! /* ... */
75//! # IdnaEncodedDomain { raw: s }
76//! }
77//!
78//! let orig_iri = IriStr::new("https://alpha.\u{03B1}.example.com").unwrap();
79//!
80//! // 1. Get the host.
81//! let orig_host = orig_iri.authority_components()
82//! .expect("orig_iri has a host")
83//! .host();
84//! debug_assert_eq!(orig_host, "alpha.\u{03B1}.example.com");
85//!
86//! // 2. Process the name.
87//! let new_domain = apply_idna(orig_host);
88//!
89//! // 3. Create a builder.
90//! let mut builder = Builder::from(orig_iri);
91//!
92//! // 4. Overwrite the domain.
93//! builder.host(new_domain.as_str());
94//!
95//! // 5. Build the new IRI.
96//! let new_iri = builder.build::<UriStr>()
97//! .expect("the new host is a valid domain and now they are US-ASCII only");
98//!
99//! // Note that `ToString::to_string()` requires `alloc` feature.
100//! #[cfg(feature = "alloc")]
101//! debug_assert_eq!(new_iri.to_string(), "https://alpha.xn--mxa.example.com");
102//! ```
103//!
104//! ## Allocated IRI types
105//!
106//! For allocated types such as `IriString`, you can use
107//! `{,try_}replace_host{,_reg_name}` methods.
108//!
109//! 1. Get host by `authority_components()?.host()`.
110//! 2. Process the name.
111//! 3. Replace the host by the new result.
112//!
113//! ```
114//! # #[cfg(feature = "alloc")] {
115//! # extern crate alloc;
116//! # use alloc::string::String;
117//! use iri_string::types::IriString;
118//!
119//! fn apply_idna(s: &str) -> String {
120//! /* ... */
121//! # match s {
122//! # "alpha.\u{03B1}.example.com" => "alpha.xn--mxa.example.com".to_owned(),
123//! # _ => unimplemented!(),
124//! # }
125//! }
126//!
127//! let mut iri =
128//! IriString::try_from("https://alpha.\u{03B1}.example.com")
129//! .unwrap();
130//!
131//! // 1. Get the host.
132//! let orig_host = iri.authority_components()
133//! .expect("orig_iri has a host")
134//! .host();
135//! debug_assert_eq!(orig_host, "alpha.\u{03B1}.example.com");
136//!
137//! // 2. Process the name.
138//! let new_domain = apply_idna(orig_host);
139//!
140//! // 3. Replace the host.
141//! iri.replace_host(&new_domain);
142//! debug_assert_eq!(iri, "https://alpha.xn--mxa.example.com");
143//! # }
144//! ```
145
146pub use self::{
147 absolute::RiAbsoluteStr, fragment::RiFragmentStr, normal::RiStr, query::RiQueryStr,
148 reference::RiReferenceStr, relative::RiRelativeStr,
149};
150#[cfg(feature = "alloc")]
151pub use self::{
152 absolute::RiAbsoluteString, error::CreationError, fragment::RiFragmentString, normal::RiString,
153 query::RiQueryString, reference::RiReferenceString, relative::RiRelativeString,
154};
155
156#[macro_use]
157mod macros;
158
159mod absolute;
160#[cfg(feature = "alloc")]
161mod error;
162mod fragment;
163mod normal;
164mod query;
165mod reference;
166mod relative;
167
/// Swaps the host of an IRI string in place.
///
/// On success, returns `Ok(Some(range))`, where `range` is the byte range
/// occupied by the new host inside the modified string.
///
/// Returns `Ok(None)` and leaves the string untouched when any of the
/// following holds:
///
/// * The IRI has no authority. Note that an empty host is distinguished from
///   the absence of an authority.
/// * `new_host` is invalid, i.e. `validate_host::<S>` (see
///   [`crate::validate::host`]) returns `Err(_)`.
/// * `replace_only_reg_name` is `true` and the current host is not a
///   reg-name.
///
/// # Errors
///
/// Returns `Err(_)` if reserving the extra capacity required by a longer host
/// fails. The reservation is attempted before the replacement, so the host has
/// not been replaced when this error is returned.
#[cfg(feature = "alloc")]
fn replace_domain_impl<S: crate::spec::Spec>(
    iri_ref: &mut alloc::string::String,
    new_host: &str,
    replace_only_reg_name: bool,
) -> Result<Option<core::ops::Range<usize>>, alloc::collections::TryReserveError> {
    use crate::components::AuthorityComponents;
    use crate::parser::trusted as trusted_parser;
    use crate::parser::validate::validate_host;

    // Look for the authority before validating `new_host`: validation needs
    // some parsing, while the authority lookup is expected to be the cheaper
    // of the two, so an IRI without authority bails out early.
    let (current_host, start) =
        if let Some((auth, base)) = AuthorityComponents::from_iri_get_offset(iri_ref) {
            (auth.host(), base + auth.host_start)
        } else {
            return Ok(None);
        };
    let current_len = current_host.len();

    // Reject an invalid new host first; after that, refuse to touch a
    // non-reg-name host when the caller asked to replace reg-names only.
    let rejected = validate_host::<S>(new_host).is_err()
        || (replace_only_reg_name
            && !trusted_parser::authority::is_host_reg_name(current_host));
    if rejected {
        return Ok(None);
    }

    // Reserve the growth up front so that allocation failure is reported as
    // an error here instead of surfacing inside `replace_range`.
    let incoming_len = new_host.len();
    if incoming_len >= current_len {
        iri_ref.try_reserve(incoming_len - current_len)?;
    }
    iri_ref.replace_range(start..start + current_len, new_host);

    Ok(Some(start..start + incoming_len))
}