1use core::fmt::{self, Write as _};
4use core::marker::PhantomData;
5use core::num::NonZeroU8;
6use core::ops::ControlFlow;
7
8use crate::parser::str::find_split;
9use crate::parser::trusted::hexdigits_to_byte;
10
/// One piece of a string that may contain percent-encoded triplets.
///
/// Values of this type are passed, in input order, to the callback given to
/// [`process_percent_encoded_best_effort`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum PctEncodedFragments<'a> {
    /// Text that is passed through as-is (no percent-encoded triplet inside).
    NoPctStr(&'a str),
    /// A `%` that does not start a complete percent-encoded triplet.
    StrayPercent,
    /// Percent-encoded triplets (first field) and the character they decode
    /// to (second field).
    Char(&'a str, char),
    /// Complete percent-encoded triplets that could not be decoded into a
    /// character. The string may be empty.
    InvalidUtf8PctTriplets(&'a str),
}
23
24pub(crate) fn process_percent_encoded_best_effort<T, F, B>(
26 v: T,
27 mut f: F,
28) -> Result<ControlFlow<B>, fmt::Error>
29where
30 T: fmt::Display,
31 F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
32{
33 let mut buf = [0_u8; 12];
34 let mut writer = DecomposeWriter {
35 f: &mut f,
36 decoder: Default::default(),
37 buf: &mut buf,
38 result: ControlFlow::Continue(()),
39 _r: PhantomData,
40 };
41
42 if write!(writer, "{v}").is_err() {
43 match writer.result {
44 ControlFlow::Continue(_) => return Err(fmt::Error),
45 ControlFlow::Break(v) => return Ok(ControlFlow::Break(v)),
46 }
47 }
48
49 if let Some(len) = writer.decoder.flush(&mut buf).map(|v| usize::from(v.get())) {
51 let len_suffix = len % 3;
52 let triplets_end = len - len_suffix;
53 let triplets = core::str::from_utf8(&buf[..triplets_end])
54 .expect("percent-encoded triplets consist of ASCII characters");
55 if let ControlFlow::Break(v) = f(PctEncodedFragments::InvalidUtf8PctTriplets(triplets)) {
56 return Ok(ControlFlow::Break(v));
57 }
58
59 if len_suffix > 0 {
60 if let ControlFlow::Break(v) = f(PctEncodedFragments::StrayPercent) {
61 return Ok(ControlFlow::Break(v));
62 }
63 }
64 if len_suffix > 1 {
65 let after_percent =
66 core::str::from_utf8(&buf[(triplets_end + 1)..(triplets_end + len_suffix)])
67 .expect("percent-encoded triplets contains only ASCII characters");
68 if let ControlFlow::Break(v) = f(PctEncodedFragments::NoPctStr(after_percent)) {
69 return Ok(ControlFlow::Break(v));
70 }
71 }
72 }
73
74 Ok(ControlFlow::Continue(()))
75}
76
77struct DecomposeWriter<'a, F, B> {
79 f: &'a mut F,
81 decoder: DecoderBuffer,
83 buf: &'a mut [u8],
85 result: ControlFlow<B>,
87 _r: PhantomData<fn() -> B>,
89}
90impl<F, B> DecomposeWriter<'_, F, B>
91where
92 F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
93{
94 #[inline(always)]
96 fn result_continue_or_err(&self) -> fmt::Result {
97 if self.result.is_break() {
98 return Err(fmt::Error);
99 }
100 Ok(())
101 }
102
103 fn output_as_undecodable(&mut self, len_undecodable: u8) -> fmt::Result {
105 let len_written = usize::from(len_undecodable);
106 let frag = core::str::from_utf8(&self.buf[..len_written])
107 .expect("`DecoderBuffer` writes a valid ASCII string");
108 let len_incomplete = len_written % 3;
109 let len_complete = len_written - len_incomplete;
110 self.result = (self.f)(PctEncodedFragments::InvalidUtf8PctTriplets(
111 &frag[..len_complete],
112 ));
113 self.result_continue_or_err()?;
114 if len_incomplete > 0 {
115 self.result = (self.f)(PctEncodedFragments::StrayPercent);
117 if self.result.is_break() {
118 return Err(fmt::Error);
119 }
120 if len_incomplete > 1 {
121 debug_assert_eq!(
123 len_incomplete, 2,
124 "the length of incomplete percent-encoded triplet must be less than 2 bytes"
125 );
126 self.result = (self.f)(PctEncodedFragments::NoPctStr(
127 &frag[(len_complete + 1)..len_written],
128 ));
129 self.result_continue_or_err()?;
130 }
131 }
132 Ok(())
133 }
134}
135
136impl<F, B> fmt::Write for DecomposeWriter<'_, F, B>
137where
138 F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
139{
140 fn write_str(&mut self, s: &str) -> fmt::Result {
141 self.result_continue_or_err()?;
142 let mut rest = s;
143 while !rest.is_empty() {
144 let (len_consumed, result) = self.decoder.push_encoded(self.buf, rest);
145 if len_consumed == 0 {
146 if let Some(len_written) = self.decoder.flush(self.buf).map(NonZeroU8::get) {
149 self.output_as_undecodable(len_written)?;
150 rest = &rest[usize::from(len_written)..];
151 }
152
153 let (plain_prefix, suffix) = find_split(rest, b'%').unwrap_or((rest, ""));
155 debug_assert!(
156 !plain_prefix.is_empty(),
157 "`len_consumed == 0` indicates non-empty `rest` not starting with `%`"
158 );
159 self.result = (self.f)(PctEncodedFragments::NoPctStr(plain_prefix));
160 self.result_continue_or_err()?;
161 rest = suffix;
162 continue;
163 }
164
165 match result {
167 PushResult::Decoded(len_written, c) => {
168 let len_written = usize::from(len_written.get());
169 let frag = core::str::from_utf8(&self.buf[..len_written])
170 .expect("`DecoderBuffer` writes a valid ASCII string");
171 self.result = (self.f)(PctEncodedFragments::Char(frag, c));
172 self.result_continue_or_err()?;
173 }
174 PushResult::Undecodable(len_written) => {
175 self.output_as_undecodable(len_written)?;
176 }
177 PushResult::NeedMoreBytes => {
178 }
180 }
181 rest = &rest[len_consumed..];
182 }
183 Ok(())
184 }
185}
186
/// Outcome of feeding a string to [`DecoderBuffer::push_encoded`].
#[derive(Debug, Clone, Copy)]
enum PushResult {
    /// The input was consumed completely but no result is available yet.
    NeedMoreBytes,
    /// A character was decoded. The first field is the number of
    /// percent-encoded bytes written to the destination buffer, the second
    /// one is the decoded character.
    Decoded(NonZeroU8, char),
    /// Bytes that cannot be decoded were written to the destination buffer.
    /// The field is the number of bytes written, which may be zero.
    Undecodable(u8),
}
200
/// Fixed-size buffer for percent-encoded triplets that are being decoded.
#[derive(Default, Debug, Clone, Copy)]
struct DecoderBuffer {
    /// Percent-encoded bytes (`%` and hex digits) collected so far.
    /// Twelve bytes hold up to four triplets.
    encoded: [u8; 12],
    /// Byte values of the complete triplets in `encoded`, one per triplet.
    decoded: [u8; 4],
    /// Number of bytes of `encoded` that are in use.
    len_encoded: u8,
}
216
217impl DecoderBuffer {
218 fn write_and_pop(&mut self, dest: &mut [u8], remove_len: u8) {
220 let new_len = self.len_encoded - remove_len;
221 let remove_len = usize::from(remove_len);
222 let src_range = remove_len..usize::from(self.len_encoded);
223 dest[..remove_len].copy_from_slice(&self.encoded[..remove_len]);
224
225 if new_len == 0 {
226 *self = Self::default();
227 return;
228 }
229 self.encoded.copy_within(src_range, 0);
230 self.decoded
231 .copy_within((remove_len / 3)..usize::from(self.len_encoded / 3), 0);
232 self.len_encoded = new_len;
233 }
234
235 fn push_single_encoded_byte(&mut self, byte: u8) {
237 debug_assert!(
238 self.len_encoded < 12,
239 "four percent-encoded triplets are enough for a unicode code point"
240 );
241 let pos_enc = usize::from(self.len_encoded);
242 self.len_encoded += 1;
243 self.encoded[pos_enc] = byte;
244 if self.len_encoded % 3 == 0 {
245 let pos_dec = usize::from(self.len_encoded / 3 - 1);
247 let upper = self.encoded[pos_enc - 1];
248 let lower = byte;
249 debug_assert!(
250 upper.is_ascii_hexdigit() && lower.is_ascii_hexdigit(),
251 "the `encoded` buffer should contain valid percent-encoded triplets"
252 );
253 self.decoded[pos_dec] = hexdigits_to_byte([upper, lower]);
254 }
255 }
256
257 #[must_use]
266 pub(crate) fn push_encoded(&mut self, buf: &mut [u8], s: &str) -> (usize, PushResult) {
267 debug_assert!(
268 buf.len() >= 12,
269 "[precondition] destination buffer should be at least 12 bytes"
270 );
271 let mut chars = s.chars();
272 let mut len_triplet_incomplete = self.len_encoded % 3;
273 for c in &mut chars {
274 if len_triplet_incomplete == 0 {
275 if c != '%' {
277 let len_consumed = s.len() - chars.as_str().len() - 1;
280 let len_result = self.len_encoded;
281 self.write_and_pop(buf, len_result);
282 return (len_consumed, PushResult::Undecodable(len_result));
283 }
284 self.push_single_encoded_byte(b'%');
285 len_triplet_incomplete = 1;
286 continue;
287 }
288
289 if !c.is_ascii_hexdigit() {
291 let len_consumed = s.len() - chars.as_str().len() - 1;
294 let len_result = self.len_encoded;
295 self.write_and_pop(buf, len_result);
296 return (len_consumed, PushResult::Undecodable(len_result));
297 }
298 self.push_single_encoded_byte(c as u8);
299 if len_triplet_incomplete == 1 {
300 len_triplet_incomplete = 2;
301 continue;
302 } else {
303 debug_assert_eq!(len_triplet_incomplete, 2);
305 len_triplet_incomplete = 0;
306 }
307
308 let len_decoded = usize::from(self.len_encoded) / 3;
311 match core::str::from_utf8(&self.decoded[..len_decoded]) {
312 Ok(decoded_str) => {
313 let len_consumed = s.len() - chars.as_str().len();
315 let c = decoded_str
316 .chars()
317 .next()
318 .expect("`decoded` buffer is nonempty");
319 let len_result = NonZeroU8::new(self.len_encoded).expect(
320 "`encoded` buffer is nonempty since \
321 `push_single_encoded_byte()` was called",
322 );
323 self.write_and_pop(buf, len_result.get());
324 return (len_consumed, PushResult::Decoded(len_result, c));
325 }
326 Err(e) => {
327 assert_eq!(
329 e.valid_up_to(),
330 0,
331 "`decoded` buffer contains at most one character"
332 );
333 let skip_len_decoded = match e.error_len() {
334 None => continue,
336 Some(v) => v,
338 };
339 let len_consumed = s.len() - chars.as_str().len();
340 let len_result = skip_len_decoded as u8 * 3;
341 assert_ne!(skip_len_decoded, 0, "empty bytes cannot be invalid");
342 self.write_and_pop(buf, len_result);
343 return (len_consumed, PushResult::Undecodable(len_result));
344 }
345 };
346 }
347 let len_consumed = s.len() - chars.as_str().len();
348 (len_consumed, PushResult::NeedMoreBytes)
349 }
350
351 #[must_use]
353 pub(crate) fn flush(&mut self, buf: &mut [u8]) -> Option<NonZeroU8> {
354 let len_result = NonZeroU8::new(self.len_encoded)?;
355 self.write_and_pop(buf, len_result.get());
357 debug_assert_eq!(
358 self.len_encoded, 0,
359 "the buffer should be cleared after flushed"
360 );
361 Some(len_result)
362 }
363}