1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
/// Partial percent-decoding.
///
/// Performs percent-decoding on a slice but can selectively skip decoding certain sequences.
///
/// # Examples
/// ```
/// # use actix_router::Quoter;
/// // + is set as a protected character and will not be decoded...
/// let q = Quoter::new(&[], b"+");
///
/// // ...but the other encoded characters (like the hyphen below) will.
/// assert_eq!(q.requote(b"/a%2Db%2Bc").unwrap(), b"/a-b%2Bc");
/// ```
pub struct Quoter {
/// Simple bit-map of protected values in the 0-127 ASCII range.
protected_table: AsciiBitmap,
}
impl Quoter {
/// Constructs a new `Quoter` instance given a set of protected ASCII bytes.
///
/// The first argument is ignored but is kept for backward compatibility.
///
/// # Panics
/// Panics if any of the `protected` bytes are not in the 0-127 ASCII range.
pub fn new(_: &[u8], protected: &[u8]) -> Quoter {
let mut protected_table = AsciiBitmap::default();
// prepare protected table
for &ch in protected {
protected_table.set_bit(ch);
}
Quoter { protected_table }
}
/// Decodes the next escape sequence, if any, and advances `val`.
#[inline(always)]
fn decode_next<'a>(&self, val: &mut &'a [u8]) -> Option<(&'a [u8], u8)> {
for i in 0..val.len() {
if let (prev, [b'%', p1, p2, rem @ ..]) = val.split_at(i) {
if let Some(ch) = hex_pair_to_char(*p1, *p2)
// ignore protected ascii bytes
.filter(|&ch| !(ch < 128 && self.protected_table.bit_at(ch)))
{
*val = rem;
return Some((prev, ch));
}
}
}
None
}
/// Partially percent-decodes the given bytes.
///
/// Escape sequences of the protected set are *not* decoded.
///
/// Returns `None` when no modification to the original bytes was required.
///
/// Invalid/incomplete percent-encoding sequences are passed unmodified.
pub fn requote(&self, val: &[u8]) -> Option<Vec<u8>> {
let mut remaining = val;
// early return indicates that no percent-encoded sequences exist and we can skip allocation
let (pre, decoded_char) = self.decode_next(&mut remaining)?;
// decoded output will always be shorter than the input
let mut decoded = Vec::<u8>::with_capacity(val.len());
// push first segment and decoded char
decoded.extend_from_slice(pre);
decoded.push(decoded_char);
// decode and push rest of segments and decoded chars
while let Some((prev, ch)) = self.decode_next(&mut remaining) {
// this ugly conditional achieves +50% perf in cases where this is a tight loop.
if !prev.is_empty() {
decoded.extend_from_slice(prev);
}
decoded.push(ch);
}
decoded.extend_from_slice(remaining);
Some(decoded)
}
pub(crate) fn requote_str_lossy(&self, val: &str) -> Option<String> {
self.requote(val.as_bytes())
.map(|data| String::from_utf8_lossy(&data).into_owned())
}
}
/// Decode a ASCII hex-encoded pair to an integer.
///
/// Returns `None` if either portion of the decoded pair does not evaluate to a valid hex value.
///
/// - `0x33 ('3'), 0x30 ('0') => 0x30 ('0')`
/// - `0x34 ('4'), 0x31 ('1') => 0x41 ('A')`
/// - `0x36 ('6'), 0x31 ('1') => 0x61 ('a')`
#[inline(always)]
fn hex_pair_to_char(d1: u8, d2: u8) -> Option<u8> {
let d_high = char::from(d1).to_digit(16)?;
let d_low = char::from(d2).to_digit(16)?;
// left shift high nibble by 4 bits
Some((d_high as u8) << 4 | (d_low as u8))
}
#[derive(Debug, Default, Clone)]
struct AsciiBitmap {
array: [u8; 16],
}
impl AsciiBitmap {
/// Sets bit in given bit-map to 1=true.
///
/// # Panics
/// Panics if `ch` index is out of bounds.
fn set_bit(&mut self, ch: u8) {
self.array[(ch >> 3) as usize] |= 0b1 << (ch & 0b111)
}
/// Returns true if bit to true in given bit-map.
///
/// # Panics
/// Panics if `ch` index is out of bounds.
fn bit_at(&self, ch: u8) -> bool {
self.array[(ch >> 3) as usize] & (0b1 << (ch & 0b111)) != 0
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn custom_quoter() {
let q = Quoter::new(b"", b"+");
assert_eq!(q.requote(b"/a%25c").unwrap(), b"/a%c");
assert_eq!(q.requote(b"/a%2Bc"), None);
let q = Quoter::new(b"%+", b"/");
assert_eq!(q.requote(b"/a%25b%2Bc").unwrap(), b"/a%b+c");
assert_eq!(q.requote(b"/a%2fb"), None);
assert_eq!(q.requote(b"/a%2Fb"), None);
assert_eq!(q.requote(b"/a%0Ab").unwrap(), b"/a\nb");
assert_eq!(q.requote(b"/a%FE\xffb").unwrap(), b"/a\xfe\xffb");
assert_eq!(q.requote(b"/a\xfe\xffb"), None);
}
#[test]
fn non_ascii() {
let q = Quoter::new(b"%+", b"/");
assert_eq!(q.requote(b"/a%FE\xffb").unwrap(), b"/a\xfe\xffb");
assert_eq!(q.requote(b"/a\xfe\xffb"), None);
}
#[test]
fn invalid_sequences() {
let q = Quoter::new(b"%+", b"/");
assert_eq!(q.requote(b"/a%2x%2X%%"), None);
assert_eq!(q.requote(b"/a%20%2X%%").unwrap(), b"/a %2X%%");
}
#[test]
fn quoter_no_modification() {
let q = Quoter::new(b"", b"");
assert_eq!(q.requote(b"/abc/../efg"), None);
}
}