brotli/enc/
utf8_util.rs

1use crate::enc::floatX;
2
/// Decodes a single symbol from the start of `input`.
///
/// `size` is the number of bytes the caller allows this call to consume; it is
/// additionally capped at `input.len()`, so a multi-byte sequence that would
/// run past the end of the slice is treated as malformed instead of panicking.
///
/// Returns `(bytes_consumed, symbol)`:
/// * For a well-formed, shortest-form sequence of 1 to 4 bytes, `symbol` is the
///   decoded code point (at most `0x10_ffff`) and `bytes_consumed` is the
///   sequence length.
/// * Otherwise exactly one byte is consumed and `symbol` is
///   `0x11_0000 | first_byte`, i.e. a value at or above `0x11_0000` that
///   callers can recognise as "not UTF-8". A NUL byte falls in this category.
/// * For an empty `input` the result is `(1, 0x11_0000)`, so that a caller
///   advancing by `bytes_consumed` always makes progress.
///
/// Note: three-byte sequences are only checked for overlong encodings; the
/// surrogate range is not rejected.
fn parse_as_utf8(input: &[u8], size: usize) -> (usize, i32) {
    let lead = match input.first() {
        Some(&byte) => i32::from(byte),
        None => return (1, 0x11_0000),
    };
    // Continuation bytes may only be read while they are inside both the
    // caller-supplied budget and the slice itself.
    let usable = size.min(input.len());
    let is_continuation = |index: usize| (input[index] & 0xc0) == 0x80;
    let payload = |index: usize| i32::from(input[index]) & 0x3f;

    // One byte: plain ASCII, with NUL deliberately excluded.
    if (lead & 0x80) == 0 && lead > 0 {
        return (1, lead);
    }

    // Two bytes: 110xxxxx 10xxxxxx.
    if usable > 1 && (lead & 0xe0) == 0xc0 && is_continuation(1) {
        let symbol = (lead & 0x1f) << 6 | payload(1);
        // Anything that fits in 7 bits is an overlong encoding.
        if symbol > 0x7f {
            return (2, symbol);
        }
    }

    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
    if usable > 2 && (lead & 0xf0) == 0xe0 && is_continuation(1) && is_continuation(2) {
        let symbol = (lead & 0x0f) << 12 | payload(1) << 6 | payload(2);
        if symbol > 0x7ff {
            return (3, symbol);
        }
    }

    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    if usable > 3
        && (lead & 0xf8) == 0xf0
        && is_continuation(1)
        && is_continuation(2)
        && is_continuation(3)
    {
        let symbol = (lead & 0x07) << 18 | payload(1) << 12 | payload(2) << 6 | payload(3);
        if symbol > 0xffff && symbol <= 0x10_ffff {
            return (4, symbol);
        }
    }

    // Not UTF-8: emit a marker above the Unicode code space, tagged with the
    // offending byte.
    (1, 0x11_0000 | lead)
}
44
45pub(crate) fn is_mostly_utf8(
46    data: &[u8],
47    pos: usize,
48    mask: usize,
49    length: usize,
50    min_fraction: floatX,
51) -> bool {
52    let mut size_utf8: usize = 0;
53    let mut i: usize = 0;
54    while i < length {
55        let (bytes_read, symbol) = parse_as_utf8(&data[(pos.wrapping_add(i) & mask)..], length - i);
56        i = i.wrapping_add(bytes_read);
57        if symbol < 0x11_0000 {
58            size_utf8 = size_utf8.wrapping_add(bytes_read);
59        }
60    }
61    size_utf8 as floatX > min_fraction * length as floatX
62}