1use crate::enc::floatX;
2
/// Decodes one UTF-8 encoded code point from the start of `input`.
///
/// At most `size` bytes are examined, and never more than `input` actually
/// holds, so a `size` larger than the slice cannot cause an out-of-bounds
/// panic.
///
/// Returns `(bytes_consumed, symbol)`:
///
/// * For a well-formed, shortest-form sequence, `bytes_consumed` is its length
///   (1 to 4) and `symbol` is the decoded code point, which is always below
///   `0x11_0000`.
/// * Otherwise `bytes_consumed` is 1 and `symbol` is `0x11_0000 | first_byte`,
///   i.e. a value of at least `0x11_0000` that marks the byte as invalid.
///   The NUL byte is reported this way as well.
/// * An empty `input` yields `(1, 0x11_0000)` so that a scanning caller still
///   advances.
///
/// Note: three-byte sequences are only checked for being above `0x7ff`, so
/// encodings of the surrogate range are accepted as valid.
fn parse_as_utf8(input: &[u8], size: usize) -> (usize, i32) {
    // Symbols at or above this value flag an invalid byte.
    const INVALID_BASE: i32 = 0x11_0000;

    let first = match input.first() {
        Some(&byte) => byte,
        None => return (1, INVALID_BASE),
    };
    let lead = i32::from(first);

    // Number of bytes that may be inspected: bounded by the caller's limit
    // and by what the slice really contains.
    let limit = size.min(input.len());

    // Both closures are only called after `limit` proves the index is valid.
    let is_continuation = |index: usize| (input[index] & 0xc0) == 0x80;
    let payload = |index: usize| i32::from(input[index]) & 0x3f;

    // One byte: ASCII, excluding NUL. This check deliberately ignores `size`.
    if first != 0 && (first & 0x80) == 0 {
        return (1, lead);
    }

    // Two bytes: 110xxxxx 10xxxxxx.
    if limit > 1 && (first & 0xe0) == 0xc0 && is_continuation(1) {
        let symbol = ((lead & 0x1f) << 6) | payload(1);
        // Reject overlong encodings of values that fit in one byte.
        if symbol > 0x7f {
            return (2, symbol);
        }
    }

    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
    if limit > 2 && (first & 0xf0) == 0xe0 && is_continuation(1) && is_continuation(2) {
        let symbol = ((lead & 0x0f) << 12) | (payload(1) << 6) | payload(2);
        // Reject overlong encodings of values that fit in two bytes.
        if symbol > 0x7ff {
            return (3, symbol);
        }
    }

    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    if limit > 3
        && (first & 0xf8) == 0xf0
        && is_continuation(1)
        && is_continuation(2)
        && is_continuation(3)
    {
        let symbol = ((lead & 0x07) << 18) | (payload(1) << 12) | (payload(2) << 6) | payload(3);
        // Reject overlong encodings and values past the last code point.
        if symbol > 0xffff && symbol <= 0x10_ffff {
            return (4, symbol);
        }
    }

    // Not a valid sequence: consume one byte and flag it as invalid.
    (1, INVALID_BASE | lead)
}
44
45pub(crate) fn is_mostly_utf8(
46 data: &[u8],
47 pos: usize,
48 mask: usize,
49 length: usize,
50 min_fraction: floatX,
51) -> bool {
52 let mut size_utf8: usize = 0;
53 let mut i: usize = 0;
54 while i < length {
55 let (bytes_read, symbol) = parse_as_utf8(&data[(pos.wrapping_add(i) & mask)..], length - i);
56 i = i.wrapping_add(bytes_read);
57 if symbol < 0x11_0000 {
58 size_utf8 = size_utf8.wrapping_add(bytes_read);
59 }
60 }
61 size_utf8 as floatX > min_fraction * length as floatX
62}