1#![no_std]
69#[cfg(feature = "std")]
71extern crate std;
72#[macro_use]
73extern crate alloc;
74#[cfg(feature = "smallvec")]
75extern crate smallvec;
76
77pub mod data_source;
78pub mod deprecated;
79pub mod format_chars;
80pub mod level;
81pub mod utf16;
82
83mod char_data;
84mod explicit;
85mod implicit;
86mod prepare;
87
88pub use crate::char_data::{BidiClass, UNICODE_VERSION};
89pub use crate::data_source::BidiDataSource;
90pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL};
91pub use crate::prepare::{LevelRun, LevelRunVec};
92
93#[cfg(feature = "hardcoded-data")]
94pub use crate::char_data::{bidi_class, HardcodedBidiData};
95
96use alloc::borrow::Cow;
97use alloc::string::String;
98use alloc::vec::Vec;
99use core::char;
100use core::cmp;
101use core::iter::repeat;
102use core::ops::Range;
103use core::str::CharIndices;
104#[cfg(feature = "smallvec")]
105use smallvec::SmallVec;
106
107use crate::format_chars as chars;
108use crate::BidiClass::*;
109
/// Abstraction over the text encodings the bidi algorithm can operate on.
///
/// The trait is sealed through `private::Sealed`, which is only implemented
/// for `str` and `[u16]`, so no other implementations can be added from
/// outside this crate. Indices and lengths are expressed in the code units
/// of the underlying encoding (bytes for `str`).
pub trait TextSource<'text>: private::Sealed {
    /// Iterator over the characters of the text.
    type CharIter: Iterator<Item = char>;
    /// Iterator over `(index, char)` pairs of the text.
    type CharIndexIter: Iterator<Item = (usize, char)>;
    /// Iterator over `(index, length)` pairs, one per character of the text.
    type IndexLenIter: Iterator<Item = (usize, usize)>;

    /// Length of the text in code units (for `str`, the byte length).
    #[doc(hidden)]
    fn len(&self) -> usize;

    /// Returns the character starting at `index` together with its encoded
    /// length, or `None` when no character can be read there.
    ///
    /// For `str` this is `None` when `index` is out of range, not on a
    /// character boundary, or equal to the length of the text.
    #[doc(hidden)]
    fn char_at(&self, index: usize) -> Option<(char, usize)>;

    /// Returns the part of the text covered by `range`.
    #[doc(hidden)]
    fn subrange(&self, range: Range<usize>) -> &Self;

    /// Iterates over the characters of the text.
    #[doc(hidden)]
    fn chars(&'text self) -> Self::CharIter;

    /// Iterates over the characters of the text together with the index at
    /// which each one starts.
    #[doc(hidden)]
    fn char_indices(&'text self) -> Self::CharIndexIter;

    /// Iterates over `(start index, encoded length)` for each character.
    #[doc(hidden)]
    fn indices_lengths(&'text self) -> Self::IndexLenIter;

    /// Number of code units `ch` occupies in this encoding
    /// (`char::len_utf8` for `str`).
    #[doc(hidden)]
    fn char_len(ch: char) -> usize;
}
155
/// Private module holding the supertrait used to seal `TextSource`.
mod private {
    /// Marker supertrait of `TextSource`. Because this module is not public,
    /// only the types listed below can implement `TextSource`.
    pub trait Sealed {}

    // UTF-8 text.
    impl Sealed for str {}
    // UTF-16 text, as a slice of code units.
    impl Sealed for [u16] {}
}
163
/// Direction reported for a piece of text.
#[derive(PartialEq, Debug)]
pub enum Direction {
    /// Left-to-right.
    Ltr,
    /// Right-to-left.
    Rtl,
    /// Both LTR and RTL levels are present (`para_direction`), or no strong
    /// character was found when determining the base direction
    /// (`get_base_direction_impl`).
    Mixed,
}
170
171#[derive(Clone, Debug, PartialEq)]
173pub struct ParagraphInfo {
174 pub range: Range<usize>,
178
179 pub level: Level,
183}
184
185impl ParagraphInfo {
186 pub fn len(&self) -> usize {
188 self.range.end - self.range.start
189 }
190}
191
192#[derive(PartialEq, Debug)]
196pub struct InitialInfo<'text> {
197 pub text: &'text str,
199
200 pub original_classes: Vec<BidiClass>,
203
204 pub paragraphs: Vec<ParagraphInfo>,
206}
207
208impl<'text> InitialInfo<'text> {
209 #[cfg_attr(feature = "flame_it", flamer::flame)]
219 #[cfg(feature = "hardcoded-data")]
220 pub fn new(text: &str, default_para_level: Option<Level>) -> InitialInfo<'_> {
221 Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
222 }
223
224 #[cfg_attr(feature = "flame_it", flamer::flame)]
234 pub fn new_with_data_source<'a, D: BidiDataSource>(
235 data_source: &D,
236 text: &'a str,
237 default_para_level: Option<Level>,
238 ) -> InitialInfo<'a> {
239 InitialInfoExt::new_with_data_source(data_source, text, default_para_level).base
240 }
241}
242
243#[derive(PartialEq, Debug)]
245struct InitialInfoExt<'text> {
246 base: InitialInfo<'text>,
248
249 flags: Vec<ParagraphInfoFlags>,
253}
254
255#[derive(PartialEq, Debug)]
256struct ParagraphInfoFlags {
257 is_pure_ltr: bool,
258 has_isolate_controls: bool,
259}
260
261impl<'text> InitialInfoExt<'text> {
262 #[cfg_attr(feature = "flame_it", flamer::flame)]
272 pub fn new_with_data_source<'a, D: BidiDataSource>(
273 data_source: &D,
274 text: &'a str,
275 default_para_level: Option<Level>,
276 ) -> InitialInfoExt<'a> {
277 let mut paragraphs = Vec::<ParagraphInfo>::new();
278 let mut flags = Vec::<ParagraphInfoFlags>::new();
279 let (original_classes, _, _, _) = compute_initial_info(
280 data_source,
281 text,
282 default_para_level,
283 Some((&mut paragraphs, &mut flags)),
284 );
285
286 InitialInfoExt {
287 base: InitialInfo {
288 text,
289 original_classes,
290 paragraphs,
291 },
292 flags,
293 }
294 }
295}
296
297fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
305 data_source: &D,
306 text: &'a T,
307 default_para_level: Option<Level>,
308 mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<ParagraphInfoFlags>)>,
309) -> (Vec<BidiClass>, Level, bool, bool) {
310 let mut original_classes = Vec::with_capacity(text.len());
311
312 #[cfg(feature = "smallvec")]
314 let mut isolate_stack = SmallVec::<[usize; 8]>::new();
315 #[cfg(not(feature = "smallvec"))]
316 let mut isolate_stack = Vec::new();
317
318 debug_assert!(
319 if let Some((ref paragraphs, ref flags)) = split_paragraphs {
320 paragraphs.is_empty() && flags.is_empty()
321 } else {
322 true
323 }
324 );
325
326 let mut para_start = 0;
327 let mut para_level = default_para_level;
328
329 let mut is_pure_ltr = true;
332 let mut has_isolate_controls = false;
334
335 #[cfg(feature = "flame_it")]
336 flame::start("compute_initial_info(): iter text.char_indices()");
337
338 for (i, c) in text.char_indices() {
339 let class = data_source.bidi_class(c);
340
341 #[cfg(feature = "flame_it")]
342 flame::start("original_classes.extend()");
343
344 let len = T::char_len(c);
345 original_classes.extend(repeat(class).take(len));
346
347 #[cfg(feature = "flame_it")]
348 flame::end("original_classes.extend()");
349
350 match class {
351 B => {
352 if let Some((ref mut paragraphs, ref mut flags)) = split_paragraphs {
353 let para_end = i + len;
356 paragraphs.push(ParagraphInfo {
357 range: para_start..para_end,
358 level: para_level.unwrap_or(LTR_LEVEL),
360 });
361 flags.push(ParagraphInfoFlags {
362 is_pure_ltr,
363 has_isolate_controls,
364 });
365 para_start = para_end;
367 para_level = default_para_level;
371 is_pure_ltr = true;
372 has_isolate_controls = false;
373 isolate_stack.clear();
374 }
375 }
376
377 L | R | AL => {
378 if class != L {
379 is_pure_ltr = false;
380 }
381 match isolate_stack.last() {
382 Some(&start) => {
383 if original_classes[start] == FSI {
384 for j in 0..T::char_len(chars::FSI) {
387 original_classes[start + j] = if class == L { LRI } else { RLI };
388 }
389 }
390 }
391
392 None => {
393 if para_level.is_none() {
394 para_level = Some(if class != L { RTL_LEVEL } else { LTR_LEVEL });
398 }
399 }
400 }
401 }
402
403 AN | LRE | RLE | LRO | RLO => {
404 is_pure_ltr = false;
405 }
406
407 RLI | LRI | FSI => {
408 is_pure_ltr = false;
409 has_isolate_controls = true;
410 isolate_stack.push(i);
411 }
412
413 PDI => {
414 isolate_stack.pop();
415 }
416
417 _ => {}
418 }
419 }
420
421 if let Some((paragraphs, flags)) = split_paragraphs {
422 if para_start < text.len() {
423 paragraphs.push(ParagraphInfo {
424 range: para_start..text.len(),
425 level: para_level.unwrap_or(LTR_LEVEL),
426 });
427 flags.push(ParagraphInfoFlags {
428 is_pure_ltr,
429 has_isolate_controls,
430 });
431 }
432 debug_assert_eq!(paragraphs.len(), flags.len());
433 }
434 debug_assert_eq!(original_classes.len(), text.len());
435
436 #[cfg(feature = "flame_it")]
437 flame::end("compute_initial_info(): iter text.char_indices()");
438
439 (
440 original_classes,
441 para_level.unwrap_or(LTR_LEVEL),
442 is_pure_ltr,
443 has_isolate_controls,
444 )
445}
446
447#[derive(Debug, PartialEq)]
454pub struct BidiInfo<'text> {
455 pub text: &'text str,
457
458 pub original_classes: Vec<BidiClass>,
460
461 pub levels: Vec<Level>,
463
464 pub paragraphs: Vec<ParagraphInfo>,
469}
470
471impl<'text> BidiInfo<'text> {
472 #[cfg_attr(feature = "flame_it", flamer::flame)]
482 #[cfg(feature = "hardcoded-data")]
483 #[inline]
484 pub fn new(text: &str, default_para_level: Option<Level>) -> BidiInfo<'_> {
485 Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
486 }
487
488 #[cfg_attr(feature = "flame_it", flamer::flame)]
497 pub fn new_with_data_source<'a, D: BidiDataSource>(
498 data_source: &D,
499 text: &'a str,
500 default_para_level: Option<Level>,
501 ) -> BidiInfo<'a> {
502 let InitialInfoExt { base, flags, .. } =
503 InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
504
505 let mut levels = Vec::<Level>::with_capacity(text.len());
506 let mut processing_classes = base.original_classes.clone();
507
508 for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
509 let text = &text[para.range.clone()];
510 let original_classes = &base.original_classes[para.range.clone()];
511
512 compute_bidi_info_for_para(
513 data_source,
514 para,
515 flags.is_pure_ltr,
516 flags.has_isolate_controls,
517 text,
518 original_classes,
519 &mut processing_classes,
520 &mut levels,
521 );
522 }
523
524 BidiInfo {
525 text,
526 original_classes: base.original_classes,
527 paragraphs: base.paragraphs,
528 levels,
529 }
530 }
531
532 #[cfg_attr(feature = "flame_it", flamer::flame)]
546 pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
547 assert!(line.start <= self.levels.len());
548 assert!(line.end <= self.levels.len());
549
550 let mut levels = self.levels.clone();
551 let line_classes = &self.original_classes[line.clone()];
552 let line_levels = &mut levels[line.clone()];
553
554 reorder_levels(
555 line_classes,
556 line_levels,
557 self.text.subrange(line),
558 para.level,
559 );
560
561 levels
562 }
563
564 #[cfg_attr(feature = "flame_it", flamer::flame)]
578 pub fn reordered_levels_per_char(
579 &self,
580 para: &ParagraphInfo,
581 line: Range<usize>,
582 ) -> Vec<Level> {
583 let levels = self.reordered_levels(para, line);
584 self.text.char_indices().map(|(i, _)| levels[i]).collect()
585 }
586
587 #[cfg_attr(feature = "flame_it", flamer::flame)]
594 pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, str> {
595 if !level::has_rtl(&self.levels[line.clone()]) {
596 return self.text[line].into();
597 }
598 let (levels, runs) = self.visual_runs(para, line.clone());
599 reorder_line(self.text, line, levels, runs)
600 }
601
602 #[cfg_attr(feature = "flame_it", flamer::flame)]
638 #[inline]
639 pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
640 reorder_visual(levels)
641 }
642
643 #[cfg_attr(feature = "flame_it", flamer::flame)]
668 #[inline]
669 pub fn visual_runs(
670 &self,
671 para: &ParagraphInfo,
672 line: Range<usize>,
673 ) -> (Vec<Level>, Vec<LevelRun>) {
674 let levels = self.reordered_levels(para, line.clone());
675 visual_runs_for_line(levels, &line)
676 }
677
678 #[inline]
682 pub fn has_rtl(&self) -> bool {
683 level::has_rtl(&self.levels)
684 }
685}
686
687#[derive(Debug, PartialEq)]
693pub struct ParagraphBidiInfo<'text> {
694 pub text: &'text str,
696
697 pub original_classes: Vec<BidiClass>,
699
700 pub levels: Vec<Level>,
702
703 pub paragraph_level: Level,
705
706 pub is_pure_ltr: bool,
708}
709
710impl<'text> ParagraphBidiInfo<'text> {
711 #[cfg_attr(feature = "flame_it", flamer::flame)]
721 #[cfg(feature = "hardcoded-data")]
722 #[inline]
723 pub fn new(text: &str, default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
724 Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
725 }
726
727 #[cfg_attr(feature = "flame_it", flamer::flame)]
734 pub fn new_with_data_source<'a, D: BidiDataSource>(
735 data_source: &D,
736 text: &'a str,
737 default_para_level: Option<Level>,
738 ) -> ParagraphBidiInfo<'a> {
739 let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
742 compute_initial_info(data_source, text, default_para_level, None);
743
744 let mut levels = Vec::<Level>::with_capacity(text.len());
745 let mut processing_classes = original_classes.clone();
746
747 let para_info = ParagraphInfo {
748 range: Range {
749 start: 0,
750 end: text.len(),
751 },
752 level: paragraph_level,
753 };
754
755 compute_bidi_info_for_para(
756 data_source,
757 ¶_info,
758 is_pure_ltr,
759 has_isolate_controls,
760 text,
761 &original_classes,
762 &mut processing_classes,
763 &mut levels,
764 );
765
766 ParagraphBidiInfo {
767 text,
768 original_classes,
769 levels,
770 paragraph_level,
771 is_pure_ltr,
772 }
773 }
774
775 #[cfg_attr(feature = "flame_it", flamer::flame)]
783 pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
784 assert!(line.start <= self.levels.len());
785 assert!(line.end <= self.levels.len());
786
787 let mut levels = self.levels.clone();
788 let line_classes = &self.original_classes[line.clone()];
789 let line_levels = &mut levels[line.clone()];
790
791 reorder_levels(
792 line_classes,
793 line_levels,
794 self.text.subrange(line),
795 self.paragraph_level,
796 );
797
798 levels
799 }
800
801 #[cfg_attr(feature = "flame_it", flamer::flame)]
809 pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
810 let levels = self.reordered_levels(line);
811 self.text.char_indices().map(|(i, _)| levels[i]).collect()
812 }
813
814 #[cfg_attr(feature = "flame_it", flamer::flame)]
820 pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, str> {
821 if !level::has_rtl(&self.levels[line.clone()]) {
822 return self.text[line].into();
823 }
824
825 let (levels, runs) = self.visual_runs(line.clone());
826
827 reorder_line(self.text, line, levels, runs)
828 }
829
830 #[cfg_attr(feature = "flame_it", flamer::flame)]
834 #[inline]
835 pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
836 reorder_visual(levels)
837 }
838
839 #[cfg_attr(feature = "flame_it", flamer::flame)]
847 #[inline]
848 pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
849 let levels = self.reordered_levels(line.clone());
850 visual_runs_for_line(levels, &line)
851 }
852
853 #[inline]
857 pub fn has_rtl(&self) -> bool {
858 !self.is_pure_ltr
859 }
860
861 #[inline]
863 pub fn direction(&self) -> Direction {
864 para_direction(&self.levels)
865 }
866}
867
868fn reorder_line(
885 text: &str,
886 line: Range<usize>,
887 levels: Vec<Level>,
888 runs: Vec<LevelRun>,
889) -> Cow<'_, str> {
890 if runs.iter().all(|run| levels[run.start].is_ltr()) {
892 return text[line].into();
893 }
894
895 let mut result = String::with_capacity(line.len());
896 for run in runs {
897 if levels[run.start].is_rtl() {
898 result.extend(text[run].chars().rev());
899 } else {
900 result.push_str(&text[run]);
901 }
902 }
903 result.into()
904}
905
906fn visual_runs_for_line(levels: Vec<Level>, line: &Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
931 let mut runs = Vec::new();
933 let mut start = line.start;
934 let mut run_level = levels[start];
935 let mut min_level = run_level;
936 let mut max_level = run_level;
937
938 for (i, &new_level) in levels.iter().enumerate().take(line.end).skip(start + 1) {
939 if new_level != run_level {
940 runs.push(start..i);
942 start = i;
943 run_level = new_level;
944 min_level = cmp::min(run_level, min_level);
945 max_level = cmp::max(run_level, max_level);
946 }
947 }
948 runs.push(start..line.end);
949
950 let run_count = runs.len();
951
952 min_level = min_level.new_lowest_ge_rtl().expect("Level error");
957 while max_level >= min_level {
960 let mut seq_start = 0;
962 while seq_start < run_count {
963 if levels[runs[seq_start].start] < max_level {
964 seq_start += 1;
965 continue;
966 }
967
968 let mut seq_end = seq_start + 1;
970 while seq_end < run_count {
971 if levels[runs[seq_end].start] < max_level {
972 break;
973 }
974 seq_end += 1;
975 }
976 runs[seq_start..seq_end].reverse();
978
979 seq_start = seq_end;
980 }
981 max_level
982 .lower(1)
983 .expect("Lowering embedding level below zero");
984 }
985 (levels, runs)
986}
987
988fn reorder_visual(levels: &[Level]) -> Vec<usize> {
1003 fn next_range(levels: &[level::Level], mut start_index: usize, max: Level) -> Range<usize> {
1006 if levels.is_empty() || start_index >= levels.len() {
1007 return start_index..start_index;
1008 }
1009 while let Some(l) = levels.get(start_index) {
1010 if *l >= max {
1011 break;
1012 }
1013 start_index += 1;
1014 }
1015
1016 if levels.get(start_index).is_none() {
1017 return start_index..start_index;
1020 }
1021
1022 let mut end_index = start_index + 1;
1023 while let Some(l) = levels.get(end_index) {
1024 if *l < max {
1025 return start_index..end_index;
1026 }
1027 end_index += 1;
1028 }
1029
1030 start_index..end_index
1031 }
1032
1033 if levels.is_empty() {
1037 return vec![];
1038 }
1039
1040 let (mut min, mut max) = levels
1042 .iter()
1043 .fold((levels[0], levels[0]), |(min, max), &l| {
1044 (cmp::min(min, l), cmp::max(max, l))
1045 });
1046
1047 let mut result: Vec<usize> = (0..levels.len()).collect();
1049
1050 if min == max && min.is_ltr() {
1051 return result;
1053 }
1054
1055 min = min.new_lowest_ge_rtl().expect("Level error");
1058
1059 while min <= max {
1066 let mut range = 0..0;
1067 loop {
1068 range = next_range(levels, range.end, max);
1069 result[range.clone()].reverse();
1070
1071 if range.end >= levels.len() {
1072 break;
1073 }
1074 }
1075
1076 max.lower(1).expect("Level error");
1077 }
1078
1079 result
1080}
1081
1082fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1085 data_source: &D,
1086 para: &ParagraphInfo,
1087 is_pure_ltr: bool,
1088 has_isolate_controls: bool,
1089 text: &'a T,
1090 original_classes: &[BidiClass],
1091 processing_classes: &mut [BidiClass],
1092 levels: &mut Vec<Level>,
1093) {
1094 let new_len = levels.len() + para.range.len();
1095 levels.resize(new_len, para.level);
1096 if para.level == LTR_LEVEL && is_pure_ltr {
1097 return;
1098 }
1099
1100 let processing_classes = &mut processing_classes[para.range.clone()];
1101 let levels = &mut levels[para.range.clone()];
1102 let mut level_runs = LevelRunVec::new();
1103
1104 explicit::compute(
1105 text,
1106 para.level,
1107 original_classes,
1108 levels,
1109 processing_classes,
1110 &mut level_runs,
1111 );
1112
1113 let mut sequences = prepare::IsolatingRunSequenceVec::new();
1114 prepare::isolating_run_sequences(
1115 para.level,
1116 original_classes,
1117 levels,
1118 level_runs,
1119 has_isolate_controls,
1120 &mut sequences,
1121 );
1122 for sequence in &sequences {
1123 implicit::resolve_weak(text, sequence, processing_classes);
1124 implicit::resolve_neutral(
1125 text,
1126 data_source,
1127 sequence,
1128 levels,
1129 original_classes,
1130 processing_classes,
1131 );
1132 }
1133
1134 implicit::resolve_levels(processing_classes, levels);
1135
1136 assign_levels_to_removed_chars(para.level, original_classes, levels);
1137}
1138
1139fn reorder_levels<'a, T: TextSource<'a> + ?Sized>(
1147 line_classes: &[BidiClass],
1148 line_levels: &mut [Level],
1149 line_text: &'a T,
1150 para_level: Level,
1151) {
1152 let mut reset_from: Option<usize> = Some(0);
1155 let mut reset_to: Option<usize> = None;
1156 let mut prev_level = para_level;
1157 for (i, c) in line_text.char_indices() {
1158 match line_classes[i] {
1159 B | S => {
1161 assert_eq!(reset_to, None);
1162 reset_to = Some(i + T::char_len(c));
1163 if reset_from.is_none() {
1164 reset_from = Some(i);
1165 }
1166 }
1167 WS | FSI | LRI | RLI | PDI => {
1169 if reset_from.is_none() {
1170 reset_from = Some(i);
1171 }
1172 }
1173 RLE | LRE | RLO | LRO | PDF | BN => {
1176 if reset_from.is_none() {
1177 reset_from = Some(i);
1178 }
1179 line_levels[i] = prev_level;
1181 }
1182 _ => {
1183 reset_from = None;
1184 }
1185 }
1186 if let (Some(from), Some(to)) = (reset_from, reset_to) {
1187 for level in &mut line_levels[from..to] {
1188 *level = para_level;
1189 }
1190 reset_from = None;
1191 reset_to = None;
1192 }
1193 prev_level = line_levels[i];
1194 }
1195 if let Some(from) = reset_from {
1196 for level in &mut line_levels[from..] {
1197 *level = para_level;
1198 }
1199 }
1200}
1201
1202#[derive(Debug)]
1206pub struct Paragraph<'a, 'text> {
1207 pub info: &'a BidiInfo<'text>,
1208 pub para: &'a ParagraphInfo,
1209}
1210
1211impl<'a, 'text> Paragraph<'a, 'text> {
1212 #[inline]
1213 pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
1214 Paragraph { info, para }
1215 }
1216
1217 #[inline]
1219 pub fn direction(&self) -> Direction {
1220 para_direction(&self.info.levels[self.para.range.clone()])
1221 }
1222
1223 #[inline]
1225 pub fn level_at(&self, pos: usize) -> Level {
1226 let actual_position = self.para.range.start + pos;
1227 self.info.levels[actual_position]
1228 }
1229}
1230
1231#[cfg_attr(feature = "flame_it", flamer::flame)]
1233fn para_direction(levels: &[Level]) -> Direction {
1234 let mut ltr = false;
1235 let mut rtl = false;
1236 for level in levels {
1237 if level.is_ltr() {
1238 ltr = true;
1239 if rtl {
1240 return Direction::Mixed;
1241 }
1242 }
1243
1244 if level.is_rtl() {
1245 rtl = true;
1246 if ltr {
1247 return Direction::Mixed;
1248 }
1249 }
1250 }
1251
1252 if ltr {
1253 return Direction::Ltr;
1254 }
1255
1256 Direction::Rtl
1257}
1258
1259#[cfg_attr(feature = "flame_it", flamer::flame)]
1264fn assign_levels_to_removed_chars(para_level: Level, classes: &[BidiClass], levels: &mut [Level]) {
1265 for i in 0..levels.len() {
1266 if prepare::removed_by_x9(classes[i]) {
1267 levels[i] = if i > 0 { levels[i - 1] } else { para_level };
1268 }
1269 }
1270}
1271
/// Base direction of the first paragraph of `text`, using the crate's
/// hardcoded bidi data.
///
/// Scanning stops at the first paragraph separator. `Direction::Mixed` is
/// returned when no strong character is found.
#[cfg(feature = "hardcoded-data")]
#[inline]
pub fn get_base_direction<'a, T>(text: &'a T) -> Direction
where
    T: TextSource<'a> + ?Sized,
{
    get_base_direction_with_data_source(&HardcodedBidiData, text)
}
1293
/// Base direction of `text`, using the crate's hardcoded bidi data.
///
/// Unlike `get_base_direction`, scanning continues past paragraph separators
/// until a strong character is found. `Direction::Mixed` is returned when
/// there is none.
#[cfg(feature = "hardcoded-data")]
#[inline]
pub fn get_base_direction_full<'a, T>(text: &'a T) -> Direction
where
    T: TextSource<'a> + ?Sized,
{
    get_base_direction_full_with_data_source(&HardcodedBidiData, text)
}
1306
1307#[inline]
1308pub fn get_base_direction_with_data_source<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1309 data_source: &D,
1310 text: &'a T,
1311) -> Direction {
1312 get_base_direction_impl(data_source, text, false)
1313}
1314
1315#[inline]
1316pub fn get_base_direction_full_with_data_source<
1317 'a,
1318 D: BidiDataSource,
1319 T: TextSource<'a> + ?Sized,
1320>(
1321 data_source: &D,
1322 text: &'a T,
1323) -> Direction {
1324 get_base_direction_impl(data_source, text, true)
1325}
1326
1327fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1328 data_source: &D,
1329 text: &'a T,
1330 use_full_text: bool,
1331) -> Direction {
1332 let mut isolate_level = 0;
1333 for c in text.chars() {
1334 match data_source.bidi_class(c) {
1335 LRI | RLI | FSI => isolate_level += 1,
1336 PDI if isolate_level > 0 => isolate_level -= 1,
1337 L if isolate_level == 0 => return Direction::Ltr,
1338 R | AL if isolate_level == 0 => return Direction::Rtl,
1339 B if !use_full_text => break,
1340 B if use_full_text => isolate_level = 0,
1341 _ => (),
1342 }
1343 }
1344 Direction::Mixed
1348}
1349
1350impl<'text> TextSource<'text> for str {
1352 type CharIter = core::str::Chars<'text>;
1353 type CharIndexIter = core::str::CharIndices<'text>;
1354 type IndexLenIter = Utf8IndexLenIter<'text>;
1355
1356 #[inline]
1357 fn len(&self) -> usize {
1358 (self as &str).len()
1359 }
1360 #[inline]
1361 fn char_at(&self, index: usize) -> Option<(char, usize)> {
1362 if let Some(slice) = self.get(index..) {
1363 if let Some(ch) = slice.chars().next() {
1364 return Some((ch, ch.len_utf8()));
1365 }
1366 }
1367 None
1368 }
1369 #[inline]
1370 fn subrange(&self, range: Range<usize>) -> &Self {
1371 &(self as &str)[range]
1372 }
1373 #[inline]
1374 fn chars(&'text self) -> Self::CharIter {
1375 (self as &str).chars()
1376 }
1377 #[inline]
1378 fn char_indices(&'text self) -> Self::CharIndexIter {
1379 (self as &str).char_indices()
1380 }
1381 #[inline]
1382 fn indices_lengths(&'text self) -> Self::IndexLenIter {
1383 Utf8IndexLenIter::new(self)
1384 }
1385 #[inline]
1386 fn char_len(ch: char) -> usize {
1387 ch.len_utf8()
1388 }
1389}
1390
/// Iterator over a UTF-8 string that yields, for each character, its byte
/// offset and its length in bytes.
#[derive(Debug)]
pub struct Utf8IndexLenIter<'text> {
    /// Underlying `(offset, char)` iterator.
    iter: CharIndices<'text>,
}

impl<'text> Utf8IndexLenIter<'text> {
    /// Creates an iterator positioned at the start of `text`.
    #[inline]
    pub fn new(text: &'text str) -> Self {
        let iter = text.char_indices();
        Utf8IndexLenIter { iter }
    }
}

impl Iterator for Utf8IndexLenIter<'_> {
    type Item = (usize, usize);

    /// Advances to the next character and returns `(offset, UTF-8 length)`.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().map(|(pos, ch)| (pos, ch.len_utf8()))
    }
}
1417
/// Test helper: encodes `s` as a vector of UTF-16 code units.
#[cfg(test)]
fn to_utf16(s: &str) -> Vec<u16> {
    let mut units = Vec::new();
    units.extend(s.encode_utf16());
    units
}
1422
1423#[cfg(test)]
1424#[cfg(feature = "hardcoded-data")]
1425mod tests {
1426 use super::*;
1427
1428 use utf16::{
1429 BidiInfo as BidiInfoU16, InitialInfo as InitialInfoU16, Paragraph as ParagraphU16,
1430 ParagraphBidiInfo as ParagraphBidiInfoU16,
1431 };
1432
1433 #[test]
1434 fn test_utf16_text_source() {
1435 let text: &[u16] =
1436 &[0x41, 0xD801, 0xDC01, 0x20, 0xD800, 0x20, 0xDFFF, 0x20, 0xDC00, 0xD800];
1437 assert_eq!(text.char_at(0), Some(('A', 1)));
1438 assert_eq!(text.char_at(1), Some(('\u{10401}', 2)));
1439 assert_eq!(text.char_at(2), None);
1440 assert_eq!(text.char_at(3), Some((' ', 1)));
1441 assert_eq!(text.char_at(4), Some((char::REPLACEMENT_CHARACTER, 1)));
1442 assert_eq!(text.char_at(5), Some((' ', 1)));
1443 assert_eq!(text.char_at(6), Some((char::REPLACEMENT_CHARACTER, 1)));
1444 assert_eq!(text.char_at(7), Some((' ', 1)));
1445 assert_eq!(text.char_at(8), Some((char::REPLACEMENT_CHARACTER, 1)));
1446 assert_eq!(text.char_at(9), Some((char::REPLACEMENT_CHARACTER, 1)));
1447 assert_eq!(text.char_at(10), None);
1448 }
1449
1450 #[test]
1451 fn test_utf16_char_iter() {
1452 let text: &[u16] =
1453 &[0x41, 0xD801, 0xDC01, 0x20, 0xD800, 0x20, 0xDFFF, 0x20, 0xDC00, 0xD800];
1454 assert_eq!(text.len(), 10);
1455 assert_eq!(text.chars().count(), 9);
1456 let mut chars = text.chars();
1457 assert_eq!(chars.next(), Some('A'));
1458 assert_eq!(chars.next(), Some('\u{10401}'));
1459 assert_eq!(chars.next(), Some(' '));
1460 assert_eq!(chars.next(), Some('\u{FFFD}'));
1461 assert_eq!(chars.next(), Some(' '));
1462 assert_eq!(chars.next(), Some('\u{FFFD}'));
1463 assert_eq!(chars.next(), Some(' '));
1464 assert_eq!(chars.next(), Some('\u{FFFD}'));
1465 assert_eq!(chars.next(), Some('\u{FFFD}'));
1466 assert_eq!(chars.next(), None);
1467 }
1468
1469 #[test]
1470 fn test_initial_text_info() {
1471 let tests = vec![
1472 (
1473 "a1",
1475 vec![L, EN],
1477 vec![ParagraphInfo {
1479 range: 0..2,
1480 level: LTR_LEVEL,
1481 }],
1482 vec![L, EN],
1484 vec![ParagraphInfo {
1486 range: 0..2,
1487 level: LTR_LEVEL,
1488 }],
1489 ),
1490 (
1491 "\u{0639} \u{05D0}",
1493 vec![AL, AL, WS, R, R],
1494 vec![ParagraphInfo {
1495 range: 0..5,
1496 level: RTL_LEVEL,
1497 }],
1498 vec![AL, WS, R],
1499 vec![ParagraphInfo {
1500 range: 0..3,
1501 level: RTL_LEVEL,
1502 }],
1503 ),
1504 (
1505 "\u{10A00}\u{12000}\u{1E900}",
1507 vec![R, R, R, R, L, L, L, L, R, R, R, R],
1508 vec![ParagraphInfo {
1509 range: 0..12,
1510 level: RTL_LEVEL,
1511 }],
1512 vec![R, R, L, L, R, R],
1513 vec![ParagraphInfo {
1514 range: 0..6,
1515 level: RTL_LEVEL,
1516 }],
1517 ),
1518 (
1519 "a\u{2029}b",
1520 vec![L, B, B, B, L],
1521 vec![
1522 ParagraphInfo {
1523 range: 0..4,
1524 level: LTR_LEVEL,
1525 },
1526 ParagraphInfo {
1527 range: 4..5,
1528 level: LTR_LEVEL,
1529 },
1530 ],
1531 vec![L, B, L],
1532 vec![
1533 ParagraphInfo {
1534 range: 0..2,
1535 level: LTR_LEVEL,
1536 },
1537 ParagraphInfo {
1538 range: 2..3,
1539 level: LTR_LEVEL,
1540 },
1541 ],
1542 ),
1543 (
1544 "\u{2068}א\u{2069}a", vec![RLI, RLI, RLI, R, R, PDI, PDI, PDI, L],
1546 vec![ParagraphInfo {
1547 range: 0..9,
1548 level: LTR_LEVEL,
1549 }],
1550 vec![RLI, R, PDI, L],
1551 vec![ParagraphInfo {
1552 range: 0..4,
1553 level: LTR_LEVEL,
1554 }],
1555 ),
1556 ];
1557
1558 for t in tests {
1559 assert_eq!(
1560 InitialInfo::new(t.0, None),
1561 InitialInfo {
1562 text: t.0,
1563 original_classes: t.1,
1564 paragraphs: t.2,
1565 }
1566 );
1567 let text = &to_utf16(t.0);
1568 assert_eq!(
1569 InitialInfoU16::new(text, None),
1570 InitialInfoU16 {
1571 text,
1572 original_classes: t.3,
1573 paragraphs: t.4,
1574 }
1575 );
1576 }
1577 }
1578
1579 #[test]
1580 #[cfg(feature = "hardcoded-data")]
1581 fn test_process_text() {
1582 let tests = vec![
1583 (
1584 "",
1586 Some(RTL_LEVEL),
1588 Level::vec(&[]),
1590 vec![],
1592 vec![],
1594 Level::vec(&[]),
1596 vec![],
1598 vec![],
1600 ),
1601 (
1602 "abc123",
1604 Some(LTR_LEVEL),
1606 Level::vec(&[0, 0, 0, 0, 0, 0]),
1608 vec![L, L, L, EN, EN, EN],
1610 vec![ParagraphInfo {
1612 range: 0..6,
1613 level: LTR_LEVEL,
1614 }],
1615 Level::vec(&[0, 0, 0, 0, 0, 0]),
1617 vec![L, L, L, EN, EN, EN],
1619 vec![ParagraphInfo {
1621 range: 0..6,
1622 level: LTR_LEVEL,
1623 }],
1624 ),
1625 (
1626 "abc \u{05D0}\u{05D1}\u{05D2}",
1627 Some(LTR_LEVEL),
1628 Level::vec(&[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]),
1629 vec![L, L, L, WS, R, R, R, R, R, R],
1630 vec![ParagraphInfo {
1631 range: 0..10,
1632 level: LTR_LEVEL,
1633 }],
1634 Level::vec(&[0, 0, 0, 0, 1, 1, 1]),
1635 vec![L, L, L, WS, R, R, R],
1636 vec![ParagraphInfo {
1637 range: 0..7,
1638 level: LTR_LEVEL,
1639 }],
1640 ),
1641 (
1642 "abc \u{05D0}\u{05D1}\u{05D2}",
1643 Some(RTL_LEVEL),
1644 Level::vec(&[2, 2, 2, 1, 1, 1, 1, 1, 1, 1]),
1645 vec![L, L, L, WS, R, R, R, R, R, R],
1646 vec![ParagraphInfo {
1647 range: 0..10,
1648 level: RTL_LEVEL,
1649 }],
1650 Level::vec(&[2, 2, 2, 1, 1, 1, 1]),
1651 vec![L, L, L, WS, R, R, R],
1652 vec![ParagraphInfo {
1653 range: 0..7,
1654 level: RTL_LEVEL,
1655 }],
1656 ),
1657 (
1658 "\u{05D0}\u{05D1}\u{05D2} abc",
1659 Some(LTR_LEVEL),
1660 Level::vec(&[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
1661 vec![R, R, R, R, R, R, WS, L, L, L],
1662 vec![ParagraphInfo {
1663 range: 0..10,
1664 level: LTR_LEVEL,
1665 }],
1666 Level::vec(&[1, 1, 1, 0, 0, 0, 0]),
1667 vec![R, R, R, WS, L, L, L],
1668 vec![ParagraphInfo {
1669 range: 0..7,
1670 level: LTR_LEVEL,
1671 }],
1672 ),
1673 (
1674 "\u{05D0}\u{05D1}\u{05D2} abc",
1675 None,
1676 Level::vec(&[1, 1, 1, 1, 1, 1, 1, 2, 2, 2]),
1677 vec![R, R, R, R, R, R, WS, L, L, L],
1678 vec![ParagraphInfo {
1679 range: 0..10,
1680 level: RTL_LEVEL,
1681 }],
1682 Level::vec(&[1, 1, 1, 1, 2, 2, 2]),
1683 vec![R, R, R, WS, L, L, L],
1684 vec![ParagraphInfo {
1685 range: 0..7,
1686 level: RTL_LEVEL,
1687 }],
1688 ),
1689 (
1690 "\u{063A}2\u{0638} \u{05D0}2\u{05D2}",
1691 Some(LTR_LEVEL),
1692 Level::vec(&[1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1]),
1693 vec![AL, AL, EN, AL, AL, WS, R, R, EN, R, R],
1694 vec![ParagraphInfo {
1695 range: 0..11,
1696 level: LTR_LEVEL,
1697 }],
1698 Level::vec(&[1, 2, 1, 1, 1, 2, 1]),
1699 vec![AL, EN, AL, WS, R, EN, R],
1700 vec![ParagraphInfo {
1701 range: 0..7,
1702 level: LTR_LEVEL,
1703 }],
1704 ),
1705 (
1706 "a א.\nג",
1707 None,
1708 Level::vec(&[0, 0, 1, 1, 0, 0, 1, 1]),
1709 vec![L, WS, R, R, CS, B, R, R],
1710 vec![
1711 ParagraphInfo {
1712 range: 0..6,
1713 level: LTR_LEVEL,
1714 },
1715 ParagraphInfo {
1716 range: 6..8,
1717 level: RTL_LEVEL,
1718 },
1719 ],
1720 Level::vec(&[0, 0, 1, 0, 0, 1]),
1721 vec![L, WS, R, CS, B, R],
1722 vec![
1723 ParagraphInfo {
1724 range: 0..5,
1725 level: LTR_LEVEL,
1726 },
1727 ParagraphInfo {
1728 range: 5..6,
1729 level: RTL_LEVEL,
1730 },
1731 ],
1732 ),
1733 (
1735 "\u{060B}\u{20CF}\u{06F9}",
1736 None,
1737 Level::vec(&[1, 1, 1, 1, 1, 2, 2]),
1738 vec![AL, AL, ET, ET, ET, EN, EN],
1739 vec![ParagraphInfo {
1740 range: 0..7,
1741 level: RTL_LEVEL,
1742 }],
1743 Level::vec(&[1, 1, 2]),
1744 vec![AL, ET, EN],
1745 vec![ParagraphInfo {
1746 range: 0..3,
1747 level: RTL_LEVEL,
1748 }],
1749 ),
1750 ];
1751
1752 for t in tests {
1753 assert_eq!(
1754 BidiInfo::new(t.0, t.1),
1755 BidiInfo {
1756 text: t.0,
1757 levels: t.2.clone(),
1758 original_classes: t.3.clone(),
1759 paragraphs: t.4.clone(),
1760 }
1761 );
1762 if t.4.len() == 0 {
1764 assert_eq!(
1765 ParagraphBidiInfo::new(t.0, t.1),
1766 ParagraphBidiInfo {
1767 text: t.0,
1768 original_classes: t.3.clone(),
1769 levels: t.2.clone(),
1770 paragraph_level: RTL_LEVEL,
1771 is_pure_ltr: true,
1772 }
1773 )
1774 }
1775 if t.4.len() == 1 {
1777 assert_eq!(
1778 ParagraphBidiInfo::new(t.0, t.1),
1779 ParagraphBidiInfo {
1780 text: t.0,
1781 original_classes: t.3,
1782 levels: t.2.clone(),
1783 paragraph_level: t.4[0].level,
1784 is_pure_ltr: !level::has_rtl(&t.2),
1785 }
1786 )
1787 }
1788 let text = &to_utf16(t.0);
1789 assert_eq!(
1790 BidiInfoU16::new(text, t.1),
1791 BidiInfoU16 {
1792 text,
1793 levels: t.5.clone(),
1794 original_classes: t.6.clone(),
1795 paragraphs: t.7.clone(),
1796 }
1797 );
1798 if t.7.len() == 1 {
1799 assert_eq!(
1800 ParagraphBidiInfoU16::new(text, t.1),
1801 ParagraphBidiInfoU16 {
1802 text: text,
1803 original_classes: t.6.clone(),
1804 levels: t.5.clone(),
1805 paragraph_level: t.7[0].level,
1806 is_pure_ltr: !level::has_rtl(&t.5),
1807 }
1808 )
1809 }
1810 }
1811 }
1812
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_paragraph_bidi_info() {
    // Each case is laid out as:
    //   (text, requested level,
    //    UTF-8 original classes, UTF-8 levels,
    //    UTF-16 original classes, UTF-16 levels,
    //    expected paragraph level, expected `is_pure_ltr`)
    //
    // The first two texts embed a '\n' and the last two a '\t' at the same
    // position; the expected levels of the '\n' cases equal those of the
    // corresponding '\t' cases.
    let cases = vec![
        (
            "a א.\nג",
            None,
            vec![L, WS, R, R, CS, B, R, R],
            Level::vec(&[0, 0, 1, 1, 1, 1, 1, 1]),
            vec![L, WS, R, CS, B, R],
            Level::vec(&[0, 0, 1, 1, 1, 1]),
            LTR_LEVEL,
            false,
        ),
        (
            "\u{5d1} a.\nb.",
            None,
            vec![R, R, WS, L, CS, B, L, CS],
            Level::vec(&[1, 1, 1, 2, 2, 2, 2, 1]),
            vec![R, WS, L, CS, B, L, CS],
            Level::vec(&[1, 1, 2, 2, 2, 2, 1]),
            RTL_LEVEL,
            false,
        ),
        (
            "a א.\tג",
            None,
            vec![L, WS, R, R, CS, S, R, R],
            Level::vec(&[0, 0, 1, 1, 1, 1, 1, 1]),
            vec![L, WS, R, CS, S, R],
            Level::vec(&[0, 0, 1, 1, 1, 1]),
            LTR_LEVEL,
            false,
        ),
        (
            "\u{5d1} a.\tb.",
            None,
            vec![R, R, WS, L, CS, S, L, CS],
            Level::vec(&[1, 1, 1, 2, 2, 2, 2, 1]),
            vec![R, WS, L, CS, S, L, CS],
            Level::vec(&[1, 1, 2, 2, 2, 2, 1]),
            RTL_LEVEL,
            false,
        ),
    ];

    for (
        input,
        requested_level,
        classes_utf8,
        levels_utf8,
        classes_utf16,
        levels_utf16,
        paragraph_level,
        is_pure_ltr,
    ) in cases
    {
        // UTF-8 flavour: one class/level entry per byte of `input`.
        let expected_utf8 = ParagraphBidiInfo {
            text: input,
            original_classes: classes_utf8,
            levels: levels_utf8,
            paragraph_level,
            is_pure_ltr,
        };
        assert_eq!(ParagraphBidiInfo::new(input, requested_level), expected_utf8);

        // UTF-16 flavour: one class/level entry per code unit.
        let input_utf16 = to_utf16(input);
        let expected_utf16 = ParagraphBidiInfoU16 {
            text: &input_utf16,
            original_classes: classes_utf16,
            levels: levels_utf16,
            paragraph_level,
            is_pure_ltr,
        };
        assert_eq!(
            ParagraphBidiInfoU16::new(&input_utf16, requested_level),
            expected_utf16
        );
    }
}
1898
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_bidi_info_has_rtl() {
    // (text, requested paragraph level, expected `has_rtl()` result)
    let cases = vec![
        // Digits and Latin letters only: `has_rtl()` is expected to be false,
        // even when an RTL paragraph level is requested.
        ("123", None, false),
        ("123", Some(LTR_LEVEL), false),
        ("123", Some(RTL_LEVEL), false),
        ("abc", None, false),
        ("abc", Some(LTR_LEVEL), false),
        ("abc", Some(RTL_LEVEL), false),
        ("abc 123", None, false),
        ("abc\n123", None, false),
        // Texts containing U+05D0..U+05D2 (plus U+05BC): expected to be true
        // whatever the requested level or the surrounding content.
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", Some(LTR_LEVEL), true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", Some(RTL_LEVEL), true),
        ("abc \u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("abc\n\u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2} abc", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}\nabc", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2} 123", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}\n123", None, true),
    ];

    for (input, requested_level, expect_rtl) in cases {
        // The UTF-8 and UTF-16 entry points must agree on the answer.
        let utf8_info = BidiInfo::new(input, requested_level);
        assert_eq!(utf8_info.has_rtl(), expect_rtl);

        let input_utf16 = to_utf16(input);
        let utf16_info = BidiInfoU16::new(&input_utf16, requested_level);
        assert_eq!(utf16_info.has_rtl(), expect_rtl);
    }
}
1929
#[cfg(feature = "hardcoded-data")]
/// Test helper: analyses `text` with no requested paragraph level and returns
/// one reordered line per detected paragraph, each line spanning the whole
/// paragraph range.
fn reorder_paras(text: &str) -> Vec<Cow<'_, str>> {
    let info = BidiInfo::new(text, None);
    let mut reordered = Vec::with_capacity(info.paragraphs.len());
    for paragraph in &info.paragraphs {
        let whole_paragraph = paragraph.range.clone();
        reordered.push(info.reorder_line(paragraph, whole_paragraph));
    }
    reordered
}
1939
#[cfg(feature = "hardcoded-data")]
/// Test helper: UTF-16 counterpart of the paragraph-reordering helper. Analyses
/// `text` with no requested paragraph level and returns one reordered line per
/// detected paragraph, each line spanning the whole paragraph range.
fn reorder_paras_u16(text: &[u16]) -> Vec<Cow<'_, [u16]>> {
    let info = BidiInfoU16::new(text, None);
    let mut reordered = Vec::with_capacity(info.paragraphs.len());
    for paragraph in &info.paragraphs {
        let whole_paragraph = paragraph.range.clone();
        reordered.push(info.reorder_line(paragraph, whole_paragraph));
    }
    reordered
}
1949
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reorder_line() {
    // (input text, expected reordered line for each paragraph)
    let cases = vec![
        // Three Latin-only paragraphs: each line is expected unchanged.
        ("abc\ndef\nghi", vec!["abc\n", "def\n", "ghi"]),
        ("ab1\nde2\ngh3", vec!["ab1\n", "de2\n", "gh3"]),
        // Latin paragraph followed by an Arabic one.
        ("abc\nابج", vec!["abc\n", "جبا"]),
        // Arabic paragraph followed by a Latin one; the '\n' is expected at
        // the front of the first reordered line.
        (
            "\u{0627}\u{0628}\u{062C}\nabc",
            vec!["\n\u{062C}\u{0628}\u{0627}", "abc"],
        ),
        ("1.-2", vec!["1.-2"]),
        ("1-.2", vec!["1-.2"]),
        ("abc אבג", vec!["abc גבא"]),
        // Digits followed by Hebrew letters.
        ("123 \u{05D0}\u{05D1}\u{05D2}", vec!["גבא 123"]),
        // Inputs containing U+202A / U+202C / U+2066 / U+2069 format
        // characters around Latin text: expected unchanged.
        ("abc\u{202A}def", vec!["abc\u{202A}def"]),
        (
            "abc\u{202A}def\u{202C}ghi",
            vec!["abc\u{202A}def\u{202C}ghi"],
        ),
        (
            "abc\u{2066}def\u{2069}ghi",
            vec!["abc\u{2066}def\u{2069}ghi"],
        ),
        // Text wrapped in U+202B ... U+202C.
        ("\u{202B}abc אבג\u{202C}", vec!["\u{202b}גבא abc\u{202c}"]),
        // Hebrew text with a '?' in various positions.
        ("\u{05D0}בג? אבג", vec!["גבא ?גבא"]),
        ("A אבג?", vec!["A גבא?"]),
        // Same as above with a trailing U+200F.
        ("A אבג?\u{200F}", vec!["A \u{200F}?גבא"]),
        ("\u{05D0}בג abc", vec!["abc גבא"]),
        ("abc\u{2067}.-\u{2069}ghi", vec!["abc\u{2067}-.\u{2069}ghi"]),
        (
            "Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!",
            vec!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"],
        ),
        // Hebrew text containing parentheses and brackets.
        ("\u{05D0}(ב)ג.", vec![".ג)ב(א"]),
        ("\u{05D0}ב(גד[&ef].)gh", vec!["gh).]ef&[דג(בא"]),
    ];

    for (input, expected_lines) in cases {
        // UTF-8 path.
        assert_eq!(reorder_paras(input), expected_lines);

        // UTF-16 path: same expectations, converted line by line.
        let input_utf16 = to_utf16(input);
        let expected_utf16: Vec<_> = expected_lines.iter().map(|line| to_utf16(line)).collect();
        assert_eq!(reorder_paras_u16(&input_utf16), expected_utf16);
    }
}
2005
2006 fn reordered_levels_for_paras(text: &str) -> Vec<Vec<Level>> {
2007 let bidi_info = BidiInfo::new(text, None);
2008 bidi_info
2009 .paragraphs
2010 .iter()
2011 .map(|para| bidi_info.reordered_levels(para, para.range.clone()))
2012 .collect()
2013 }
2014
2015 fn reordered_levels_per_char_for_paras(text: &str) -> Vec<Vec<Level>> {
2016 let bidi_info = BidiInfo::new(text, None);
2017 bidi_info
2018 .paragraphs
2019 .iter()
2020 .map(|para| bidi_info.reordered_levels_per_char(para, para.range.clone()))
2021 .collect()
2022 }
2023
2024 fn reordered_levels_for_paras_u16(text: &[u16]) -> Vec<Vec<Level>> {
2025 let bidi_info = BidiInfoU16::new(text, None);
2026 bidi_info
2027 .paragraphs
2028 .iter()
2029 .map(|para| bidi_info.reordered_levels(para, para.range.clone()))
2030 .collect()
2031 }
2032
2033 fn reordered_levels_per_char_for_paras_u16(text: &[u16]) -> Vec<Vec<Level>> {
2034 let bidi_info = BidiInfoU16::new(text, None);
2035 bidi_info
2036 .paragraphs
2037 .iter()
2038 .map(|para| bidi_info.reordered_levels_per_char(para, para.range.clone()))
2039 .collect()
2040 }
2041
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reordered_levels() {
    // Each case is laid out as:
    //   (text,
    //    expected `reordered_levels` per paragraph for the UTF-8 text,
    //    expected `reordered_levels_per_char` per paragraph (shared by both encodings),
    //    expected `reordered_levels` per paragraph for the UTF-16 text)
    let level_cases = vec![
        (
            "\u{2067}\u{2069}",
            vec![Level::vec(&[0, 0, 0, 0, 0, 0])],
            vec![Level::vec(&[0, 0])],
            vec![Level::vec(&[0, 0])],
        ),
        (
            "\u{060B}\u{20CF}\u{06F9}",
            vec![Level::vec(&[1, 1, 1, 1, 1, 2, 2])],
            vec![Level::vec(&[1, 1, 2])],
            vec![Level::vec(&[1, 1, 2])],
        ),
    ];

    for (input, utf8_levels, per_char_levels, utf16_levels) in level_cases {
        assert_eq!(reordered_levels_for_paras(input), utf8_levels);
        assert_eq!(reordered_levels_per_char_for_paras(input), per_char_levels);

        let input_utf16 = to_utf16(input);
        assert_eq!(reordered_levels_for_paras_u16(&input_utf16), utf16_levels);
        assert_eq!(
            reordered_levels_per_char_for_paras_u16(&input_utf16),
            per_char_levels
        );
    }

    // Same layout as above, but the expectations are spelled as strings and
    // compared against the computed levels directly.
    let string_cases = vec![(
        "\u{0605}\u{2067}\u{202C}\u{0590}",
        vec![&["2", "2", "0", "0", "0", "x", "x", "x", "1", "1"]],
        vec![&["2", "0", "x", "1"]],
        vec![&["2", "0", "x", "1"]],
    )];

    for (input, utf8_levels, per_char_levels, utf16_levels) in string_cases {
        assert_eq!(reordered_levels_for_paras(input), utf8_levels);
        assert_eq!(reordered_levels_per_char_for_paras(input), per_char_levels);

        let input_utf16 = to_utf16(input);
        assert_eq!(reordered_levels_for_paras_u16(&input_utf16), utf16_levels);
        assert_eq!(
            reordered_levels_per_char_for_paras_u16(&input_utf16),
            per_char_levels
        );
    }

    // A line range that covers only part of the paragraph: the expected
    // result still holds one level per byte of the whole text (7 entries).
    let partial_text = "aa טֶ";
    let utf8_info = BidiInfo::new(partial_text, None);
    let utf8_para = &utf8_info.paragraphs[0];
    assert_eq!(
        utf8_info.reordered_levels(utf8_para, 3..7),
        Level::vec(&[0, 0, 0, 1, 1, 1, 1]),
    );

    // UTF-16 equivalent: one level per code unit of the whole text (5 entries).
    let partial_utf16 = to_utf16(partial_text);
    let utf16_info = BidiInfoU16::new(&partial_utf16, None);
    let utf16_para = &utf16_info.paragraphs[0];
    assert_eq!(
        utf16_info.reordered_levels(utf16_para, 1..4),
        Level::vec(&[0, 0, 0, 1, 1]),
    );
}
2102
#[test]
fn test_paragraph_info_len() {
    let first = "hello world";
    let second = "How are you";
    let joined = format!("{}\n{}", first, second);

    // --- UTF-8 ---
    // A single line of text yields a single paragraph as long as the text.
    let single = BidiInfo::new(first, None);
    assert_eq!(single.paragraphs.len(), 1);
    assert_eq!(single.paragraphs[0].len(), first.len());

    // Two lines joined by '\n' yield two paragraphs; the expected length of
    // the first one counts the '\n' (hence the `+ 1`).
    let double = BidiInfo::new(&joined, None);
    assert_eq!(double.paragraphs.len(), 2);
    assert_eq!(double.paragraphs[0].len(), first.len() + 1);
    assert_eq!(double.paragraphs[1].len(), second.len());

    // --- UTF-16 ---
    // Same expectations, with lengths measured in code units.
    let first_utf16 = to_utf16(first);
    let single = BidiInfoU16::new(&first_utf16, None);
    assert_eq!(single.paragraphs.len(), 1);
    assert_eq!(single.paragraphs[0].len(), first_utf16.len());

    let second_utf16 = to_utf16(second);
    let joined_utf16 = to_utf16(&joined);
    let double = BidiInfoU16::new(&joined_utf16, None);
    assert_eq!(double.paragraphs.len(), 2);
    assert_eq!(double.paragraphs[0].len(), first_utf16.len() + 1);
    assert_eq!(double.paragraphs[1].len(), second_utf16.len());
}
2134
#[test]
fn test_direction() {
    let ltr_text = "hello world";
    let rtl_text = "أهلا بكم";
    // Three paragraphs: Latin only, Arabic only, then Latin immediately
    // followed by Arabic.
    let combined = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);
    // Expected direction of each paragraph, in order.
    let expected = [Direction::Ltr, Direction::Rtl, Direction::Mixed];

    // UTF-8
    let info = BidiInfo::new(&combined, None);
    assert_eq!(info.paragraphs.len(), 3);
    for (para, want) in info.paragraphs.iter().zip(expected.iter()) {
        assert_eq!(Paragraph::new(&info, para).direction(), *want);
    }

    // UTF-16
    let combined_utf16 = to_utf16(&combined);
    let info = BidiInfoU16::new(&combined_utf16, None);
    assert_eq!(info.paragraphs.len(), 3);
    for (para, want) in info.paragraphs.iter().zip(expected.iter()) {
        assert_eq!(ParagraphU16::new(&info, para).direction(), *want);
    }
}
2159
#[test]
fn test_edge_cases_direction() {
    // Empty text produces no paragraphs at all, in either encoding.
    let nothing = "";
    let info = BidiInfo::new(nothing, Some(RTL_LEVEL));
    assert_eq!(info.paragraphs.len(), 0);

    let nothing_utf16 = to_utf16(nothing);
    let info = BidiInfoU16::new(&nothing_utf16, Some(RTL_LEVEL));
    assert_eq!(info.paragraphs.len(), 0);

    // A lone '\n' forms one paragraph. Its expected direction is Ltr when no
    // level or the LTR level is requested, and Rtl when the RTL level is.
    // (text, requested level, expected direction)
    let cases = vec![
        ("\n", None, Direction::Ltr),
        ("\n", Some(LTR_LEVEL), Direction::Ltr),
        ("\n", Some(RTL_LEVEL), Direction::Rtl),
    ];

    for (input, requested_level, expected) in cases {
        let info = BidiInfo::new(input, requested_level);
        assert_eq!(info.paragraphs.len(), 1);
        let paragraph = Paragraph::new(&info, &info.paragraphs[0]);
        assert_eq!(paragraph.direction(), expected);

        let input_utf16 = to_utf16(input);
        let info = BidiInfoU16::new(&input_utf16, requested_level);
        let paragraph = ParagraphU16::new(&info, &info.paragraphs[0]);
        assert_eq!(paragraph.direction(), expected);
    }
}
2194
#[test]
fn test_level_at() {
    let ltr_text = "hello world";
    let rtl_text = "أهلا بكم";
    // Three paragraphs: Latin only, Arabic only, then Latin immediately
    // followed by Arabic.
    let combined = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);

    // --- UTF-8: offsets and lengths are in bytes ---
    let info = BidiInfo::new(&combined, None);
    let paras = &info.paragraphs;
    assert_eq!(paras.len(), 3);

    let ltr_para = Paragraph::new(&info, &paras[0]);
    let rtl_para = Paragraph::new(&info, &paras[1]);
    let mixed_para = Paragraph::new(&info, &paras[2]);

    assert_eq!(ltr_para.level_at(0), LTR_LEVEL);
    assert_eq!(rtl_para.level_at(0), RTL_LEVEL);
    assert_eq!(mixed_para.level_at(0), LTR_LEVEL);
    assert_eq!(mixed_para.info.levels.len(), 54);
    assert_eq!(mixed_para.para.range.start, 28);
    // Querying at `ltr_text.len()` lands on the first Arabic letter of the
    // mixed paragraph, which is expected at the RTL level.
    assert_eq!(mixed_para.level_at(ltr_text.len()), RTL_LEVEL);

    // --- UTF-16: offsets and lengths are in code units ---
    let combined_utf16 = to_utf16(&combined);
    let info = BidiInfoU16::new(&combined_utf16, None);
    let paras = &info.paragraphs;
    assert_eq!(paras.len(), 3);

    let ltr_para = ParagraphU16::new(&info, &paras[0]);
    let rtl_para = ParagraphU16::new(&info, &paras[1]);
    let mixed_para = ParagraphU16::new(&info, &paras[2]);

    assert_eq!(ltr_para.level_at(0), LTR_LEVEL);
    assert_eq!(rtl_para.level_at(0), RTL_LEVEL);
    assert_eq!(mixed_para.level_at(0), LTR_LEVEL);
    assert_eq!(mixed_para.info.levels.len(), 40);
    assert_eq!(mixed_para.para.range.start, 21);
    assert_eq!(mixed_para.level_at(ltr_text.len()), RTL_LEVEL);
}
2229
#[test]
fn test_get_base_direction() {
    // (text, expected result of `get_base_direction`)
    let cases = vec![
        // Empty text.
        ("", Direction::Mixed),
        // Digits, punctuation and format characters only.
        ("123[]-+\u{2019}\u{2060}\u{00bf}?", Direction::Mixed),
        // Latin letters appear only after the '\n'.
        ("3.14\npi", Direction::Mixed),
        // First letter is Latin.
        ("[123 'abc']", Direction::Ltr),
        // First letter is U+0628 (Arabic).
        ("[123 '\u{0628}' abc", Direction::Rtl),
        // Latin letters enclosed in U+2066 ... U+2069, followed by U+0628.
        ("[123 '\u{2066}abc\u{2069}'\u{0628}]", Direction::Rtl),
        // Same, but with U+2068 in place of the closing U+2069.
        ("[123 '\u{2066}abc\u{2068}'\u{0628}]", Direction::Mixed),
    ];

    for (input, expected) in cases {
        assert_eq!(get_base_direction(input), expected);

        let input_utf16 = to_utf16(input);
        assert_eq!(get_base_direction(input_utf16.as_slice()), expected);
    }
}
2248
#[test]
fn test_get_base_direction_full() {
    // (text, expected result of `get_base_direction_full`)
    let cases = vec![
        // Empty text.
        ("", Direction::Mixed),
        // Digits, punctuation and format characters only.
        ("123[]-+\u{2019}\u{2060}\u{00bf}?", Direction::Mixed),
        // The first letter appears only after the '\n': Latin, then Hebrew.
        ("3.14\npi", Direction::Ltr),
        ("3.14\n\u{05D0}", Direction::Rtl),
        // First letter is Latin.
        ("[123 'abc']", Direction::Ltr),
        // First letter is U+0628 (Arabic).
        ("[123 '\u{0628}' abc", Direction::Rtl),
        // Latin letters enclosed in U+2066 ... U+2069, followed by U+0628.
        ("[123 '\u{2066}abc\u{2069}'\u{0628}]", Direction::Rtl),
        // Same, but with U+2068 in place of the closing U+2069.
        ("[123 '\u{2066}abc\u{2068}'\u{0628}]", Direction::Mixed),
        // As the previous case, with a '\n' inserted before U+0628.
        ("[123 '\u{2066}abc\u{2068}'\n\u{0628}]", Direction::Rtl),
    ];

    for (input, expected) in cases {
        assert_eq!(get_base_direction_full(input), expected);

        let input_utf16 = to_utf16(input);
        assert_eq!(get_base_direction_full(input_utf16.as_slice()), expected);
    }
}
2269}
2270
#[cfg(all(feature = "serde", feature = "hardcoded-data", test))]
mod serde_tests {
    use super::*;
    use serde_test::{assert_tokens, Token};

    /// Checks the serde token stream of the levels computed for a short
    /// Latin + Hebrew text: a sequence of ten `Level` newtype structs, each
    /// wrapping a `u8`.
    #[test]
    fn test_levels() {
        let text = "abc אבג";
        let levels = BidiInfo::new(text, None).levels;

        // One level per UTF-8 byte of the text.
        assert_eq!(text.as_bytes().len(), 10);
        assert_eq!(levels.len(), 10);

        // Expected raw level numbers, in order: four 0s then six 1s.
        let raw_levels: [u8; 10] = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1];

        let mut expected = vec![Token::Seq { len: Some(10) }];
        for &raw in raw_levels.iter() {
            expected.push(Token::NewtypeStruct { name: "Level" });
            expected.push(Token::U8(raw));
        }
        expected.push(Token::SeqEnd);

        assert_tokens(&levels, &expected);
    }
}