regex/regexset/
bytes.rs

1use alloc::string::String;
2
3use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter};
4
5use crate::{bytes::RegexSetBuilder, Error};
6
7/// Match multiple, possibly overlapping, regexes in a single search.
8///
9/// A regex set corresponds to the union of zero or more regular expressions.
10/// That is, a regex set will match a haystack when at least one of its
/// constituent regexes matches. A regex set as it's formulated here provides a
12/// touch more power: it will also report *which* regular expressions in the
13/// set match. Indeed, this is the key difference between regex sets and a
14/// single `Regex` with many alternates, since only one alternate can match at
15/// a time.
16///
17/// For example, consider regular expressions to match email addresses and
18/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
19/// regex set is constructed from those regexes, then searching the haystack
20/// `foo@example.com` will report both regexes as matching. Of course, one
21/// could accomplish this by compiling each regex on its own and doing two
22/// searches over the haystack. The key advantage of using a regex set is
23/// that it will report the matching regexes using a *single pass through the
24/// haystack*. If one has hundreds or thousands of regexes to match repeatedly
25/// (like a URL router for a complex web application or a user agent matcher),
26/// then a regex set *can* realize huge performance gains.
27///
28/// Unlike the top-level [`RegexSet`](crate::RegexSet), this `RegexSet`
29/// searches haystacks with type `&[u8]` instead of `&str`. Consequently, this
30/// `RegexSet` is permitted to match invalid UTF-8.
31///
32/// # Limitations
33///
34/// Regex sets are limited to answering the following two questions:
35///
36/// 1. Does any regex in the set match?
37/// 2. If so, which regexes in the set match?
38///
39/// As with the main [`Regex`][crate::bytes::Regex] type, it is cheaper to ask
40/// (1) instead of (2) since the matching engines can stop after the first
41/// match is found.
42///
43/// You cannot directly extract [`Match`][crate::bytes::Match] or
44/// [`Captures`][crate::bytes::Captures] objects from a regex set. If you need
45/// these operations, the recommended approach is to compile each pattern in
46/// the set independently and scan the exact same haystack a second time with
47/// those independently compiled patterns:
48///
49/// ```
50/// use regex::bytes::{Regex, RegexSet};
51///
52/// let patterns = ["foo", "bar"];
53/// // Both patterns will match different ranges of this string.
54/// let hay = b"barfoo";
55///
56/// // Compile a set matching any of our patterns.
57/// let set = RegexSet::new(patterns).unwrap();
58/// // Compile each pattern independently.
59/// let regexes: Vec<_> = set
60///     .patterns()
61///     .iter()
62///     .map(|pat| Regex::new(pat).unwrap())
63///     .collect();
64///
65/// // Match against the whole set first and identify the individual
66/// // matching patterns.
67/// let matches: Vec<&[u8]> = set
68///     .matches(hay)
69///     .into_iter()
70///     // Dereference the match index to get the corresponding
71///     // compiled pattern.
72///     .map(|index| &regexes[index])
73///     // To get match locations or any other info, we then have to search the
74///     // exact same haystack again, using our separately-compiled pattern.
75///     .map(|re| re.find(hay).unwrap().as_bytes())
76///     .collect();
77///
78/// // Matches arrive in the order the constituent patterns were declared,
79/// // not the order they appear in the haystack.
80/// assert_eq!(vec![&b"foo"[..], &b"bar"[..]], matches);
81/// ```
82///
83/// # Performance
84///
85/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
86/// search takes `O(m * n)` time, where `m` is proportional to the size of the
87/// regex set and `n` is proportional to the length of the haystack.
88///
89/// # Trait implementations
90///
91/// The `Default` trait is implemented for `RegexSet`. The default value
92/// is an empty set. An empty set can also be explicitly constructed via
93/// [`RegexSet::empty`].
94///
95/// # Example
96///
97/// This shows how the above two regexes (for matching email addresses and
98/// domains) might work:
99///
100/// ```
101/// use regex::bytes::RegexSet;
102///
103/// let set = RegexSet::new(&[
104///     r"[a-z]+@[a-z]+\.(com|org|net)",
105///     r"[a-z]+\.(com|org|net)",
106/// ]).unwrap();
107///
108/// // Ask whether any regexes in the set match.
109/// assert!(set.is_match(b"foo@example.com"));
110///
111/// // Identify which regexes in the set match.
112/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect();
113/// assert_eq!(vec![0, 1], matches);
114///
115/// // Try again, but with a haystack that only matches one of the regexes.
116/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect();
117/// assert_eq!(vec![1], matches);
118///
119/// // Try again, but with a haystack that doesn't match any regex in the set.
120/// let matches: Vec<_> = set.matches(b"example").into_iter().collect();
121/// assert!(matches.is_empty());
122/// ```
123///
124/// Note that it would be possible to adapt the above example to using `Regex`
125/// with an expression like:
126///
127/// ```text
128/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
129/// ```
130///
131/// After a match, one could then inspect the capture groups to figure out
132/// which alternates matched. The problem is that it is hard to make this
133/// approach scale when there are many regexes since the overlap between each
134/// alternate isn't always obvious to reason about.
135#[derive(Clone)]
136pub struct RegexSet {
137    pub(crate) meta: meta::Regex,
138    pub(crate) patterns: alloc::sync::Arc<[String]>,
139}
140
141impl RegexSet {
142    /// Create a new regex set with the given regular expressions.
143    ///
144    /// This takes an iterator of `S`, where `S` is something that can produce
145    /// a `&str`. If any of the strings in the iterator are not valid regular
146    /// expressions, then an error is returned.
147    ///
148    /// # Example
149    ///
150    /// Create a new regex set from an iterator of strings:
151    ///
152    /// ```
153    /// use regex::bytes::RegexSet;
154    ///
155    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
156    /// assert!(set.is_match(b"foo"));
157    /// ```
158    pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
159    where
160        S: AsRef<str>,
161        I: IntoIterator<Item = S>,
162    {
163        RegexSetBuilder::new(exprs).build()
164    }
165
166    /// Create a new empty regex set.
167    ///
    /// An empty regex set never matches anything.
169    ///
170    /// This is a convenience function for `RegexSet::new([])`, but doesn't
171    /// require one to specify the type of the input.
172    ///
173    /// # Example
174    ///
175    /// ```
176    /// use regex::bytes::RegexSet;
177    ///
178    /// let set = RegexSet::empty();
179    /// assert!(set.is_empty());
180    /// // an empty set matches nothing
181    /// assert!(!set.is_match(b""));
182    /// ```
183    pub fn empty() -> RegexSet {
184        let empty: [&str; 0] = [];
185        RegexSetBuilder::new(empty).build().unwrap()
186    }
187
188    /// Returns true if and only if one of the regexes in this set matches
189    /// the haystack given.
190    ///
191    /// This method should be preferred if you only need to test whether any
192    /// of the regexes in the set should match, but don't care about *which*
193    /// regexes matched. This is because the underlying matching engine will
194    /// quit immediately after seeing the first match instead of continuing to
195    /// find all matches.
196    ///
197    /// Note that as with searches using [`Regex`](crate::bytes::Regex), the
198    /// expression is unanchored by default. That is, if the regex does not
199    /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
200    /// to match anywhere in the haystack.
201    ///
202    /// # Example
203    ///
204    /// Tests whether a set matches somewhere in a haystack:
205    ///
206    /// ```
207    /// use regex::bytes::RegexSet;
208    ///
209    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
210    /// assert!(set.is_match(b"foo"));
211    /// assert!(!set.is_match("☃".as_bytes()));
212    /// ```
213    #[inline]
214    pub fn is_match(&self, haystack: &[u8]) -> bool {
215        self.is_match_at(haystack, 0)
216    }
217
218    /// Returns true if and only if one of the regexes in this set matches the
219    /// haystack given, with the search starting at the offset given.
220    ///
221    /// The significance of the starting point is that it takes the surrounding
222    /// context into consideration. For example, the `\A` anchor can only
223    /// match when `start == 0`.
224    ///
225    /// # Panics
226    ///
227    /// This panics when `start >= haystack.len() + 1`.
228    ///
229    /// # Example
230    ///
231    /// This example shows the significance of `start`. Namely, consider a
232    /// haystack `foobar` and a desire to execute a search starting at offset
233    /// `3`. You could search a substring explicitly, but then the look-around
234    /// assertions won't work correctly. Instead, you can use this method to
235    /// specify the start position of a search.
236    ///
237    /// ```
238    /// use regex::bytes::RegexSet;
239    ///
240    /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
241    /// let hay = b"foobar";
242    /// // We get a match here, but it's probably not intended.
243    /// assert!(set.is_match(&hay[3..]));
    /// // No match because the assertions take the context into account.
245    /// assert!(!set.is_match_at(hay, 3));
246    /// ```
247    #[inline]
248    pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool {
249        self.meta.is_match(Input::new(haystack).span(start..haystack.len()))
250    }
251
252    /// Returns the set of regexes that match in the given haystack.
253    ///
254    /// The set returned contains the index of each regex that matches in
255    /// the given haystack. The index is in correspondence with the order of
256    /// regular expressions given to `RegexSet`'s constructor.
257    ///
258    /// The set can also be used to iterate over the matched indices. The order
259    /// of iteration is always ascending with respect to the matching indices.
260    ///
261    /// Note that as with searches using [`Regex`](crate::bytes::Regex), the
262    /// expression is unanchored by default. That is, if the regex does not
263    /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
264    /// to match anywhere in the haystack.
265    ///
266    /// # Example
267    ///
268    /// Tests which regular expressions match the given haystack:
269    ///
270    /// ```
271    /// use regex::bytes::RegexSet;
272    ///
273    /// let set = RegexSet::new([
274    ///     r"\w+",
275    ///     r"\d+",
276    ///     r"\pL+",
277    ///     r"foo",
278    ///     r"bar",
279    ///     r"barfoo",
280    ///     r"foobar",
281    /// ]).unwrap();
282    /// let matches: Vec<_> = set.matches(b"foobar").into_iter().collect();
283    /// assert_eq!(matches, vec![0, 2, 3, 4, 6]);
284    ///
285    /// // You can also test whether a particular regex matched:
286    /// let matches = set.matches(b"foobar");
287    /// assert!(!matches.matched(5));
288    /// assert!(matches.matched(6));
289    /// ```
290    #[inline]
291    pub fn matches(&self, haystack: &[u8]) -> SetMatches {
292        self.matches_at(haystack, 0)
293    }
294
295    /// Returns the set of regexes that match in the given haystack.
296    ///
297    /// The set returned contains the index of each regex that matches in
298    /// the given haystack. The index is in correspondence with the order of
299    /// regular expressions given to `RegexSet`'s constructor.
300    ///
301    /// The set can also be used to iterate over the matched indices. The order
302    /// of iteration is always ascending with respect to the matching indices.
303    ///
304    /// The significance of the starting point is that it takes the surrounding
305    /// context into consideration. For example, the `\A` anchor can only
306    /// match when `start == 0`.
307    ///
308    /// # Panics
309    ///
310    /// This panics when `start >= haystack.len() + 1`.
311    ///
312    /// # Example
313    ///
314    /// Tests which regular expressions match the given haystack:
315    ///
316    /// ```
317    /// use regex::bytes::RegexSet;
318    ///
319    /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
320    /// let hay = b"foobar";
321    /// // We get matches here, but it's probably not intended.
322    /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect();
323    /// assert_eq!(matches, vec![0, 1]);
    /// // No matches because the assertions take the context into account.
325    /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect();
326    /// assert_eq!(matches, vec![]);
327    /// ```
328    #[inline]
329    pub fn matches_at(&self, haystack: &[u8], start: usize) -> SetMatches {
330        let input = Input::new(haystack).span(start..haystack.len());
331        let mut patset = PatternSet::new(self.meta.pattern_len());
332        self.meta.which_overlapping_matches(&input, &mut patset);
333        SetMatches(patset)
334    }
335
336    /// Returns the same as matches, but starts the search at the given
337    /// offset and stores the matches into the slice given.
338    ///
339    /// The significance of the starting point is that it takes the surrounding
340    /// context into consideration. For example, the `\A` anchor can only
341    /// match when `start == 0`.
342    ///
343    /// `matches` must have a length that is at least the number of regexes
344    /// in this set.
345    ///
346    /// This method returns true if and only if at least one member of
347    /// `matches` is true after executing the set against `haystack`.
348    #[doc(hidden)]
349    #[inline]
350    pub fn matches_read_at(
351        &self,
352        matches: &mut [bool],
353        haystack: &[u8],
354        start: usize,
355    ) -> bool {
356        // This is pretty dumb. We should try to fix this, but the
357        // regex-automata API doesn't provide a way to store matches in an
358        // arbitrary &mut [bool]. Thankfully, this API is doc(hidden) and
359        // thus not public... But regex-capi currently uses it. We should
360        // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet
361        // is in regex-automata, not regex. So maybe we should just accept a
362        // 'SetMatches', which is basically just a newtype around PatternSet.
363        let mut patset = PatternSet::new(self.meta.pattern_len());
364        let mut input = Input::new(haystack);
365        input.set_start(start);
366        self.meta.which_overlapping_matches(&input, &mut patset);
367        for pid in patset.iter() {
368            matches[pid] = true;
369        }
370        !patset.is_empty()
371    }
372
373    /// An alias for `matches_read_at` to preserve backward compatibility.
374    ///
375    /// The `regex-capi` crate used this method, so to avoid breaking that
376    /// crate, we continue to export it as an undocumented API.
377    #[doc(hidden)]
378    #[inline]
379    pub fn read_matches_at(
380        &self,
381        matches: &mut [bool],
382        haystack: &[u8],
383        start: usize,
384    ) -> bool {
385        self.matches_read_at(matches, haystack, start)
386    }
387
388    /// Returns the total number of regexes in this set.
389    ///
390    /// # Example
391    ///
392    /// ```
393    /// use regex::bytes::RegexSet;
394    ///
395    /// assert_eq!(0, RegexSet::empty().len());
396    /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len());
397    /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len());
398    /// ```
399    #[inline]
400    pub fn len(&self) -> usize {
401        self.meta.pattern_len()
402    }
403
404    /// Returns `true` if this set contains no regexes.
405    ///
406    /// # Example
407    ///
408    /// ```
409    /// use regex::bytes::RegexSet;
410    ///
411    /// assert!(RegexSet::empty().is_empty());
412    /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty());
413    /// ```
414    #[inline]
415    pub fn is_empty(&self) -> bool {
416        self.meta.pattern_len() == 0
417    }
418
419    /// Returns the regex patterns that this regex set was constructed from.
420    ///
    /// This function can be used to determine the pattern for a match. The
    /// slice returned has exactly as many patterns as were given to this
    /// regex set, and the order of the slice is the same as the order of the
    /// patterns provided to the set.
425    ///
426    /// # Example
427    ///
428    /// ```
429    /// use regex::bytes::RegexSet;
430    ///
431    /// let set = RegexSet::new(&[
432    ///     r"\w+",
433    ///     r"\d+",
434    ///     r"\pL+",
435    ///     r"foo",
436    ///     r"bar",
437    ///     r"barfoo",
438    ///     r"foobar",
439    /// ]).unwrap();
440    /// let matches: Vec<_> = set
441    ///     .matches(b"foobar")
442    ///     .into_iter()
443    ///     .map(|index| &set.patterns()[index])
444    ///     .collect();
445    /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]);
446    /// ```
447    #[inline]
448    pub fn patterns(&self) -> &[String] {
449        &self.patterns
450    }
451}
452
453impl Default for RegexSet {
454    fn default() -> Self {
455        RegexSet::empty()
456    }
457}
458
459/// A set of matches returned by a regex set.
460///
461/// Values of this type are constructed by [`RegexSet::matches`].
462#[derive(Clone, Debug)]
463pub struct SetMatches(PatternSet);
464
465impl SetMatches {
466    /// Whether this set contains any matches.
467    ///
468    /// # Example
469    ///
470    /// ```
471    /// use regex::bytes::RegexSet;
472    ///
473    /// let set = RegexSet::new(&[
474    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
475    ///     r"[a-z]+\.(com|org|net)",
476    /// ]).unwrap();
477    /// let matches = set.matches(b"foo@example.com");
478    /// assert!(matches.matched_any());
479    /// ```
480    #[inline]
481    pub fn matched_any(&self) -> bool {
482        !self.0.is_empty()
483    }
484
485    /// Whether all patterns in this set matched.
486    ///
487    /// # Example
488    ///
489    /// ```
490    /// use regex::bytes::RegexSet;
491    ///
492    /// let set = RegexSet::new(&[
493    ///     r"^foo",
494    ///     r"[a-z]+\.com",
495    /// ]).unwrap();
496    /// let matches = set.matches(b"foo.example.com");
497    /// assert!(matches.matched_all());
498    /// ```
499    pub fn matched_all(&self) -> bool {
500        self.0.is_full()
501    }
502
503    /// Whether the regex at the given index matched.
504    ///
505    /// The index for a regex is determined by its insertion order upon the
506    /// initial construction of a `RegexSet`, starting at `0`.
507    ///
508    /// # Panics
509    ///
510    /// If `index` is greater than or equal to the number of regexes in the
511    /// original set that produced these matches. Equivalently, when `index`
512    /// is greater than or equal to [`SetMatches::len`].
513    ///
514    /// # Example
515    ///
516    /// ```
517    /// use regex::bytes::RegexSet;
518    ///
519    /// let set = RegexSet::new([
520    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
521    ///     r"[a-z]+\.(com|org|net)",
522    /// ]).unwrap();
523    /// let matches = set.matches(b"example.com");
524    /// assert!(!matches.matched(0));
525    /// assert!(matches.matched(1));
526    /// ```
527    #[inline]
528    pub fn matched(&self, index: usize) -> bool {
529        self.0.contains(PatternID::new_unchecked(index))
530    }
531
532    /// The total number of regexes in the set that created these matches.
533    ///
534    /// **WARNING:** This always returns the same value as [`RegexSet::len`].
535    /// In particular, it does *not* return the number of elements yielded by
536    /// [`SetMatches::iter`]. The only way to determine the total number of
537    /// matched regexes is to iterate over them.
538    ///
539    /// # Example
540    ///
541    /// Notice that this method returns the total number of regexes in the
542    /// original set, and *not* the total number of regexes that matched.
543    ///
544    /// ```
545    /// use regex::bytes::RegexSet;
546    ///
547    /// let set = RegexSet::new([
548    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
549    ///     r"[a-z]+\.(com|org|net)",
550    /// ]).unwrap();
551    /// let matches = set.matches(b"example.com");
552    /// // Total number of patterns that matched.
553    /// assert_eq!(1, matches.iter().count());
554    /// // Total number of patterns in the set.
555    /// assert_eq!(2, matches.len());
556    /// ```
557    #[inline]
558    pub fn len(&self) -> usize {
559        self.0.capacity()
560    }
561
562    /// Returns an iterator over the indices of the regexes that matched.
563    ///
    /// This will always produce matches in ascending order, where the index
565    /// yielded corresponds to the index of the regex that matched with respect
566    /// to its position when initially building the set.
567    ///
568    /// # Example
569    ///
570    /// ```
571    /// use regex::bytes::RegexSet;
572    ///
573    /// let set = RegexSet::new([
574    ///     r"[0-9]",
575    ///     r"[a-z]",
576    ///     r"[A-Z]",
577    ///     r"\p{Greek}",
578    /// ]).unwrap();
579    /// let hay = "βa1".as_bytes();
580    /// let matches: Vec<_> = set.matches(hay).iter().collect();
581    /// assert_eq!(matches, vec![0, 1, 3]);
582    /// ```
583    ///
584    /// Note that `SetMatches` also implements the `IntoIterator` trait, so
585    /// this method is not always needed. For example:
586    ///
587    /// ```
588    /// use regex::bytes::RegexSet;
589    ///
590    /// let set = RegexSet::new([
591    ///     r"[0-9]",
592    ///     r"[a-z]",
593    ///     r"[A-Z]",
594    ///     r"\p{Greek}",
595    /// ]).unwrap();
596    /// let hay = "βa1".as_bytes();
597    /// let mut matches = vec![];
598    /// for index in set.matches(hay) {
599    ///     matches.push(index);
600    /// }
601    /// assert_eq!(matches, vec![0, 1, 3]);
602    /// ```
603    #[inline]
604    pub fn iter(&self) -> SetMatchesIter<'_> {
605        SetMatchesIter(self.0.iter())
606    }
607}
608
609impl IntoIterator for SetMatches {
610    type IntoIter = SetMatchesIntoIter;
611    type Item = usize;
612
613    fn into_iter(self) -> Self::IntoIter {
614        let it = 0..self.0.capacity();
615        SetMatchesIntoIter { patset: self.0, it }
616    }
617}
618
619impl<'a> IntoIterator for &'a SetMatches {
620    type IntoIter = SetMatchesIter<'a>;
621    type Item = usize;
622
623    fn into_iter(self) -> Self::IntoIter {
624        self.iter()
625    }
626}
627
628/// An owned iterator over the set of matches from a regex set.
629///
/// This will always produce matches in ascending order of index, where the
631/// index corresponds to the index of the regex that matched with respect to
632/// its position when initially building the set.
633///
634/// This iterator is created by calling `SetMatches::into_iter` via the
635/// `IntoIterator` trait. This is automatically done in `for` loops.
636///
637/// # Example
638///
639/// ```
640/// use regex::bytes::RegexSet;
641///
642/// let set = RegexSet::new([
643///     r"[0-9]",
644///     r"[a-z]",
645///     r"[A-Z]",
646///     r"\p{Greek}",
647/// ]).unwrap();
648/// let hay = "βa1".as_bytes();
649/// let mut matches = vec![];
650/// for index in set.matches(hay) {
651///     matches.push(index);
652/// }
653/// assert_eq!(matches, vec![0, 1, 3]);
654/// ```
655#[derive(Debug)]
656pub struct SetMatchesIntoIter {
657    patset: PatternSet,
658    it: core::ops::Range<usize>,
659}
660
661impl Iterator for SetMatchesIntoIter {
662    type Item = usize;
663
664    fn next(&mut self) -> Option<usize> {
665        loop {
666            let id = self.it.next()?;
667            if self.patset.contains(PatternID::new_unchecked(id)) {
668                return Some(id);
669            }
670        }
671    }
672
673    fn size_hint(&self) -> (usize, Option<usize>) {
674        self.it.size_hint()
675    }
676}
677
678impl DoubleEndedIterator for SetMatchesIntoIter {
679    fn next_back(&mut self) -> Option<usize> {
680        loop {
681            let id = self.it.next_back()?;
682            if self.patset.contains(PatternID::new_unchecked(id)) {
683                return Some(id);
684            }
685        }
686    }
687}
688
689impl core::iter::FusedIterator for SetMatchesIntoIter {}
690
691/// A borrowed iterator over the set of matches from a regex set.
692///
693/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that
694/// created this iterator.
695///
/// This will always produce matches in ascending order, where the index
697/// corresponds to the index of the regex that matched with respect to its
698/// position when initially building the set.
699///
700/// This iterator is created by the [`SetMatches::iter`] method.
701#[derive(Clone, Debug)]
702pub struct SetMatchesIter<'a>(PatternSetIter<'a>);
703
704impl<'a> Iterator for SetMatchesIter<'a> {
705    type Item = usize;
706
707    fn next(&mut self) -> Option<usize> {
708        self.0.next().map(|pid| pid.as_usize())
709    }
710
711    fn size_hint(&self) -> (usize, Option<usize>) {
712        self.0.size_hint()
713    }
714}
715
716impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
717    fn next_back(&mut self) -> Option<usize> {
718        self.0.next_back().map(|pid| pid.as_usize())
719    }
720}
721
722impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {}
723
724impl core::fmt::Debug for RegexSet {
725    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
726        write!(f, "RegexSet({:?})", self.patterns())
727    }
728}
regex/regexset/bytes.rs

regex/regexset/
bytes.rs