// Error types used when building and searching with the meta regex engine.
use regex_syntax::{ast, hir};
use crate::{nfa, util::search::MatchError, PatternID};
/// An error that occurs when construction of a `Regex` fails.
///
/// A build error is generally a result of one of two possible failure
/// modes. First is a parse or syntax error in the concrete syntax of a
/// pattern. Second is that the construction of the underlying regex matcher
/// fails, usually because it gets too big with respect to limits like
/// [`Config::nfa_size_limit`](crate::meta::Config::nfa_size_limit).
///
/// This error provides very few introspection capabilities. You can:
///
/// * Ask for the [`PatternID`] of the pattern that caused an error, if one
///   is available. This is available for things like syntax errors, but not
///   for cases where build limits are exceeded.
/// * Ask for the configured size limit, when the error came from building
///   the NFA and the underlying NFA error reports one.
/// * Ask for the underlying syntax error, but only if the error is a syntax
///   error.
/// * Ask for a human readable message corresponding to the underlying error.
/// * Use the `BuildError::source` method (from the `std::error::Error`
///   trait implementation) to query for an underlying error if one
///   exists. There are no API guarantees about which error is returned.
///
/// When the `std` feature is enabled, this implements `std::error::Error`.
#[derive(Clone, Debug)]
pub struct BuildError {
// The concrete failure. Both this field and `BuildErrorKind` are private;
// callers inspect the error through the accessor methods instead.
kind: BuildErrorKind,
}
/// The internal classification of a [`BuildError`].
#[derive(Clone, Debug)]
enum BuildErrorKind {
/// A pattern could not be processed by `regex_syntax`. `pid` identifies the
/// offending pattern and `err` is the syntax error, which is built from
/// either an AST error or an HIR error.
Syntax { pid: PatternID, err: regex_syntax::Error },
/// Building the Thompson NFA failed; wraps the NFA builder's own error.
NFA(nfa::thompson::BuildError),
}
impl BuildError {
/// If it is known which pattern ID caused this build error to occur, then
/// this method returns it.
///
/// Some errors are not associated with a particular pattern. However, any
/// errors that occur as part of parsing a pattern are guaranteed to be
/// associated with a pattern ID.
///
/// # Example
///
/// ```
/// use regex_automata::{meta::Regex, PatternID};
///
/// let err = Regex::new_many(&["a", "b", r"\p{Foo}", "c"]).unwrap_err();
/// assert_eq!(Some(PatternID::must(2)), err.pattern());
/// ```
pub fn pattern(&self) -> Option<PatternID> {
match self.kind {
BuildErrorKind::Syntax { pid, .. } => Some(pid),
_ => None,
}
}
/// If this error occurred because the regex exceeded the configured size
/// limit before being built, then this returns the configured size limit.
///
/// The limit returned is what was configured, and corresponds to the
/// maximum amount of heap usage in bytes.
pub fn size_limit(&self) -> Option<usize> {
match self.kind {
BuildErrorKind::NFA(ref err) => err.size_limit(),
_ => None,
}
}
/// If this error corresponds to a syntax error, then a reference to it is
/// returned by this method.
pub fn syntax_error(&self) -> Option<®ex_syntax::Error> {
match self.kind {
BuildErrorKind::Syntax { ref err, .. } => Some(err),
_ => None,
}
}
pub(crate) fn ast(pid: PatternID, err: ast::Error) -> BuildError {
let err = regex_syntax::Error::from(err);
BuildError { kind: BuildErrorKind::Syntax { pid, err } }
}
pub(crate) fn hir(pid: PatternID, err: hir::Error) -> BuildError {
let err = regex_syntax::Error::from(err);
BuildError { kind: BuildErrorKind::Syntax { pid, err } }
}
pub(crate) fn nfa(err: nfa::thompson::BuildError) -> BuildError {
BuildError { kind: BuildErrorKind::NFA(err) }
}
}
#[cfg(feature = "std")]
impl std::error::Error for BuildError {
    /// Returns the lower-level error wrapped by this build error.
    ///
    /// Every kind of build error carries an underlying error, so this always
    /// returns `Some`.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match &self.kind {
            BuildErrorKind::Syntax { err, .. } => err,
            BuildErrorKind::NFA(err) => err,
        };
        Some(cause)
    }
}
impl core::fmt::Display for BuildError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self.kind {
BuildErrorKind::Syntax { pid, .. } => {
write!(f, "error parsing pattern {}", pid.as_usize())
}
BuildErrorKind::NFA(_) => write!(f, "error building NFA"),
}
}
}
/// An error that occurs when a search should be retried.
///
/// This retry error distinguishes between two different failure modes.
///
/// The first is one where potential quadratic behavior has been detected.
/// In this case, whatever optimization that led to this behavior should be
/// stopped, and the next best strategy should be used.
///
/// The second indicates that the underlying regex engine has failed for some
/// reason. This usually occurs because either a lazy DFA's cache has become
/// ineffective or because a non-ASCII byte has been seen *and* a Unicode word
/// boundary was used in one of the patterns. In this failure case, a different
/// regex engine that won't fail in these ways (PikeVM, backtracker or the
/// one-pass DFA) should be used.
///
/// This is an internal error only and should never bleed into the public
/// API.
#[derive(Debug)]
pub(crate) enum RetryError {
/// Potential quadratic behavior was detected; the optimization in use
/// should be abandoned in favor of the next best strategy.
Quadratic(RetryQuadraticError),
/// The underlying regex engine failed; a different engine should be used.
Fail(RetryFailError),
}
#[cfg(feature = "std")]
impl std::error::Error for RetryError {}

impl core::fmt::Display for RetryError {
    /// Formats this error by deferring to the `Display` implementation of
    /// the wrapped error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            RetryError::Fail(inner) => core::fmt::Display::fmt(inner, f),
            RetryError::Quadratic(inner) => core::fmt::Display::fmt(inner, f),
        }
    }
}
impl From<MatchError> for RetryError {
fn from(merr: MatchError) -> RetryError {
RetryError::Fail(RetryFailError::from(merr))
}
}
/// Signals that potential quadratic behavior was detected while applying
/// either the "reverse suffix" or the "reverse inner" optimization.
///
/// A caller that sees this error should give up on the "reverse"
/// optimization and run a normal forward search instead.
#[derive(Debug)]
pub(crate) struct RetryQuadraticError(());

impl RetryQuadraticError {
    /// Creates a new quadratic-behavior retry error.
    pub(crate) fn new() -> RetryQuadraticError {
        Self(())
    }
}

#[cfg(feature = "std")]
impl std::error::Error for RetryQuadraticError {}

impl core::fmt::Display for RetryQuadraticError {
    /// Writes a fixed message; this error carries no additional data.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("regex engine gave up to avoid quadratic behavior")
    }
}
impl From<RetryQuadraticError> for RetryError {
fn from(err: RetryQuadraticError) -> RetryError {
RetryError::Quadratic(err)
}
}
/// Signals that a regex engine "gave up" before finishing a search. This
/// usually happens because of heuristic Unicode word boundary support or
/// because the lazy DFA's cache was being used ineffectively.
///
/// A caller that sees this error should retry the search with a different
/// regex engine.
///
/// A `MatchError` can be converted into this error through a `From` impl.
/// That conversion is sound because the meta regex engine internals
/// guarantee that `HaystackTooLong` and `UnsupportedAnchored` never occur,
/// which leaves only `Quit` and `GaveUp`. Both of those correspond to this
/// "failure" error.
#[derive(Debug)]
pub(crate) struct RetryFailError {
    offset: usize,
}

impl RetryFailError {
    /// Creates a failure error that records the given offset.
    pub(crate) fn from_offset(offset: usize) -> RetryFailError {
        Self { offset }
    }
}

#[cfg(feature = "std")]
impl std::error::Error for RetryFailError {}

impl core::fmt::Display for RetryFailError {
    /// Writes a message that includes the recorded offset.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let at = self.offset;
        write!(f, "regex engine failed at offset {:?}", at)
    }
}
impl From<RetryFailError> for RetryError {
fn from(err: RetryFailError) -> RetryError {
RetryError::Fail(err)
}
}
impl From<MatchError> for RetryFailError {
    /// Converts a `MatchError` into a failure error carrying the offset
    /// reported by the match error.
    ///
    /// # Panics
    ///
    /// Panics if the error kind is `HaystackTooLong` or
    /// `UnsupportedAnchored`.
    fn from(merr: MatchError) -> RetryFailError {
        use crate::util::search::MatchErrorKind;

        let offset = match *merr.kind() {
            MatchErrorKind::Quit { offset, .. }
            | MatchErrorKind::GaveUp { offset } => offset,
            // These can never occur because we avoid them by construction
            // or with higher level control flow logic. For example, the
            // backtracker's wrapper will never hand out a backtracker engine
            // when the haystack would be too long.
            MatchErrorKind::HaystackTooLong { .. }
            | MatchErrorKind::UnsupportedAnchored { .. } => {
                unreachable!("found impossible error in meta engine: {}", merr)
            }
        };
        RetryFailError::from_offset(offset)
    }
}