public class RegExp
extends java.lang.Object
Regular Expression extension to Automaton.
Regular expressions are built from the following abstract syntax:
regexp | ::= | unionexp | ||
| | ||||
unionexp | ::= | interexp | unionexp | (union) | |
| | interexp | |||
interexp | ::= | concatexp & interexp | (intersection) | [OPTIONAL] |
| | concatexp | |||
concatexp | ::= | repeatexp concatexp | (concatenation) | |
| | repeatexp | |||
repeatexp | ::= | repeatexp ? | (zero or one occurrence) | |
| | repeatexp * | (zero or more occurrences) | ||
| | repeatexp + | (one or more occurrences) | ||
| | repeatexp {n} | (n occurrences) | ||
| | repeatexp {n,} | (n or more occurrences) | ||
| | repeatexp {n,m} | (n to m occurrences, including both) | ||
| | complexp | |||
complexp | ::= | ~ complexp | (complement) | [OPTIONAL] |
| | charclassexp | |||
charclassexp | ::= | [ charclasses ] | (character class) | |
| | [^ charclasses ] | (negated character class) | ||
| | simpleexp | |||
charclasses | ::= | charclass charclasses | ||
| | charclass | |||
charclass | ::= | charexp - charexp | (character range, including end-points) | |
| | charexp | |||
simpleexp | ::= | charexp | ||
| | . | (any single character) | ||
| | # | (the empty language) | [OPTIONAL] | |
| | @ | (any string) | [OPTIONAL] | |
| | " <Unicode string without double-quotes> " | (a string) | ||
| | ( ) | (the empty string) | ||
| | ( unionexp ) | (precedence override) | ||
| | < <identifier> > | (named automaton) | [OPTIONAL] | |
| | <n-m> | (numerical interval) | [OPTIONAL] | |
charexp | ::= | <Unicode character> | (a single non-reserved character) | |
| | \ <Unicode character> | (a single character) |
The productions marked [OPTIONAL] are only allowed if
specified by the syntax flags passed to the RegExp
constructor.
The reserved characters used in the (enabled) syntax must be escaped with
backslash (\) or double-quotes ("..."). (In
contrast to other regexp syntaxes, this is required also in character
classes.) Be aware that dash (-) has a special meaning in
charclass expressions. An identifier is a string not containing right
angle bracket (>) or dash (-). Numerical
intervals are specified by non-negative decimal integers and include both end
points, and if n and m have the same number
of digits, then the conforming strings must have that length (i.e. prefixed
by 0's).
Modifier and Type | Class and Description |
---|---|
(package private) static class |
RegExp.Kind |
Modifier and Type | Field and Description |
---|---|
static int |
ALL
Syntax flag, enables all optional regexp syntax.
|
static int |
ANYSTRING
Syntax flag, enables anystring (@).
|
static int |
AUTOMATON
Syntax flag, enables named automata (<identifier>).
|
(package private) int |
c |
static int |
COMPLEMENT
Syntax flag, enables complement (~).
|
(package private) int |
digits |
static int |
EMPTY
Syntax flag, enables empty language (#).
|
(package private) RegExp |
exp1 |
(package private) RegExp |
exp2 |
(package private) int |
flags |
(package private) int |
from |
static int |
INTERSECTION
Syntax flag, enables intersection (&).
|
static int |
INTERVAL
Syntax flag, enables numerical intervals (<n-m>).
|
(package private) RegExp.Kind |
kind |
(package private) int |
max |
(package private) int |
min |
static int |
NONE
Syntax flag, enables no optional regexp syntax.
|
private java.lang.String |
originalString |
(package private) int |
pos |
(package private) java.lang.String |
s |
(package private) int |
to |
Constructor and Description |
---|
RegExp() |
RegExp(java.lang.String s)
Constructs new
RegExp from a string. |
RegExp(java.lang.String s,
int syntax_flags)
Constructs new
RegExp from a string. |
Modifier and Type | Method and Description |
---|---|
private boolean |
check(int flag) |
private void |
findLeaves(RegExp exp,
RegExp.Kind kind,
java.util.List<Automaton> list,
java.util.Map<java.lang.String,Automaton> automata,
AutomatonProvider automaton_provider,
int maxDeterminizedStates) |
java.util.Set<java.lang.String> |
getIdentifiers()
Returns set of automaton identifiers that occur in this regular expression.
|
(package private) void |
getIdentifiers(java.util.Set<java.lang.String> set) |
java.lang.String |
getOriginalString()
The string that was used to construct the regex.
|
(package private) static RegExp |
makeAnyChar() |
(package private) static RegExp |
makeAnyString() |
(package private) static RegExp |
makeAutomaton(java.lang.String s) |
(package private) static RegExp |
makeChar(int c) |
(package private) static RegExp |
makeCharRange(int from,
int to) |
(package private) static RegExp |
makeComplement(RegExp exp) |
(package private) static RegExp |
makeConcatenation(RegExp exp1,
RegExp exp2) |
(package private) static RegExp |
makeEmpty() |
(package private) static RegExp |
makeIntersection(RegExp exp1,
RegExp exp2) |
(package private) static RegExp |
makeInterval(int min,
int max,
int digits) |
(package private) static RegExp |
makeOptional(RegExp exp) |
(package private) static RegExp |
makeRepeat(RegExp exp) |
(package private) static RegExp |
makeRepeat(RegExp exp,
int min) |
(package private) static RegExp |
makeRepeat(RegExp exp,
int min,
int max) |
private static RegExp |
makeString(RegExp exp1,
RegExp exp2) |
(package private) static RegExp |
makeString(java.lang.String s) |
(package private) static RegExp |
makeUnion(RegExp exp1,
RegExp exp2) |
private boolean |
match(int c) |
private boolean |
more() |
private int |
next() |
(package private) RegExp |
parseCharClass() |
(package private) RegExp |
parseCharClasses() |
(package private) RegExp |
parseCharClassExp() |
(package private) int |
parseCharExp() |
(package private) RegExp |
parseComplExp() |
(package private) RegExp |
parseConcatExp() |
(package private) RegExp |
parseInterExp() |
(package private) RegExp |
parseRepeatExp() |
(package private) RegExp |
parseSimpleExp() |
(package private) RegExp |
parseUnionExp() |
private boolean |
peek(java.lang.String s) |
Automaton |
toAutomaton()
Constructs new
Automaton from this RegExp . |
Automaton |
toAutomaton(AutomatonProvider automaton_provider,
int maxDeterminizedStates)
Constructs new
Automaton from this RegExp . |
Automaton |
toAutomaton(int maxDeterminizedStates)
Constructs new
Automaton from this RegExp . |
private Automaton |
toAutomaton(java.util.Map<java.lang.String,Automaton> automata,
AutomatonProvider automaton_provider,
int maxDeterminizedStates) |
Automaton |
toAutomaton(java.util.Map<java.lang.String,Automaton> automata,
int maxDeterminizedStates)
Constructs new
Automaton from this RegExp . |
private Automaton |
toAutomatonInternal(java.util.Map<java.lang.String,Automaton> automata,
AutomatonProvider automaton_provider,
int maxDeterminizedStates) |
java.lang.String |
toString()
Constructs string from parsed regular expression.
|
(package private) void |
toStringBuilder(java.lang.StringBuilder b) |
java.lang.String |
toStringTree()
Like toString, but more verbose (shows the hierarchy more clearly).
|
(package private) void |
toStringTree(java.lang.StringBuilder b,
java.lang.String indent) |
public static final int INTERSECTION
public static final int COMPLEMENT
public static final int EMPTY
public static final int ANYSTRING
public static final int AUTOMATON
public static final int INTERVAL
public static final int ALL
public static final int NONE
private final java.lang.String originalString
RegExp.Kind kind
RegExp exp1
RegExp exp2
java.lang.String s
int c
int min
int max
int digits
int from
int to
int flags
int pos
RegExp()
public RegExp(java.lang.String s) throws java.lang.IllegalArgumentException
Constructs new RegExp from a string. Same as RegExp(s, ALL).
s - regexp string
java.lang.IllegalArgumentException - if an error occurred while parsing the
regular expression
public RegExp(java.lang.String s, int syntax_flags) throws java.lang.IllegalArgumentException
Constructs new RegExp from a string.
s - regexp string
syntax_flags - boolean 'or' of optional syntax constructs to be enabled
java.lang.IllegalArgumentException - if an error occurred while parsing the
regular expression
public Automaton toAutomaton()
Constructs new Automaton from this RegExp. Same as toAutomaton(null)
(empty automaton map).
public Automaton toAutomaton(int maxDeterminizedStates) throws java.lang.IllegalArgumentException, TooComplexToDeterminizeException
Constructs new Automaton from this RegExp. The
constructed automaton is minimal and deterministic and has no transitions
to dead states.
maxDeterminizedStates - maximum number of states in the resulting
automata. If the automata would need more than this many states
TooComplexToDeterminizeException is thrown. Higher numbers require more
space but can process more complex regexes.
java.lang.IllegalArgumentException - if this regular expression uses a named
identifier that is not available from the automaton provider
TooComplexToDeterminizeException - if determinizing this regexp
requires more than maxDeterminizedStates states
public Automaton toAutomaton(AutomatonProvider automaton_provider, int maxDeterminizedStates) throws java.lang.IllegalArgumentException, TooComplexToDeterminizeException
Constructs new Automaton from this RegExp. The
constructed automaton is minimal and deterministic and has no transitions
to dead states.
automaton_provider - provider of automata for named identifiers
maxDeterminizedStates - maximum number of states in the resulting
automata. If the automata would need more than this many states
TooComplexToDeterminizeException is thrown. Higher numbers require more
space but can process more complex regexes.
java.lang.IllegalArgumentException - if this regular expression uses a named
identifier that is not available from the automaton provider
TooComplexToDeterminizeException - if determinizing this regexp
requires more than maxDeterminizedStates states
public Automaton toAutomaton(java.util.Map<java.lang.String,Automaton> automata, int maxDeterminizedStates) throws java.lang.IllegalArgumentException, TooComplexToDeterminizeException
Constructs new Automaton from this RegExp. The
constructed automaton is minimal and deterministic and has no transitions
to dead states.
automata - a map from automaton identifiers to automata (of type
Automaton).
maxDeterminizedStates - maximum number of states in the resulting
automata. If the automata would need more than this many states
TooComplexToDeterminizeException is thrown. Higher numbers require more
space but can process more complex regexes.
java.lang.IllegalArgumentException - if this regular expression uses a named
identifier that does not occur in the automaton map
TooComplexToDeterminizeException - if determinizing this regexp
requires more than maxDeterminizedStates states
private Automaton toAutomaton(java.util.Map<java.lang.String,Automaton> automata, AutomatonProvider automaton_provider, int maxDeterminizedStates) throws java.lang.IllegalArgumentException, TooComplexToDeterminizeException
java.lang.IllegalArgumentException
TooComplexToDeterminizeException
private Automaton toAutomatonInternal(java.util.Map<java.lang.String,Automaton> automata, AutomatonProvider automaton_provider, int maxDeterminizedStates) throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
private void findLeaves(RegExp exp, RegExp.Kind kind, java.util.List<Automaton> list, java.util.Map<java.lang.String,Automaton> automata, AutomatonProvider automaton_provider, int maxDeterminizedStates)
public java.lang.String getOriginalString()
public java.lang.String toString()
toString
in class java.lang.Object
void toStringBuilder(java.lang.StringBuilder b)
public java.lang.String toStringTree()
void toStringTree(java.lang.StringBuilder b, java.lang.String indent)
public java.util.Set<java.lang.String> getIdentifiers()
void getIdentifiers(java.util.Set<java.lang.String> set)
static RegExp makeChar(int c)
static RegExp makeCharRange(int from, int to)
static RegExp makeAnyChar()
static RegExp makeEmpty()
static RegExp makeString(java.lang.String s)
static RegExp makeAnyString()
static RegExp makeAutomaton(java.lang.String s)
static RegExp makeInterval(int min, int max, int digits)
private boolean peek(java.lang.String s)
private boolean match(int c)
private boolean more()
private int next() throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
private boolean check(int flag)
final RegExp parseUnionExp() throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
final RegExp parseInterExp() throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
final RegExp parseConcatExp() throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
final RegExp parseRepeatExp() throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
final RegExp parseComplExp() throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
final RegExp parseCharClassExp() throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
final RegExp parseCharClasses() throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
final RegExp parseCharClass() throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
final RegExp parseSimpleExp() throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
final int parseCharExp() throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException