public final class HTMLStripCharFilter extends BaseCharFilter
Modifier and Type | Class and Description |
---|---|
private static class |
HTMLStripCharFilter.TextSegment |
Modifier and Type | Field and Description |
---|---|
private static int |
AMPERSAND |
private static int |
BANG |
private static char |
BLOCK_LEVEL_END_TAG_REPLACEMENT |
private static char |
BLOCK_LEVEL_START_TAG_REPLACEMENT |
private static char |
BR_END_TAG_REPLACEMENT |
private static char |
BR_START_TAG_REPLACEMENT |
private static int |
CDATA |
private static int |
CHARACTER_REFERENCE_TAIL |
private static int |
COMMENT |
private int |
cumulativeDiff |
private static int |
DOUBLE_QUOTED_STRING |
private static int |
END_TAG_TAIL_EXCLUDE |
private static int |
END_TAG_TAIL_INCLUDE |
private static int |
END_TAG_TAIL_SUBSTITUTE |
private HTMLStripCharFilter.TextSegment |
entitySegment |
private static CharArrayMap<java.lang.Character> |
entityValues |
private int |
eofReturnValue |
private boolean |
escapeBR |
private CharArraySet |
escapedTags |
private boolean |
escapeSCRIPT |
private boolean |
escapeSTYLE |
private static int |
INITIAL_INPUT_SEGMENT_SIZE |
private HTMLStripCharFilter.TextSegment |
inputSegment |
private int |
inputStart |
private static int |
LEFT_ANGLE_BRACKET |
private static int |
LEFT_ANGLE_BRACKET_SLASH |
private static int |
LEFT_ANGLE_BRACKET_SPACE |
private static int |
NUMERIC_CHARACTER |
private int |
outputCharCount |
private HTMLStripCharFilter.TextSegment |
outputSegment |
private int |
previousRestoreState |
private static char |
REPLACEMENT_CHARACTER |
private int |
restoreState |
private static int |
SCRIPT |
private static int |
SCRIPT_COMMENT |
private static char |
SCRIPT_REPLACEMENT |
private static int |
SERVER_SIDE_INCLUDE |
private static int |
SINGLE_QUOTED_STRING |
private static int |
START_TAG_TAIL_EXCLUDE |
private static int |
START_TAG_TAIL_INCLUDE |
private static int |
START_TAG_TAIL_SUBSTITUTE |
private static int |
STYLE |
private static int |
STYLE_COMMENT |
private static char |
STYLE_REPLACEMENT |
private static java.util.Map<java.lang.String,java.lang.String> |
upperCaseVariantsAccepted |
private int |
yychar
the number of characters up to the start of the matched text
|
private int |
yycolumn
the number of characters from the last newline up to the start of the
matched text
|
private static int |
YYEOF
This character denotes the end of file
|
private static int |
YYINITIAL
lexical states
|
private int |
yyline
number of newlines encountered up to the start of the matched text
|
private static int[] |
ZZ_ACTION
Translates DFA states to action switch labels.
|
private static java.lang.String |
ZZ_ACTION_PACKED_0 |
private static int[] |
ZZ_ATTRIBUTE
ZZ_ATTRIBUTE[aState] contains the attributes of state
aState |
private static java.lang.String |
ZZ_ATTRIBUTE_PACKED_0 |
private static int |
ZZ_BUFFERSIZE
initial size of the lookahead buffer
|
private static char[] |
ZZ_CMAP
Translates characters to character classes
|
private static java.lang.String |
ZZ_CMAP_PACKED
Translates characters to character classes
|
private static java.lang.String[] |
ZZ_ERROR_MSG |
private static int[] |
ZZ_LEXSTATE
ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l.
ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
at the beginning of a line.
l is of the form l = 2*k, where k is a non-negative integer.
|
private static int |
ZZ_NO_MATCH |
private static int |
ZZ_PUSHBACK_2BIG |
private static int[] |
ZZ_ROWMAP
Translates a state to a row index in the transition table
|
private static java.lang.String |
ZZ_ROWMAP_PACKED_0 |
private static int[] |
ZZ_TRANS
The transition table of the DFA
|
private static java.lang.String |
ZZ_TRANS_PACKED_0 |
private static java.lang.String |
ZZ_TRANS_PACKED_1 |
private static java.lang.String |
ZZ_TRANS_PACKED_10 |
private static java.lang.String |
ZZ_TRANS_PACKED_11 |
private static java.lang.String |
ZZ_TRANS_PACKED_12 |
private static java.lang.String |
ZZ_TRANS_PACKED_2 |
private static java.lang.String |
ZZ_TRANS_PACKED_3 |
private static java.lang.String |
ZZ_TRANS_PACKED_4 |
private static java.lang.String |
ZZ_TRANS_PACKED_5 |
private static java.lang.String |
ZZ_TRANS_PACKED_6 |
private static java.lang.String |
ZZ_TRANS_PACKED_7 |
private static java.lang.String |
ZZ_TRANS_PACKED_8 |
private static java.lang.String |
ZZ_TRANS_PACKED_9 |
private static int |
ZZ_UNKNOWN_ERROR |
private boolean |
zzAtBOL
zzAtBOL == true iff the scanner is currently at the beginning of a line
|
private boolean |
zzAtEOF
zzAtEOF == true iff the scanner is at the EOF
|
private char[] |
zzBuffer
this buffer contains the current text to be matched and is
the source of the yytext() string
|
private int |
zzCurrentPos
the current text position in the buffer
|
private int |
zzEndRead
endRead marks the last character in the buffer that has been read
from input
|
private boolean |
zzEOFDone
denotes if the user-EOF-code has already been executed
|
private int |
zzFinalHighSurrogate
The number of occupied positions in zzBuffer beyond zzEndRead.
|
private int |
zzLexicalState
the current lexical state
|
private int |
zzMarkedPos
the text position at the last accepting state
|
private java.io.Reader |
zzReader
the input device
|
private int |
zzStartRead
startRead marks the beginning of the yytext() string in the buffer
|
private int |
zzState
the current state of the DFA
|
input
Constructor and Description |
---|
HTMLStripCharFilter(java.io.Reader in)
Creates a new scanner
|
HTMLStripCharFilter(java.io.Reader in,
java.util.Set<java.lang.String> escapedTags)
Creates a new HTMLStripCharFilter over the provided Reader
with the specified start and end tags.
|
Modifier and Type | Method and Description |
---|---|
void |
close()
Closes the underlying input stream.
|
(package private) static int |
getInitialBufferSize() |
private int |
nextChar()
Resumes scanning until the next regular expression is matched,
the end of input is encountered or an I/O-Error occurs.
|
int |
read() |
int |
read(char[] cbuf,
int off,
int len) |
private void |
yybegin(int newState)
Enters a new lexical state
|
private char |
yycharat(int pos)
Returns the character at position pos from the
matched text.
|
private void |
yyclose()
Closes the input stream.
|
private int |
yylength()
Returns the length of the matched text region.
|
private void |
yypushback(int number)
Pushes the specified amount of characters back into the input stream.
|
private void |
yyreset(java.io.Reader reader)
Resets the scanner to read from a new input stream.
|
private int |
yystate()
Returns the current lexical state.
|
private java.lang.String |
yytext()
Returns the text matched by the current regular expression.
|
private void |
zzDoEOF()
Contains user EOF-code, which will be executed exactly once,
when the end of file is reached
|
private boolean |
zzRefill()
Refills the input buffer.
|
private void |
zzScanError(int errorCode)
Reports an error that occurred while scanning.
|
private static int[] |
zzUnpackAction() |
private static int |
zzUnpackAction(java.lang.String packed,
int offset,
int[] result) |
private static int[] |
zzUnpackAttribute() |
private static int |
zzUnpackAttribute(java.lang.String packed,
int offset,
int[] result) |
private static char[] |
zzUnpackCMap(java.lang.String packed)
Unpacks the compressed character translation table.
|
private static int[] |
zzUnpackRowMap() |
private static int |
zzUnpackRowMap(java.lang.String packed,
int offset,
int[] result) |
private static int[] |
zzUnpackTrans() |
private static int |
zzUnpackTrans(java.lang.String packed,
int offset,
int[] result) |
addOffCorrectMap, correct, getLastCumulativeDiff
correctOffset
private static final int YYEOF
private static final int ZZ_BUFFERSIZE
private static final int YYINITIAL
private static final int AMPERSAND
private static final int NUMERIC_CHARACTER
private static final int CHARACTER_REFERENCE_TAIL
private static final int LEFT_ANGLE_BRACKET
private static final int BANG
private static final int COMMENT
private static final int SCRIPT
private static final int SCRIPT_COMMENT
private static final int LEFT_ANGLE_BRACKET_SLASH
private static final int LEFT_ANGLE_BRACKET_SPACE
private static final int CDATA
private static final int SERVER_SIDE_INCLUDE
private static final int SINGLE_QUOTED_STRING
private static final int DOUBLE_QUOTED_STRING
private static final int END_TAG_TAIL_INCLUDE
private static final int END_TAG_TAIL_EXCLUDE
private static final int END_TAG_TAIL_SUBSTITUTE
private static final int START_TAG_TAIL_INCLUDE
private static final int START_TAG_TAIL_EXCLUDE
private static final int START_TAG_TAIL_SUBSTITUTE
private static final int STYLE
private static final int STYLE_COMMENT
private static final int[] ZZ_LEXSTATE
private static final java.lang.String ZZ_CMAP_PACKED
private static final char[] ZZ_CMAP
private static final int[] ZZ_ACTION
private static final java.lang.String ZZ_ACTION_PACKED_0
private static final int[] ZZ_ROWMAP
private static final java.lang.String ZZ_ROWMAP_PACKED_0
private static final int[] ZZ_TRANS
private static final java.lang.String ZZ_TRANS_PACKED_0
private static final java.lang.String ZZ_TRANS_PACKED_1
private static final java.lang.String ZZ_TRANS_PACKED_2
private static final java.lang.String ZZ_TRANS_PACKED_3
private static final java.lang.String ZZ_TRANS_PACKED_4
private static final java.lang.String ZZ_TRANS_PACKED_5
private static final java.lang.String ZZ_TRANS_PACKED_6
private static final java.lang.String ZZ_TRANS_PACKED_7
private static final java.lang.String ZZ_TRANS_PACKED_8
private static final java.lang.String ZZ_TRANS_PACKED_9
private static final java.lang.String ZZ_TRANS_PACKED_10
private static final java.lang.String ZZ_TRANS_PACKED_11
private static final java.lang.String ZZ_TRANS_PACKED_12
private static final int ZZ_UNKNOWN_ERROR
private static final int ZZ_NO_MATCH
private static final int ZZ_PUSHBACK_2BIG
private static final java.lang.String[] ZZ_ERROR_MSG
private static final int[] ZZ_ATTRIBUTE
aState
private static final java.lang.String ZZ_ATTRIBUTE_PACKED_0
private java.io.Reader zzReader
private int zzState
private int zzLexicalState
private char[] zzBuffer
private int zzMarkedPos
private int zzCurrentPos
private int zzStartRead
private int zzEndRead
private int yyline
private int yychar
private int yycolumn
private boolean zzAtBOL
private boolean zzAtEOF
private boolean zzEOFDone
private int zzFinalHighSurrogate
private static final java.util.Map<java.lang.String,java.lang.String> upperCaseVariantsAccepted
private static final CharArrayMap<java.lang.Character> entityValues
private static final int INITIAL_INPUT_SEGMENT_SIZE
private static final char BLOCK_LEVEL_START_TAG_REPLACEMENT
private static final char BLOCK_LEVEL_END_TAG_REPLACEMENT
private static final char BR_START_TAG_REPLACEMENT
private static final char BR_END_TAG_REPLACEMENT
private static final char SCRIPT_REPLACEMENT
private static final char STYLE_REPLACEMENT
private static final char REPLACEMENT_CHARACTER
private CharArraySet escapedTags
private int inputStart
private int cumulativeDiff
private boolean escapeBR
private boolean escapeSCRIPT
private boolean escapeSTYLE
private int restoreState
private int previousRestoreState
private int outputCharCount
private int eofReturnValue
private HTMLStripCharFilter.TextSegment inputSegment
private HTMLStripCharFilter.TextSegment outputSegment
private HTMLStripCharFilter.TextSegment entitySegment
public HTMLStripCharFilter(java.io.Reader in, java.util.Set<java.lang.String> escapedTags)
in
- Reader to strip HTML tags from.
escapedTags
- Tags in this set (both start and end tags)
will not be filtered out.
public HTMLStripCharFilter(java.io.Reader in)
in
- the java.io.Reader to read input from.
private static int[] zzUnpackAction()
private static int zzUnpackAction(java.lang.String packed, int offset, int[] result)
private static int[] zzUnpackRowMap()
private static int zzUnpackRowMap(java.lang.String packed, int offset, int[] result)
private static int[] zzUnpackTrans()
private static int zzUnpackTrans(java.lang.String packed, int offset, int[] result)
private static int[] zzUnpackAttribute()
private static int zzUnpackAttribute(java.lang.String packed, int offset, int[] result)
public int read() throws java.io.IOException
read
in class java.io.Reader
java.io.IOException
public int read(char[] cbuf, int off, int len) throws java.io.IOException
read
in class java.io.Reader
java.io.IOException
public void close() throws java.io.IOException
CharFilter
NOTE:
The default implementation closes the input Reader, so
be sure to call super.close()
when overriding this method.
close
in interface java.io.Closeable
close
in interface java.lang.AutoCloseable
close
in class CharFilter
java.io.IOException
static int getInitialBufferSize()
private static char[] zzUnpackCMap(java.lang.String packed)
packed
- the packed character translation table
private boolean zzRefill() throws java.io.IOException
false, iff there was new input.
java.io.IOException
- if any I/O-Error occurs
private final void yyclose() throws java.io.IOException
java.io.IOException
private final void yyreset(java.io.Reader reader)
reader
- the new input stream
private final int yystate()
private final void yybegin(int newState)
newState
- the new lexical state
private final java.lang.String yytext()
private final char yycharat(int pos)
pos
- the position of the character to fetch.
A value from 0 to yylength()-1.
private final int yylength()
private void zzScanError(int errorCode)
errorCode
- the code of the error message to display
private void yypushback(int number)
number
- the number of characters to be read again.
This number must not be greater than yylength()!
private void zzDoEOF()
private int nextChar() throws java.io.IOException
java.io.IOException
- if any I/O-Error occurs