public class BoilerpipeHTMLContentHandler
extends java.lang.Object
implements org.xml.sax.ContentHandler
A SAX ContentHandler used by BoilerpipeSAXInput. It can
be used with different parser implementations, e.g. NekoHTML and TagSoup.

| Constructor and Description |
|---|
BoilerpipeHTMLContentHandler()
Constructs a
BoilerpipeHTMLContentHandler using the
DefaultTagActionMap. |
BoilerpipeHTMLContentHandler(TagActionMap tagActions)
Constructs a
BoilerpipeHTMLContentHandler using the given
TagActionMap. |
| Modifier and Type | Method and Description |
|---|---|
void |
addLabelAction(LabelAction la) |
protected void |
addTextBlock(TextBlock tb) |
void |
addWhitespaceIfNecessary() |
void |
characters(char[] ch,
int start,
int length) |
void |
endDocument() |
void |
endElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName) |
void |
endPrefixMapping(java.lang.String prefix) |
void |
flushBlock() |
java.lang.String |
getTitle() |
void |
ignorableWhitespace(char[] ch,
int start,
int length) |
void |
processingInstruction(java.lang.String target,
java.lang.String data) |
void |
recycle()
Recycles this instance.
|
void |
setDocumentLocator(org.xml.sax.Locator locator) |
void |
setTitle(java.lang.String s) |
void |
skippedEntity(java.lang.String name) |
void |
startDocument() |
void |
startElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName,
org.xml.sax.Attributes atts) |
void |
startPrefixMapping(java.lang.String prefix,
java.lang.String uri) |
TextDocument |
toTextDocument()
Returns a
TextDocument containing the extracted TextBlocks. |
public BoilerpipeHTMLContentHandler()
Constructs a BoilerpipeHTMLContentHandler using the
DefaultTagActionMap.

public BoilerpipeHTMLContentHandler(TagActionMap tagActions)
Constructs a BoilerpipeHTMLContentHandler using the given
TagActionMap.
Parameters: tagActions - The TagActionMap to use, e.g.
DefaultTagActionMap.

public void recycle()
Recycles this instance.

public void endDocument()
throws org.xml.sax.SAXException
Specified by: endDocument in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void endPrefixMapping(java.lang.String prefix)
throws org.xml.sax.SAXException
Specified by: endPrefixMapping in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void ignorableWhitespace(char[] ch,
int start,
int length)
throws org.xml.sax.SAXException
Specified by: ignorableWhitespace in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void processingInstruction(java.lang.String target,
java.lang.String data)
throws org.xml.sax.SAXException
Specified by: processingInstruction in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void setDocumentLocator(org.xml.sax.Locator locator)
Specified by: setDocumentLocator in interface org.xml.sax.ContentHandler

public void skippedEntity(java.lang.String name)
throws org.xml.sax.SAXException
Specified by: skippedEntity in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void startDocument()
throws org.xml.sax.SAXException
Specified by: startDocument in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void startPrefixMapping(java.lang.String prefix,
java.lang.String uri)
throws org.xml.sax.SAXException
Specified by: startPrefixMapping in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void startElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName,
org.xml.sax.Attributes atts)
throws org.xml.sax.SAXException
Specified by: startElement in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void endElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName)
throws org.xml.sax.SAXException
Specified by: endElement in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void characters(char[] ch,
int start,
int length)
throws org.xml.sax.SAXException
Specified by: characters in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void flushBlock()
protected void addTextBlock(TextBlock tb)
public java.lang.String getTitle()
public void setTitle(java.lang.String s)
public TextDocument toTextDocument()
Returns a TextDocument containing the extracted TextBlocks.
NOTE: Only call this after parsing.
Returns: TextDocument

public void addWhitespaceIfNecessary()
public void addLabelAction(LabelAction la) throws java.lang.IllegalStateException
Throws: java.lang.IllegalStateException