org.egothor.html
Class HTMLPrinter
java.lang.Object
org.cyberneko.html.filters.DefaultFilter
org.egothor.html.HTMLPrinter
- All Implemented Interfaces:
- org.apache.xerces.xni.parser.XMLComponent, org.apache.xerces.xni.parser.XMLDocumentFilter, org.apache.xerces.xni.parser.XMLDocumentSource, org.apache.xerces.xni.XMLDocumentHandler, org.cyberneko.html.HTMLComponent
public class HTMLPrinter
- extends org.cyberneko.html.filters.DefaultFilter
This class reformats the HTML documents stored in our database into a format suitable for scientific
experiments. It was used for a paper on syllable XBW.
Fields inherited from class org.cyberneko.html.filters.DefaultFilter |
fDocumentHandler, fDocumentSource |
Constructor Summary |
HTMLPrinter(java.io.OutputStream outputStream,
java.lang.String encoding)
|
HTMLPrinter(java.io.Writer writer,
java.lang.String encoding,
boolean strictScript)
|
Method Summary |
void |
characters(org.apache.xerces.xni.XMLString text,
org.apache.xerces.xni.Augmentations augs)
|
void |
comment(org.apache.xerces.xni.XMLString text,
org.apache.xerces.xni.Augmentations augs)
|
void |
emptyElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attributes,
org.apache.xerces.xni.Augmentations augs)
|
void |
endElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.Augmentations augs)
|
void |
endGeneralEntity(java.lang.String name,
org.apache.xerces.xni.Augmentations augs)
|
static void |
filter(java.lang.String systemId,
byte[] file,
int file_len,
java.io.PrintStream ps,
java.lang.String inpEnc,
java.lang.String outEnc)
|
protected void |
printAttributeValue(java.lang.String text)
|
protected void |
printCharacters(org.apache.xerces.xni.XMLString text,
boolean normalize)
|
protected void |
printEndElement(org.apache.xerces.xni.QName element)
|
protected void |
printEntity(java.lang.String name)
|
protected void |
printStartElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attributes)
|
protected void |
printStartElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attributes,
boolean empty)
|
void |
startDocument(org.apache.xerces.xni.XMLLocator locator,
java.lang.String encoding,
org.apache.xerces.xni.Augmentations augs)
|
void |
startDocument(org.apache.xerces.xni.XMLLocator locator,
java.lang.String encoding,
org.apache.xerces.xni.NamespaceContext nscontext,
org.apache.xerces.xni.Augmentations augs)
|
void |
startElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attributes,
org.apache.xerces.xni.Augmentations augs)
|
void |
startGeneralEntity(java.lang.String name,
org.apache.xerces.xni.XMLResourceIdentifier id,
java.lang.String encoding,
org.apache.xerces.xni.Augmentations augs)
|
Methods inherited from class org.cyberneko.html.filters.DefaultFilter |
doctypeDecl, endCDATA, endDocument, endPrefixMapping, getDocumentHandler, getDocumentSource, getFeatureDefault, getPropertyDefault, getRecognizedFeatures, getRecognizedProperties, ignorableWhitespace, merge, processingInstruction, reset, setDocumentHandler, setDocumentSource, setFeature, setProperty, startCDATA, startPrefixMapping, textDecl, xmlDecl |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
NOTIFY_CHAR_REFS
public static final java.lang.String NOTIFY_CHAR_REFS
- See Also:
- Constant Field Values
NOTIFY_HTML_BUILTIN_REFS
public static final java.lang.String NOTIFY_HTML_BUILTIN_REFS
- See Also:
- Constant Field Values
AUGMENTATIONS
protected static final java.lang.String AUGMENTATIONS
- See Also:
- Constant Field Values
FILTERS
protected static final java.lang.String FILTERS
- See Also:
- Constant Field Values
enc
protected java.lang.String enc
pw
protected java.io.PrintWriter pw
rootSeen
protected boolean rootSeen
httpEqSeen
protected boolean httpEqSeen
depth
protected int depth
charNorm
protected boolean charNorm
printChars
protected boolean printChars
HTMLPrinter
public HTMLPrinter(java.io.OutputStream outputStream,
java.lang.String encoding)
throws java.io.UnsupportedEncodingException
- Throws:
java.io.UnsupportedEncodingException
HTMLPrinter
public HTMLPrinter(java.io.Writer writer,
java.lang.String encoding,
boolean strictScript)
startDocument
public void startDocument(org.apache.xerces.xni.XMLLocator locator,
java.lang.String encoding,
org.apache.xerces.xni.NamespaceContext nscontext,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
- Specified by:
startDocument
in interface org.apache.xerces.xni.XMLDocumentHandler
- Overrides:
startDocument
in class org.cyberneko.html.filters.DefaultFilter
- Throws:
org.apache.xerces.xni.XNIException
startDocument
public void startDocument(org.apache.xerces.xni.XMLLocator locator,
java.lang.String encoding,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
- Overrides:
startDocument
in class org.cyberneko.html.filters.DefaultFilter
- Throws:
org.apache.xerces.xni.XNIException
comment
public void comment(org.apache.xerces.xni.XMLString text,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
- Specified by:
comment
in interface org.apache.xerces.xni.XMLDocumentHandler
- Overrides:
comment
in class org.cyberneko.html.filters.DefaultFilter
- Throws:
org.apache.xerces.xni.XNIException
startElement
public void startElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attributes,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
- Specified by:
startElement
in interface org.apache.xerces.xni.XMLDocumentHandler
- Overrides:
startElement
in class org.cyberneko.html.filters.DefaultFilter
- Throws:
org.apache.xerces.xni.XNIException
emptyElement
public void emptyElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attributes,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
- Specified by:
emptyElement
in interface org.apache.xerces.xni.XMLDocumentHandler
- Overrides:
emptyElement
in class org.cyberneko.html.filters.DefaultFilter
- Throws:
org.apache.xerces.xni.XNIException
characters
public void characters(org.apache.xerces.xni.XMLString text,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
- Specified by:
characters
in interface org.apache.xerces.xni.XMLDocumentHandler
- Overrides:
characters
in class org.cyberneko.html.filters.DefaultFilter
- Throws:
org.apache.xerces.xni.XNIException
endElement
public void endElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
- Specified by:
endElement
in interface org.apache.xerces.xni.XMLDocumentHandler
- Overrides:
endElement
in class org.cyberneko.html.filters.DefaultFilter
- Throws:
org.apache.xerces.xni.XNIException
startGeneralEntity
public void startGeneralEntity(java.lang.String name,
org.apache.xerces.xni.XMLResourceIdentifier id,
java.lang.String encoding,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
- Specified by:
startGeneralEntity
in interface org.apache.xerces.xni.XMLDocumentHandler
- Overrides:
startGeneralEntity
in class org.cyberneko.html.filters.DefaultFilter
- Throws:
org.apache.xerces.xni.XNIException
endGeneralEntity
public void endGeneralEntity(java.lang.String name,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
- Specified by:
endGeneralEntity
in interface org.apache.xerces.xni.XMLDocumentHandler
- Overrides:
endGeneralEntity
in class org.cyberneko.html.filters.DefaultFilter
- Throws:
org.apache.xerces.xni.XNIException
printAttributeValue
protected void printAttributeValue(java.lang.String text)
printCharacters
protected void printCharacters(org.apache.xerces.xni.XMLString text,
boolean normalize)
printStartElement
protected void printStartElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attributes)
printStartElement
protected void printStartElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attributes,
boolean empty)
printEndElement
protected void printEndElement(org.apache.xerces.xni.QName element)
printEntity
protected void printEntity(java.lang.String name)
filter
public static void filter(java.lang.String systemId,
byte[] file,
int file_len,
java.io.PrintStream ps,
java.lang.String inpEnc,
java.lang.String outEnc)
throws java.lang.Exception
- Throws:
java.lang.Exception