Java源码示例:org.cyberneko.html.HTMLConfiguration

示例1
/**
 * Parses HTML markup (a complete document or a fragment) into a DOM tree using NekoHTML.
 *
 * <p>The parser is explicitly configured to report element names in lower case, and the supplied
 * encoding is registered as the parser's default encoding. Attribute-name handling is not
 * configured here and is left at whatever the parser configuration provides.
 *
 * @param document the HTML markup to parse.
 * @param encoding the encoding name handed to the parser as its default encoding.
 * @return the DOM document produced by the parser.
 * @throws IOException propagated from the parser's {@code parse} call.
 * @throws SAXException propagated from the parser's {@code parse} call.
 * @throws RuntimeException if the parser rejects one of the configuration properties; the
 *         original SAX exception is attached as the cause.
 */
public static Document parseDocument(String document, String encoding) throws IOException, SAXException {
    // Property identifiers predefined by NekoHTML (see org.cyberneko.html.HTMLConfiguration).
    final String elementCaseProperty = "http://cyberneko.org/html/properties/names/elems";
    final String defaultEncodingProperty = "http://cyberneko.org/html/properties/default-encoding";

    final DOMParser htmlParser = new DOMParser(new HTMLConfiguration());
    try {
        htmlParser.setProperty(elementCaseProperty, "lower");
        htmlParser.setProperty(defaultEncodingProperty, encoding);
    } catch (SAXNotRecognizedException | SAXNotSupportedException configurationFailure) {
        // Both properties are fixed literals, so a rejection is reported and escalated unchecked.
        logger.error("Unexpected parser configuration error occurred: " + configurationFailure.getMessage());
        throw new RuntimeException(configurationFailure);
    }

    // The markup is already in memory, so it is fed to the parser through a character stream.
    htmlParser.parse(new InputSource(new StringReader(document)));
    return htmlParser.getDocument();
}
 
示例2
/**
 * Parses the stream obtained from {@code getDocumentSource()} with NekoHTML and returns the
 * resulting DOM document.
 *
 * <p>Element names are reported in lower case. When {@code charset} is non-null it is registered
 * as the parser's default encoding.
 *
 * @return the DOM document produced by the parser.
 * @throws SAXException propagated from the parser's configuration or {@code parse} call.
 * @throws IOException propagated from obtaining the input stream or from the parser's
 *         {@code parse} call.
 */
@Override
public Document parse() throws SAXException, IOException
{
    // Temporary NekoHTML workaround, to be removed once NekoHTML itself is fixed:
    // MENU is appended to the parent array of the LI element. The neko_fixed flag
    // ensures the array is extended only once.
    if (!neko_fixed)
    {
        final HTMLElements.Element listItem = HTMLElements.getElement(HTMLElements.LI);
        final HTMLElements.Element[] previousParents = listItem.parent;
        final int previousCount = previousParents.length;

        final HTMLElements.Element[] widenedParents = new HTMLElements.Element[previousCount + 1];
        listItem.parent = widenedParents;
        System.arraycopy(previousParents, 0, widenedParents, 0, previousCount);
        widenedParents[previousCount] = HTMLElements.getElement(HTMLElements.MENU);

        neko_fixed = true;
    }

    final DOMParser htmlParser = new DOMParser(new HTMLConfiguration());
    htmlParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    if (charset != null)
    {
        htmlParser.setProperty("http://cyberneko.org/html/properties/default-encoding", charset);
    }

    final org.xml.sax.InputSource input =
            new org.xml.sax.InputSource(getDocumentSource().getInputStream());
    htmlParser.parse(input);
    return htmlParser.getDocument();
}
 
示例3
/**
 * Parses the input stream {@code is} with NekoHTML, stores the resulting DOM document in
 * {@code doc} and returns it.
 *
 * <p>Both element names and attribute names are reported in lower case. When {@code charset}
 * is non-null it is registered as the parser's default encoding.
 *
 * @return the DOM document produced by the parser (the same object stored in {@code doc}).
 * @throws SAXException propagated from the parser's configuration or {@code parse} call.
 * @throws IOException propagated from the parser's {@code parse} call.
 */
public Document parse() throws SAXException, IOException
{
    final DOMParser htmlParser = new DOMParser(new HTMLConfiguration());

    htmlParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    htmlParser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
    if (charset != null)
    {
        htmlParser.setProperty("http://cyberneko.org/html/properties/default-encoding", charset);
    }

    // Preparation for document filters; currently disabled:
    //   XMLDocumentFilter attributeFilter = new DOMAttributeFilter();
    //   XMLDocumentFilter[] filters = { attributeFilter };
    //   parser.setProperty("http://cyberneko.org/html/properties/filters", filters);

    final org.xml.sax.InputSource input = new org.xml.sax.InputSource(is);
    htmlParser.parse(input);

    // Keep the parsed tree on the instance as well as returning it.
    doc = htmlParser.getDocument();
    return doc;
}
 
示例4
/**
 * Creates the parser, passing a newly constructed {@link HTMLConfiguration} to the
 * superclass constructor.
 */
public HTMLSAXParser() {
     super(new HTMLConfiguration());
}
 
示例5
/**
 * Creates the parser with the given configuration and remembers the document adapter.
 *
 * @param configuration the configuration forwarded to the superclass constructor.
 * @param adapter the adapter stored in {@code _documentAdapter}.
 */
NekoDOMParser( HTMLConfiguration configuration, DocumentAdapter adapter ) {
    super( configuration );
    _documentAdapter = adapter;
}
 
示例6
/**
 * Constructs a script filter with the specified configuration.
 *
 * @param config the configuration stored in {@code _configuration}.
 */
ScriptFilter( HTMLConfiguration config ) {
    _configuration = config;
}
 
示例7
/**
 * Creates the parser, passing a newly constructed {@link HTMLConfiguration} to the
 * superclass constructor.
 */
public HTMLParser() {
	super(new HTMLConfiguration());
}