java - XML 解析器和 xpath 表达式

标签 java xml xpath

我正在使用 Java 默认的 DocumentBuilder 来解析一个不到 100 行的 XML 文档。解析该文档需要 35 毫秒,执行单个 XPath 表达式需要 15 毫秒。如何优化 XML 解析和 XPath 求值所花费的时间?

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;


/**
 * Loads a configuration XML file and collects, per {@code <file>} element,
 * the attribute maps of its nested {@code <property>} elements.
 *
 * <p>Collected properties are keyed by {@code path + ":" + type}, where both
 * values are read from the attributes of the {@code <file>} element.
 */
public class XMLParser {


    public static final Logger LOGGER = Logger.getLogger(XMLParser.class.getName());

    /** Attribute maps of the {@code <property>} elements, keyed by {@code "path:type"}. */
    private final Map<String,List<NamedNodeMap>> fileVsProperties = new HashMap<String, List<NamedNodeMap>>();

    /** Parsed document; {@code null} when {@link XMLUtil#getDocument(File)} could not parse the file. */
    private Document document;

    /**
     * Parses the given file through {@link XMLUtil#getDocument(File)}.
     *
     * @param file the XML file to load
     */
    public XMLParser(File file){
        this.document = XMLUtil.getDocument(file);
    }

    /**
     * Records the attributes of every {@code <property>} descendant of the
     * given {@code <file>} element under the key {@code path + ":" + type}.
     * Elements without a {@code path} attribute are logged and ignored.
     *
     * @param file a {@code <file>} element
     */
    public void setProperties(Element file){
        String path = file.getAttribute("path");

        // getAttribute returns "" for a missing attribute, so this covers both
        // "absent" and "present but empty".
        if("".equals(path)){
            LOGGER.log(Level.INFO,"Attribute path is required for a file.");
            return;
        }

        String type = file.getAttribute("type");
        NodeList properties = file.getElementsByTagName("property");
        List<NamedNodeMap> props = new ArrayList<NamedNodeMap>(properties.getLength());

        for(int i = 0;i<properties.getLength();i++){
            Element property = (Element) properties.item(i);
            props.add(property.getAttributes());
        }
        setProperties(props,path+":"+type);
    }

    /**
     * Merges {@code properties} into the entry stored for {@code path},
     * creating the entry when none exists yet.
     */
    private void setProperties(List<NamedNodeMap> properties , String path){
        List<NamedNodeMap>  previousValue = fileVsProperties.get(path);
        if(previousValue != null){
            previousValue.addAll(properties);
        }else{
            fileVsProperties.put(path,properties);
        }

    }

    /**
     * Looks up the {@code <configuration>} element whose {@code name}
     * attribute equals {@code branchName}.
     *
     * @param branchName value of the {@code name} attribute to match
     * @return the matching element, or {@code null} when nothing matches or
     *         the document could not be loaded
     * @throws XPathExpressionException if the XPath expression cannot be evaluated
     */
    public Element getConfiguration(String branchName) throws XPathExpressionException{
        if(document == null){
            // Parsing failed earlier (already logged by XMLUtil); report "not found"
            // instead of failing with a NullPointerException.
            return null;
        }
        String expression = "/configurations/configuration[@name="+toXPathLiteral(String.valueOf(branchName))+"]";
        return (Element)XMLUtil.getElements(expression,document.getDocumentElement(),XPathConstants.NODE);
    }

    /**
     * Renders {@code value} as an XPath 1.0 string literal. XPath 1.0 has no
     * escape sequence for quotes, so a value containing both quote characters
     * is assembled with {@code concat()}.
     */
    private static String toXPathLiteral(String value){
        if(value.indexOf('\'') < 0){
            return "'"+value+"'";
        }
        if(value.indexOf('"') < 0){
            return "\""+value+"\"";
        }
        // The value contains at least one apostrophe, so the split yields at
        // least two parts and concat() always receives three or more arguments.
        String[] parts = value.split("'", -1);
        StringBuilder literal = new StringBuilder("concat(");
        for(int i = 0;i<parts.length;i++){
            if(i > 0){
                literal.append(",\"'\",");
            }
            literal.append('\'').append(parts[i]).append('\'');
        }
        return literal.append(')').toString();
    }

    /**
     * Small timing driver: parses {@code install.xml}, looks up one
     * configuration and collects its file properties, printing the elapsed
     * wall-clock time of each step.
     */
    public static void main(String[] args) throws XPathExpressionException {
        long start = System.currentTimeMillis();
        File doc = new File("install.xml");
        XMLParser parser = new XMLParser(doc);
        long end = System.currentTimeMillis();
        System.out.println("Time Taken For Parsing :: "+ (end-start) + " milliseconds");
        start = end;
        Element configuration = parser.getConfiguration("BHARATHIKANNAN");
        end = System.currentTimeMillis();
        System.out.println("Time Taken For XPATH Expression TO Finding the Configuration :: "+ (end-start) + " milliseconds");
        if(configuration == null){
            // Without a configuration element there is nothing to iterate over.
            LOGGER.log(Level.WARNING,"No matching configuration found.");
            return;
        }
        start = end;
        NodeList files = parser.getFiles(configuration);
        for(int i=0;i<files.getLength();i++){
            parser.setProperties((Element) files.item(i));
        }
        end = System.currentTimeMillis();
        System.out.println(parser.fileVsProperties);
        System.out.println("Time Taken For Setting Properties :: "+ (end-start) + " milliseconds");
    }

    /**
     * Returns every {@code <file>} descendant of the given configuration element.
     *
     * @param configuration a {@code <configuration>} element; must not be {@code null}
     * @return the {@code <file>} elements in document order
     */
    public NodeList getFiles(Element configuration){
        return configuration.getElementsByTagName("file");
    }

}


/**
 * Static helpers around one shared {@link DocumentBuilder} and one shared
 * {@link XPath} instance.
 *
 * <p>NOTE(review): both shared instances are used without synchronization;
 * confirm that callers only use this class from a single thread.
 */
class XMLUtil{
    // Declared first: the initializers below log through it.
    public static final Logger LOGGER = Logger.getLogger(XMLUtil.class.getName());

    private static final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    private static final DocumentBuilder builder = createBuilder();

    private static final XPathFactory xpathFactory = XPathFactory.newInstance();

    // Created independently of the builder so that a parser configuration
    // problem does not also disable XPath evaluation.
    private static final XPath xpath = xpathFactory.newXPath();

    /**
     * Creates the shared builder, logging the cause when the factory cannot
     * supply one.
     *
     * @return the builder, or {@code null} if it could not be created
     */
    private static DocumentBuilder createBuilder(){
        try {
            return factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            LOGGER.log(Level.SEVERE,"Unable to create a DocumentBuilder; XML parsing is unavailable",e);
            return null;
        }
    }

    /**
     * Parses the given file into a DOM document.
     *
     * @param f the XML file to parse
     * @return the parsed document, or {@code null} when the file cannot be
     *         read, is not well-formed, or no parser is available; the cause
     *         is logged in each case
     */
    public static Document getDocument(File f){
        if (builder == null) {
            LOGGER.log(Level.SEVERE,"No DocumentBuilder available; cannot parse " + f);
            return null;
        }
        Document doc = null;
        try {
            doc = builder.parse(f);
        } catch (SAXException e) {
            LOGGER.log(Level.WARNING,"Invalid XML Document ",e);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE,"No Document Found in the given path",e);
        }
        return doc;
    }

    /**
     * Evaluates an XPath expression with {@code ele} as the context node.
     *
     * @param xpathExpression the expression to evaluate
     * @param ele the context node for the evaluation
     * @param dataType the requested result type, e.g. one of the
     *        {@code XPathConstants} values
     * @return the evaluation result, typed according to {@code dataType}
     * @throws XPathExpressionException if the expression cannot be evaluated
     */
    public static Object getElements(String xpathExpression , Element ele ,QName dataType) throws XPathExpressionException{
        return xpath.evaluate(xpathExpression, ele,dataType);
    }


}

XML 文件

<?xml version="1.0"?>
<!--
        Note : Default configuration loaded using your current branch name . You can extend configurations using extend attribute in configuration
        node . 
-->
<configurations>
        <configuration name="default">
                <files>
                        <file type="xml" path="conf/server.xml.orig">
                                <property regex="(port=).*" replace="\18080" xpath="/Server/Connector"></property>
                                <property regex="(port=).*" replace="\18080"></property>
                        </file>
                        <file type="text" path="conf/system_properties.conf">
                                <property regex="(username=).*" replace="\1root" ></property>
                        </file>
                </files>
        </configuration>
        <configuration name="BHARATHIKANNAN" extends="default">
                <files>
                        <file type="text" path="conf/system_properties.conf">
                                <property regex="(username=).*" replace="\1root" ></property>
                        </file>
                </files>
        </configuration>
</configurations>

输出:

Time Taken For Parsing :: 24 milliseconds
Time Taken For XPATH Expression TO Finding the Configuration :: 14 milliseconds
{conf/system_properties.conf:text=[com.sun.org.apache.xerces.internal.dom.AttributeMap@75d9fd51]}
Time Taken For Setting Properties :: 0 milliseconds

最佳答案

最近有人问了一个非常相似的问题,只是文档更大 (2Mb),我在这里给出了一些 Saxon 的计时结果:

https://stackoverflow.com/questions/12497928/xpath-speed-comparision/12508614#12508614

这些计时是在大得多的文档上测得的,却比您看到的结果快得多。由于您已经在使用 Java,因此切换到 Saxon 应该非常简单。

但需要注意的是,您在进入 main() 时就立即开始计时,这意味着您测量到的主要是类加载时间,而不是 XML 处理时间。我在开始测量之前会先预热 Java VM。

请注意,如果您使用 Saxon,最好使用 Saxon 的原生树模型,它远优于 DOM 或其他替代模型。我们最近在这里发布了一些测量结果:

http://dev.saxonica.com/blog/mike/2012/09/index.html#000194

DOM 的结果平均比 Saxon 的原生树差 8 倍,在最坏的情况下差 23 倍。

关于java - XML 解析器和 xpath 表达式,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/12527491/

相关文章:

当 id 为 String 时,Java mongoTemplate findOne 查询不返回结果

java - 在哪里提供 try catch block

java - 发现多个带有绑定(bind)类型接口(interface)的 ejb

xml - 如何从 bash 中的 pom 文件中删除版本号

java - 应该打印数组的所有值的方法仅返回空值。

xml - Xpath:第一个前后兄弟

java - 尝试在 android 中的 Strings.xml 中的 textView 中设置图像

javascript - 无法使用 python selenium 获取生成的 html 源

xpath - XPath评估失败

java - 如果属性具有特定值,则删除 XML 节点