2

I have an XML file in the following pattern, which contains a few complex empty elements (elements with no content, only attributes).

<items>
    <item id="0" name="a" />
    <item id="1" name="b" />
</items>

I'm at a loss as to how to parse the attributes from them. This is what I have done so far:

// Parse the XML read from inputStream into a DOM Document.
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(inputStream);
// The document element is the root of the tree; its children are only
// examined when its tag name equals TAG_ITEMS.
Element itemsElement = document.getDocumentElement();
if (itemsElement.getTagName().equals(TAG_ITEMS)) {
    // Every direct child node of the root, whatever its node type.
    NodeList nodeList = itemsElement.getChildNodes();
    for (int i = 0; i < nodeList.getLength(); i++) {
        // process each item node
        Node node = nodeList.item(i);
        // This branch is entered only for children whose type is TEXT_NODE,
        // so `node` is never an element node inside it.
        if (node.getNodeType() == Node.TEXT_NODE) { // Is this the right way?
            Text text = (Text) node;
            // Do stuff with attributes
        }
    }
}

I cannot cast these Text nodes to Element nodes and get attributes, I cannot get attributes from node using getAttributes - NPE at NamedNodeMap attributes.getLength(), I cannot cast it to Text and get attributes. How can I parse the attributes?

2 Answers 2

3

You are not interested in the text content of the nodes inside items, but in the attributes of the item nodes. You could proceed as follows:

// Handle one child of <items>: only nodes named "item" are printed.
Node node = nodeList.item(i);
if ("item".equals(node.getNodeName())) {
    NamedNodeMap attrs = node.getAttributes();
    // Read both attribute values first, then print them on a single line.
    String id = attrs.getNamedItem("id").getTextContent();
    String name = attrs.getNamedItem("name").getTextContent();
    System.out.printf("id=%s, name=%s%n", id, name);
}

This would print:

id=0, name=a
id=1, name=b
Sign up to request clarification or add additional context in comments.

Comments

2

Assuming you want to get the individual attributes of the nodes, you need to do one of two things (or both, depending on your needs)...

You need to test if the current node is an ELEMENT_NODE or if the current node's name is equal to item (assuming all the node names are the same), for example...

import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

/**
 * Prints the {@code id} and {@code name} attributes of every element that is a direct
 * child of the {@code <items>} root in the classpath resource {@code /Test.xml}.
 */
public class Test {

    /** Tag name the root element must have for its children to be processed. */
    public static final String TAG_ITEMS = "items";

    /** Classpath location of the XML document read by {@link #main(String[])}. */
    private static final String RESOURCE = "/Test.xml";

    /**
     * Parses {@code /Test.xml} from the classpath and prints its items to standard output.
     * A missing resource is reported on standard error; parse failures print a stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try (InputStream is = Test.class.getResourceAsStream(RESOURCE)) {
            if (is == null) {
                // getResourceAsStream reports a missing resource by returning null;
                // handing that to the parser would only yield an opaque stack trace.
                System.err.println("Resource " + RESOURCE + " not found on the classpath");
                return;
            }
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document = builder.parse(is);
            printItems(document.getDocumentElement());
        } catch (Exception exp) {
            // Top-level boundary of a demo program: report the failure and exit normally.
            exp.printStackTrace();
        }
    }

    /**
     * Prints {@code id = ...} and {@code name = ...} for each element child of the given root.
     * Nothing is printed when the root's tag is not {@link #TAG_ITEMS}. Non-element children
     * (such as the whitespace text between items) are skipped, and an attribute that is absent
     * is printed as an empty value instead of raising a {@code NullPointerException}.
     *
     * @param itemsElement the document's root element
     */
    static void printItems(Element itemsElement) {
        if (!TAG_ITEMS.equals(itemsElement.getTagName())) {
            return;
        }
        NodeList nodeList = itemsElement.getChildNodes();
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node node = nodeList.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            // Safe cast: the node type was checked above. Element.getAttribute returns
            // an empty string rather than null when the attribute is not present.
            Element item = (Element) node;
            System.out.println("id = " + item.getAttribute("id"));
            System.out.println("name = " + item.getAttribute("name"));
        }
    }

}

Which will output...

id = 0
name = a
id = 1
name = b

All of this could be greatly reduced by using XPath. For example, if all the item nodes have the same name, then you could just use

/items/item

As the query. If the node names are different, but the attributes are the same, then you could use

/items/*[@id]

which will list all the nodes under items which have an id attribute, or

/items/*[@name]

which will list all the nodes under items which have a name attribute...

import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Selects nodes from the classpath resource {@code /Test.xml} with several XPath queries
 * and prints the {@code id} and {@code name} attributes of every node each query returns.
 */
public class Test {

    /** Classpath location of the XML document read by {@link #main(String[])}. */
    private static final String RESOURCE = "/Test.xml";

    /** XPath queries evaluated, in order, against the parsed document. */
    private static final String[] QUERIES = {"/items/item", "/items/*[@id]", "/items/*[@name]"};

    /**
     * Parses {@code /Test.xml}, evaluates each query and prints the matching nodes.
     * A missing resource is reported on standard error; parse or XPath failures print
     * a stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try (InputStream is = Test.class.getResourceAsStream(RESOURCE)) {
            if (is == null) {
                // getResourceAsStream reports a missing resource by returning null;
                // handing that to the parser would only yield an opaque stack trace.
                System.err.println("Resource " + RESOURCE + " not found on the classpath");
                return;
            }
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document = builder.parse(is);

            XPath xpath = XPathFactory.newInstance().newXPath();
            for (String query : QUERIES) {
                XPathExpression expression = xpath.compile(query);
                NodeList nodes = (NodeList) expression.evaluate(document, XPathConstants.NODESET);
                process(nodes);
            }
        } catch (Exception exp) {
            // Top-level boundary of a demo program: report the failure and exit normally.
            exp.printStackTrace();
        }
    }

    /**
     * Prints one {@code id = ...; name = ...} line per node that can carry attributes.
     * Nodes whose {@code getAttributes()} is {@code null} (non-element nodes) are skipped.
     * A missing attribute is printed as an empty value: the queries {@code /items/*[@id]}
     * and {@code /items/*[@name]} only guarantee one of the two attributes is present.
     *
     * @param nodes the nodes to print
     */
    protected static void process(NodeList nodes) {
        for (int index = 0; index < nodes.getLength(); index++) {
            NamedNodeMap attributes = nodes.item(index).getAttributes();
            if (attributes == null) {
                continue;
            }
            System.out.println("id = " + valueOf(attributes, "id")
                    + "; name = " + valueOf(attributes, "name"));
        }
    }

    /** Returns the value of the named attribute, or an empty string when it is absent. */
    private static String valueOf(NamedNodeMap attributes, String name) {
        Node attribute = attributes.getNamedItem(name);
        return attribute == null ? "" : attribute.getNodeValue();
    }

}

2 Comments

Only thing that changes is the attribute values, like in the example.
It depends, for a single pass, maybe a little, as the XPath needs to walk the DOM to build its list and then you need to process it separately; it's just, personally, simpler...

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.