1

I'm writing an XML file using both the StAX and the DOM API. The StAX API writes the newline unchanged, while the DOM API escapes the newline in the attribute value to `&#10;`. Why is there a difference? How do I force StAX to also escape the newline?

import org.w3c.dom.Document;
import org.w3c.dom.Element;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;

/**
 * Demonstrates how StAX and DOM serialize a newline inside an attribute value,
 * and what each API returns when the resulting file is parsed again.
 *
 * <p>The StAX writer emits the newline literally, so the attribute is read back
 * with a space in its place. The DOM/Transformer path emits the character
 * reference {@code &#10;}, so the newline is read back unchanged.
 */
public class WriteXml {

    /** Encoding used for the StAX output; {@link Files#readString(Path)} decodes UTF-8. */
    private static final String ENCODING = "UTF-8";

    /**
     * Writes the sample text with each API, prints the raw file and the
     * attribute value as seen by both readers, then removes the temp files.
     *
     * @param args ignored
     * @throws Exception if writing, reading, or parsing a file fails
     */
    public static void main(String[] args) throws Exception {

        // returns "foo SPACE bar" because writeWithStax literally writes the newline
        report("staxFile", writeWithStax("foo\nbar"));

        // returns "foo NEWLINE bar" because writeWithDom escapes the newline to "&#10;"
        report("domFile", writeWithDom("foo\nbar"));

    }

    /**
     * Prints the file content and the attribute value read with StAX and DOM,
     * followed by a blank line, and always deletes the file afterwards.
     *
     * @param label name printed in front of the raw file content
     * @param file  XML file to inspect; deleted before this method returns
     * @throws Exception if the file cannot be read or parsed
     */
    private static void report(String label, Path file) throws Exception {
        try {
            System.out.println(label + " = " + Files.readString(file));
            System.out.println("attribute read with stax = " + readWithStax(file));
            System.out.println("attribute read with dom = " + readWithDom(file));
            System.out.println();
        } finally {
            // The files are only needed for this report; do not leave them in the temp dir.
            Files.deleteIfExists(file);
        }
    }

    /**
     * Writes {@code <element attribute="text"/>} to a new temp file using StAX.
     *
     * @param text attribute value to write
     * @return path of the created file
     * @throws Exception if the file cannot be created or written
     */
    private static Path writeWithStax(String text) throws Exception {
        Path file = Files.createTempFile("stax", ".xml");
        try (OutputStream out = Files.newOutputStream(file)) {
            XMLOutputFactory factory = XMLOutputFactory.newInstance();
            // Pass the encoding explicitly so the bytes do not depend on the factory's default.
            XMLStreamWriter writer = factory.createXMLStreamWriter(out, ENCODING);
            try {
                writer.writeStartDocument();
                writer.writeStartElement("element");
                writer.writeAttribute("attribute", text);
                writer.writeEndElement();
                writer.writeEndDocument();
                writer.flush();
            } finally {
                // XMLStreamWriter is not AutoCloseable, so release it by hand even on failure.
                writer.close();
            }
        }
        return file;
    }

    /**
     * Writes {@code <element attribute="text"/>} to a new temp file by building
     * a DOM document and serializing it with a default {@link Transformer}.
     *
     * @param text attribute value to write
     * @return path of the created file
     * @throws Exception if the file cannot be created or the document cannot be serialized
     */
    private static Path writeWithDom(String text) throws Exception {
        Path file = Files.createTempFile("dom", ".xml");
        try (OutputStream out = Files.newOutputStream(file)) {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document = builder.newDocument();
            Element warningElement = document.createElement("element");
            warningElement.setAttribute("attribute", text);
            document.appendChild(warningElement);
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            transformer.transform(new DOMSource(document), new StreamResult(out));
        }
        return file;
    }

    /**
     * Reads the {@code attribute} value of the first {@code element} start tag using StAX.
     *
     * @param file XML file to parse
     * @return the attribute value, or {@code null} if no {@code element} start tag
     *         is found or it has no such attribute
     * @throws Exception if the file cannot be read or parsed
     */
    private static String readWithStax(Path file) throws Exception {
        try (InputStream in = Files.newInputStream(file)) {
            XMLInputFactory factory = XMLInputFactory.newInstance();
            XMLStreamReader reader = factory.createXMLStreamReader(in);
            try {
                while (reader.hasNext()) {
                    int event = reader.next();
                    if (event == XMLStreamReader.START_ELEMENT
                            && "element".equals(reader.getLocalName())) {
                        return reader.getAttributeValue(null, "attribute");
                    }
                }
                return null;
            } finally {
                // close() frees the reader's resources; the stream itself is closed by try-with-resources.
                reader.close();
            }
        }
    }

    /**
     * Reads the {@code attribute} value of the document element using DOM.
     *
     * @param file XML file to parse
     * @return the attribute value as returned by {@link Element#getAttribute(String)}
     * @throws Exception if the file cannot be read or parsed
     */
    private static String readWithDom(Path file) throws Exception {
        try (InputStream in = Files.newInputStream(file)) {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document = builder.parse(in);
            return document.getDocumentElement().getAttribute("attribute");
        }
    }

}

Based on the first answer I have now written this workaround. This feels like a hack, but I guess it works.

/**
 * Writes {@code <element attribute="text"/>} to a new temp file with StAX,
 * routing the attribute through {@code writeAttributePreservingWhitespace}
 * instead of calling {@code writeAttribute} directly.
 *
 * @param text attribute value to write
 * @return path of the created file
 * @throws Exception if the file cannot be created or written
 */
private static Path writeWithStax(String text) throws Exception {
    final Path target = Files.createTempFile("stax", ".xml");
    try (Writer sink = Files.newBufferedWriter(target, UTF_8)) {
        final XMLStreamWriter xml = XMLOutputFactory.newInstance().createXMLStreamWriter(sink);

        xml.writeStartDocument();
        xml.writeStartElement("element");
        // hack to preserve whitespace: the helper needs both the StAX writer and the raw Writer
        writeAttributePreservingWhitespace(sink, xml, "attribute", text);
        xml.writeEndElement();
        xml.writeEndDocument();

        xml.flush();
        xml.close();
    }
    return target;
}

/**
 * Writes an attribute so that tabs, line feeds and carriage returns in its
 * value are emitted as character references.
 *
 * <p>Values without those characters go through {@code writer.writeAttribute}.
 * Otherwise the StAX writer is flushed and the attribute text is written
 * straight to {@code out}, which must be the Writer underneath {@code writer}
 * for the output to land in the right place.
 *
 * @param out    the Writer the attribute is written to when escaping is needed
 * @param writer the StAX writer, positioned inside an open start tag
 * @param name   attribute name
 * @param value  attribute value
 * @throws XMLStreamException if the StAX writer fails
 * @throws IOException        if writing to {@code out} fails
 */
private static void writeAttributePreservingWhitespace(Writer out, XMLStreamWriter writer, String name, String value) throws XMLStreamException, IOException {
    // see https://www.w3.org/TR/xml/#AVNormalize
    final boolean hasNoSpecialWhitespace =
            value.indexOf('\t') < 0 && value.indexOf('\n') < 0 && value.indexOf('\r') < 0;
    if (hasNoSpecialWhitespace) {
        writer.writeAttribute(name, value);
        return;
    }

    // Push out whatever the StAX writer has buffered before writing past it.
    writer.flush();
    out.write(" ");
    writeXMLContent(out, name);
    out.write("=\"");
    writeXMLContent(out, value);
    out.write("\"");
}

/**
 * Writes {@code text} to {@code out}, replacing {@code <}, {@code &},
 * {@code >}, {@code "}, tab, line feed and carriage return with XML entity or
 * character references. All other characters are written unchanged.
 *
 * @param out  destination
 * @param text text to escape and write
 * @throws IOException if writing to {@code out} fails
 */
private static void writeXMLContent(Writer out, String text) throws IOException {
    for (int i = 0; i < text.length(); i++) {
        final char current = text.charAt(i);
        final String replacement = switch (current) {
            case '<' -> "&lt;";
            case '&' -> "&amp;";
            case '>' -> "&gt;";
            case '"' -> "&quot;";
            case '\t' -> "&#x9;";
            case '\n' -> "&#xA;";
            case '\r' -> "&#xD;";
            default -> null;
        };
        if (replacement == null) {
            out.write(current);
        } else {
            out.write(replacement);
        }
    }
}
6
  • What is effectively written to both files? Maybe it's a read issue, not a write one. Commented May 9 at 13:13
  • Stax writes the attribute as attribute="foo{newline}bar" and Dom writes attribute="foo&#10;bar". Commented May 9 at 14:06
  • According to the XML Specification, newlines are not allowed in attribute values, and each must be converted to a space. So the DOM is doing exactly what it should be doing. Commented May 9 at 22:11
  • Why bother? When returning an attribute's value, a conforming XML processor should replace each newline with a space. Commented May 10 at 15:45
  • I too assumed the way the file is written is irrelevant, but when the new line is encoded as ‘&#10;’ then it is preserved. That is why I care. Commented May 10 at 19:53

1 Answer 1

1

New lines would have to be manually escaped as XMLStreamWriter will not do that

The XMLStreamWriter does not perform well-formedness checking on its input. However, the writeCharacters method is required to escape &, < and >. For attribute values, the writeAttribute method will escape the above characters plus " to ensure that all character content and attribute values are well formed.

New lines are not listed for escaping.

Sign up to request clarification or add additional context in comments.

2 Comments

So ... you're telling me to use StAX I should wrap my XMLStreamWriter around a Writer and for each attribute that contains a newline flush the XMLStreamWriter and write the attribute directly to the underlying Writer myself? Feels ... dirty.
I'm saying \n needs to be manually converted. Something like writer.writeAttribute("attribute", text.replace("\n", "&#10;")); But that probably won't work, because the & would itself be escaped and the result written as &amp;#10;, so I believe there's no easy way to make StAX do that.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.