Java Tutorial/XML/DOM Parser — различия между версиями
Admin (обсуждение | вклад) м (1 версия) |
|
(нет различий)
|
Текущая версия на 05:17, 1 июня 2010
Содержание
- 1 A DOM Error Checker: Using DOM for Syntax Checking
- 2 A DOM Parse Tree Lister
- 3 Converting an XML Fragment into a DOM Fragment
- 4 Create DOM Document out of string
- 5 DOM Objects That Make Up the Parse Tree
- 6 Getting a DOM Element by Id
- 7 Getting a Node Relative to Another Node in a DOM Document
- 8 Getting the Declared Entities in a DOM Document
- 9 Getting the Notations in a DOM Document
- 10 Getting the Root Element in a DOM Document
- 11 Getting the Value of an Entity Reference in a DOM Document
- 12 Ignorable Whitespace and Element Content
- 13 Listing the Contents of Parse Tree Nodes: Using the DOM Parser to Extract XML Document Data
- 14 Parse an XML string: Using DOM and a StringReader.
- 15 Read XML as DOM
- 16 Remove the element from parent
- 17 Source To InputSource
- 18 Use DOM L3 DOMBuilder, DOMBuilderFilter DOMWriter and other DOM L3 functionality to preparse, revalidate and save document.
- 19 Visiting All the Elements in a DOM Document
A DOM Error Checker: Using DOM for Syntax Checking
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * Syntax checker: runs {@code person.xml} through a validating,
 * namespace-aware DOM parser purely for the diagnostics it produces.
 */
public class DOMCheck {
    /**
     * Parses {@code person.xml} and prints any failure that occurs.
     *
     * @param arg command-line arguments (not used)
     */
    static public void main(String[] arg) {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(true);
        factory.setNamespaceAware(true);
        try {
            DocumentBuilder parser = factory.newDocumentBuilder();
            parser.setErrorHandler(new MyErrorHandler());
            // The resulting tree is not needed; only the parse diagnostics matter here.
            parser.parse(new InputSource("person.xml"));
        } catch (SAXException parseProblem) {
            // Parse problems go to standard output, next to the handler's own report.
            System.out.println(parseProblem);
        } catch (ParserConfigurationException configProblem) {
            System.err.println(configProblem);
        } catch (IOException ioProblem) {
            System.err.println(ioProblem);
        }
    }
}
/**
 * SAX error handler that prints every reported problem to standard output
 * and then rethrows it, regardless of severity.
 */
class MyErrorHandler implements ErrorHandler {
    /** Prints the warning and rethrows it. */
    public void warning(SAXParseException e) throws SAXException {
        throw report("Warning", e);
    }

    /** Prints the recoverable error and rethrows it. */
    public void error(SAXParseException e) throws SAXException {
        throw report("Error", e);
    }

    /** Prints the fatal error and rethrows it. */
    public void fatalError(SAXParseException e) throws SAXException {
        throw report("Fatal Error", e);
    }

    /**
     * Writes the severity label, message, position and system id of a problem.
     *
     * @param severity label printed in front of the message
     * @param problem the exception being reported
     * @return the same exception, so the caller can rethrow it
     */
    private SAXParseException report(String severity, SAXParseException problem) {
        String headline = severity + ": " + problem.getMessage();
        String position = "Line " + problem.getLineNumber() + " Column " + problem.getColumnNumber();
        String origin = "System ID: " + problem.getSystemId();
        System.out.println(headline);
        System.out.println(position);
        System.out.println(origin);
        return problem;
    }
}
Error: Document is invalid: no grammar found. Line 3 Column 7 System ID: file:///C:/Java_Dev/eclipse31/Eclipse/person.xml org.xml.sax.SAXParseException: Document is invalid: no grammar found.
A DOM Parse Tree Lister
- Using the DOM Parser to Build a Document Tree
- A Class That Walks through a DOM Parse Tree
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
public class DOMDump {
static public void main(String[] arg) {
boolean validate = true;
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setValidating(validate);
dbf.setNamespaceAware(true);
dbf.setIgnoringElementContentWhitespace(true);
// Parse the input to produce a parse tree with its root
// in the form of a Document object
Document doc = null;
try {
DocumentBuilder builder = dbf.newDocumentBuilder();
builder.setErrorHandler(new MyErrorHandler());
InputSource is = new InputSource("personWithDTD.xml");
doc = builder.parse(is);
} catch (SAXException e) {
System.exit(1);
} catch (ParserConfigurationException e) {
System.err.println(e);
System.exit(1);
} catch (IOException e) {
System.err.println(e);
System.exit(1);
}
dump(doc);
}
private static void dump(Document doc) {
dumpLoop((Node) doc, "");
}
private static void dumpLoop(Node node, String indent) {
switch (node.getNodeType()) {
case Node.CDATA_SECTION_NODE:
System.out.println(indent + "CDATA_SECTION_NODE");
break;
case Node.ruMENT_NODE:
System.out.println(indent + "COMMENT_NODE");
break;
case Node.DOCUMENT_FRAGMENT_NODE:
System.out.println(indent + "DOCUMENT_FRAGMENT_NODE");
break;
case Node.DOCUMENT_NODE:
System.out.println(indent + "DOCUMENT_NODE");
break;
case Node.DOCUMENT_TYPE_NODE:
System.out.println(indent + "DOCUMENT_TYPE_NODE");
break;
case Node.ELEMENT_NODE:
System.out.println(indent + "ELEMENT_NODE");
break;
case Node.ENTITY_NODE:
System.out.println(indent + "ENTITY_NODE");
break;
case Node.ENTITY_REFERENCE_NODE:
System.out.println(indent + "ENTITY_REFERENCE_NODE");
break;
case Node.NOTATION_NODE:
System.out.println(indent + "NOTATION_NODE");
break;
case Node.PROCESSING_INSTRUCTION_NODE:
System.out.println(indent + "PROCESSING_INSTRUCTION_NODE");
break;
case Node.TEXT_NODE:
System.out.print(indent + "TEXT_NODE");
System.out.println(" : "+node.getTextContent());
break;
default:
System.out.println(indent + "Unknown node");
break;
}
NodeList list = node.getChildNodes();
for (int i = 0; i < list.getLength(); i++){
dumpLoop(list.item(i), indent + " ");
}
}
}
/**
 * SAX error handler that prints every reported problem to standard output
 * and then rethrows it, regardless of severity.
 */
class MyErrorHandler implements ErrorHandler {
    /** Prints the warning and rethrows it. */
    public void warning(SAXParseException e) throws SAXException {
        throw report("Warning", e);
    }

    /** Prints the recoverable error and rethrows it. */
    public void error(SAXParseException e) throws SAXException {
        throw report("Error", e);
    }

    /** Prints the fatal error and rethrows it. */
    public void fatalError(SAXParseException e) throws SAXException {
        throw report("Fatal Error", e);
    }

    /**
     * Writes the severity label, message, position and system id of a problem.
     *
     * @param severity label printed in front of the message
     * @param problem the exception being reported
     * @return the same exception, so the caller can rethrow it
     */
    private SAXParseException report(String severity, SAXParseException problem) {
        String headline = severity + ": " + problem.getMessage();
        String position = "Line " + problem.getLineNumber() + " Column " + problem.getColumnNumber();
        String origin = "System ID: " + problem.getSystemId();
        System.out.println(headline);
        System.out.println(position);
        System.out.println(origin);
        return problem;
    }
}
DOCUMENT_NODE COMMENT_NODE DOCUMENT_TYPE_NODE ELEMENT_NODE ELEMENT_NODE ELEMENT_NODE TEXT_NODE : B D ELEMENT_NODE TEXT_NODE : 999 555-8888 ELEMENT_NODE TEXT_NODE : b@xyz.net
Converting an XML Fragment into a DOM Fragment
import java.io.File;
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
/**
 * Parses an XML fragment string and moves its content into an existing
 * document by way of a {@link DocumentFragment}.
 */
public class Main {
    /**
     * Loads {@code infilename.xml}, parses a literal fragment, imports it and
     * appends the fragment's children to the document's root element.
     *
     * @param argv command-line arguments (not used)
     * @throws Exception if either parse fails
     */
    public static void main(String[] argv) throws Exception {
        DocumentBuilderFactory validatingFactory = DocumentBuilderFactory.newInstance();
        validatingFactory.setValidating(true);
        File targetFile = new File("infilename.xml");
        Document target = validatingFactory.newDocumentBuilder().parse(targetFile);

        String fragment = "<fragment>aaa</fragment>";
        // The fragment is parsed with a fresh factory that has validation left at its default.
        DocumentBuilderFactory plainFactory = DocumentBuilderFactory.newInstance();
        InputSource fragmentSource = new InputSource(new StringReader(fragment));
        Document parsedFragment = plainFactory.newDocumentBuilder().parse(fragmentSource);

        // Copy the wrapper element into the target document, then empty it into a fragment.
        Node wrapper = target.importNode(parsedFragment.getDocumentElement(), true);
        DocumentFragment holder = target.createDocumentFragment();
        for (Node child = wrapper.getFirstChild(); child != null; child = wrapper.getFirstChild()) {
            holder.appendChild(wrapper.removeChild(child));
        }

        Element root = target.getDocumentElement();
        root.appendChild(holder);
    }
}
Create DOM Document out of string
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
/** Helpers for turning an XML string into a DOM {@link Document}. */
public class Main {
    /**
     * Parses XML text into a DOM document.
     *
     * @param xml the XML text to parse
     * @return the parsed document
     * @throws Exception if the parser cannot be created or the text cannot be parsed
     */
    public static Document load(String xml) throws Exception {
        DocumentBuilder parser = getDocumentBuilder();
        InputSource source = new InputSource(new StringReader(xml));
        return parser.parse(source);
    }

    /**
     * Creates a namespace-aware document builder.
     *
     * @return a new builder
     * @throws ParserConfigurationException if no builder can be created
     */
    public static DocumentBuilder getDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        return factory.newDocumentBuilder();
    }
}
DOM Objects That Make Up the Parse Tree
A DOM parser loads the entire document into a memory-resident tree structure so that the nodes of the tree can be randomly accessed by an application program. The nodes are all linked together in parent/child relationships that are representative of the relationships in the original document.
DOM Objects That Make Up the Parse Tree
NameDescriptionAttrAn attribute consisting of a name (sometimes called a key) and a value to be associated with the name.CDATASectionA block of text in an escape format to allow for the inclusion of special characters.CommentThe text of a comment.DocumentThe root node of the entire document tree.DocumentFragmentA lightweight form of Document primarily used for editing a parse tree by extracting and inserting parts of the tree.DocumentTypeThe node in the tree that contains descriptive information about the format of the elements (it is the schema or DTD information).ElementA tag used to mark up a section of text.EntityAn entity, either parsed or unparsed. This is the entity itself, not the declaration.EntityReferenceAn unexpanded entity. A parser may choose to expand all entity references omitting objects of this type.NotationA notation declared as part of the DTD or schema. It is either an unparsed entity or processing instruction.ProcessingInstructionA processing instruction is a processor-specific instruction included in the document.TextCharacter data.
Getting a DOM Element by Id
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/** Looks up an element by its ID attribute and reads one of its attributes. */
public class Main {
    /**
     * Parses {@code filename} with validation on and reads the {@code value}
     * attribute of the element whose ID is {@code key1}.
     *
     * @param argv command-line arguments (not used)
     * @throws Exception if the file cannot be parsed
     */
    public static void main(String[] argv) throws Exception{
        DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
        parserFactory.setValidating(true);
        parserFactory.setExpandEntityReferences(false);

        File input = new File("filename");
        Document document = parserFactory.newDocumentBuilder().parse(input);

        // NOTE(review): getElementById yields null when no element has this ID,
        // in which case the attribute lookup below fails.
        Element keyed = document.getElementById("key1");
        String attrValue = keyed.getAttribute("value");
    }
}
Getting a Node Relative to Another Node in a DOM Document
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Demonstrates the DOM navigation accessors (parent, children, siblings)
 * on a node copied from one document into another.
 */
public class Main {
    /**
     * Copies the first {@code entry} element of {@code filename} into
     * {@code infilename2.xml}'s tree and walks its relatives.
     *
     * @param argv command-line arguments (not used)
     * @throws Exception if a file cannot be parsed
     * @throws IllegalStateException if the first document has no {@code entry} element
     */
    public static void main(String[] argv) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(true);
        factory.setExpandEntityReferences(false);
        Document doc1 = factory.newDocumentBuilder().parse(new File("filename"));
        NodeList list = doc1.getElementsByTagName("entry");
        Element element = (Element) list.item(0);
        if (element == null) {
            throw new IllegalStateException("no <entry> element found in the first document");
        }
        Document doc2 = factory.newDocumentBuilder().parse(new File("infilename2.xml"));
        // Make a copy of the element subtree suitable for inserting into doc2
        Node node = doc2.importNode(element, true);
        // importNode returns a detached node (its parent is null), so attach the
        // copy to doc2 before navigating; otherwise the parent-based lookups
        // below would dereference null.
        doc2.getDocumentElement().appendChild(node);
        // Get the parent
        Node parent = node.getParentNode();
        // Get children
        NodeList children = node.getChildNodes();
        // Get first child; null if no children
        Node child = node.getFirstChild();
        // Get last child; null if no children
        child = node.getLastChild();
        // Get next sibling; null if node is last child
        Node sibling = node.getNextSibling();
        // Get previous sibling; null if node is first child
        sibling = node.getPreviousSibling();
        // Get first sibling
        sibling = parent.getFirstChild();
        // Get last sibling
        sibling = parent.getLastChild();
    }
}
Getting the Declared Entities in a DOM Document
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Entity;
import org.w3c.dom.EntityReference;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Lists the entities declared in a document's DOCTYPE together with the
 * entity-reference node found for each of them in the tree.
 */
public class Main {
    /**
     * Parses {@code filename} without expanding entity references and prints,
     * for each declared entity, the entity node, its name, public id, system id
     * and the matching reference node ({@code null} if none was found).
     *
     * @param argv command-line arguments (not used)
     * @throws Exception if the file cannot be parsed
     */
    public static void main(String[] argv) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(true);
        factory.setExpandEntityReferences(false);
        Document doc = factory.newDocumentBuilder().parse(new File("filename"));
        Map<String, Node> entityValues = new HashMap<String, Node>();
        getEntityValues(doc, entityValues);
        // A document without a DOCTYPE has no declared entities to list.
        if (doc.getDoctype() == null) {
            System.err.println("Document has no DOCTYPE declaration; no entities to list.");
            return;
        }
        NamedNodeMap entities = doc.getDoctype().getEntities();
        for (int i = 0; i < entities.getLength(); i++) {
            Entity entity = (Entity) entities.item(i);
            System.out.println(entity);
            String entityName = entity.getNodeName();
            System.out.println(entityName);
            String entityPublicId = entity.getPublicId();
            System.out.println(entityPublicId);
            String entitySystemId = entity.getSystemId();
            System.out.println(entitySystemId);
            Node entityValue = entityValues.get(entityName);
            System.out.println(entityValue);
        }
    }

    /**
     * Recursively collects every entity-reference node at or below {@code node}.
     * When the same entity is referenced more than once, the reference visited
     * last (in depth-first document order) is the one kept.
     *
     * @param node root of the subtree to search
     * @param map receives one entry per entity name, mapped to a reference node
     */
    public static void getEntityValues(Node node, Map<String, Node> map) {
        if (node instanceof EntityReference) {
            map.put(node.getNodeName(), node);
        }
        NodeList list = node.getChildNodes();
        for (int i = 0; i < list.getLength(); i++) {
            getEntityValues(list.item(i), map);
        }
    }
}
Getting the Notations in a DOM Document
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Notation;
/** Reads the notations declared in a document's DOCTYPE. */
public class Main {
    /**
     * Parses {@code filename} and reads the name, public id and system id of
     * each notation listed by the document type.
     *
     * @param argv command-line arguments (not used)
     * @throws Exception if the file cannot be parsed
     */
    public static void main(String[] argv) throws Exception {
        DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
        parserFactory.setValidating(true);
        parserFactory.setExpandEntityReferences(false);

        File input = new File("filename");
        Document document = parserFactory.newDocumentBuilder().parse(input);

        NamedNodeMap declared = document.getDoctype().getNotations();
        int index = 0;
        while (index < declared.getLength()) {
            Notation current = (Notation) declared.item(index);
            String notationName = current.getNodeName();
            String notationPublicId = current.getPublicId();
            String notationSystemId = current.getSystemId();
            index++;
        }
    }
}
Getting the Root Element in a DOM Document
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
/** Shows two ways of obtaining the root element of a DOM document. */
public class Main {
    /**
     * Parses {@code filename}, finds the root element by scanning the
     * document's children, then fetches it again with
     * {@code getDocumentElement()}.
     *
     * @param argv command-line arguments (not used)
     * @throws Exception if the file cannot be parsed
     */
    public static void main(String[] argv) throws Exception{
        DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
        parserFactory.setValidating(true);
        parserFactory.setExpandEntityReferences(false);

        File input = new File("filename");
        Document document = parserFactory.newDocumentBuilder().parse(input);

        // Way 1: the first Element among the document node's children.
        Element root = null;
        NodeList topLevel = document.getChildNodes();
        for (int i = 0; root == null && i < topLevel.getLength(); i++) {
            if (topLevel.item(i) instanceof Element) {
                root = (Element) topLevel.item(i);
            }
        }

        // Way 2: the dedicated accessor.
        root = document.getDocumentElement();
    }
}
Getting the Value of an Entity Reference in a DOM Document
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.EntityReference;
import org.w3c.dom.Text;
/** Reads the nodes held inside an unexpanded entity reference. */
public class Main {
    /**
     * Parses {@code filename} with entity expansion off and picks apart the
     * entity reference that is the root element's first child. The casts
     * assume the reference contains a comment, then an element, and ends
     * with a text node.
     *
     * @param argv command-line arguments (not used)
     * @throws Exception if the file cannot be parsed
     */
    public static void main(String[] argv) throws Exception {
        DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
        parserFactory.setValidating(true);
        parserFactory.setExpandEntityReferences(false);

        File input = new File("filename");
        Document document = parserFactory.newDocumentBuilder().parse(input);

        Element top = document.getDocumentElement();
        EntityReference reference = (EntityReference) top.getFirstChild();

        Comment comment = (Comment) reference.getFirstChild();
        Element elem = (Element) reference.getFirstChild().getNextSibling();
        Text text = (Text) reference.getLastChild();
    }
}
Ignorable Whitespace and Element Content
// Fragment: configures an existing factory named builderFactory (declared elsewhere).
builderFactory.setNamespaceAware(true); // Set namespace aware
builderFactory.setValidating(true); // and validating parser features
// Ask the parser to leave out whitespace that appears only in element content.
builderFactory.setIgnoringElementContentWhitespace(true);
<?xml version="1.0" standalone="yes"?> <folks> <person> <name> B D </name> <phone> 999 555-8888 </phone> <email> b@xyz.net </email> </person> </folks>
Listing the Contents of Parse Tree Nodes: Using the DOM Parser to Extract XML Document Data
/*
Code revised from
Java, XML, and JAXP by Arthur Griffith John Wiley & Sons 2002
*/
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Entity;
import org.w3c.dom.EntityReference;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Notation;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
public class DOMDump {
static public void main(String[] arg) {
boolean validate = true;
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setValidating(validate);
dbf.setNamespaceAware(true);
dbf.setIgnoringElementContentWhitespace(true);
// Parse the input to produce a parse tree with its root
// in the form of a Document object
Document doc = null;
try {
DocumentBuilder builder = dbf.newDocumentBuilder();
builder.setErrorHandler(new MyErrorHandler());
InputSource is = new InputSource("personWithDTD.xml");
doc = builder.parse(is);
} catch (SAXException e) {
System.exit(1);
} catch (ParserConfigurationException e) {
System.err.println(e);
System.exit(1);
} catch (IOException e) {
System.err.println(e);
System.exit(1);
}
dump(doc);
}
private static void dump(Document doc) {
dumpLoop((Node) doc, "");
}
private static void dumpLoop(Node node, String indent) {
switch (node.getNodeType()) {
case Node.ATTRIBUTE_NODE:
dumpAttributeNode((Attr) node, indent);
break;
case Node.CDATA_SECTION_NODE:
dumpCDATASectionNode((CDATASection) node, indent);
break;
case Node.ruMENT_NODE:
dumpCommentNode((Comment) node, indent);
break;
case Node.DOCUMENT_NODE:
dumpDocument((Document) node, indent);
break;
case Node.DOCUMENT_FRAGMENT_NODE:
dumpDocumentFragment((DocumentFragment) node, indent);
break;
case Node.DOCUMENT_TYPE_NODE:
dumpDocumentType((DocumentType) node, indent);
break;
case Node.ELEMENT_NODE:
dumpElement((Element) node, indent);
break;
case Node.ENTITY_NODE:
dumpEntityNode((Entity) node, indent);
break;
case Node.ENTITY_REFERENCE_NODE:
dumpEntityReferenceNode((EntityReference) node, indent);
break;
case Node.NOTATION_NODE:
dumpNotationNode((Notation) node, indent);
break;
case Node.PROCESSING_INSTRUCTION_NODE:
dumpProcessingInstructionNode((ProcessingInstruction) node, indent);
break;
case Node.TEXT_NODE:
dumpTextNode((Text) node, indent);
break;
default:
System.out.println(indent + "Unknown node");
break;
}
NodeList list = node.getChildNodes();
for (int i = 0; i < list.getLength(); i++)
dumpLoop(list.item(i), indent + " ");
}
/* Display the contents of a ATTRIBUTE_NODE */
private static void dumpAttributeNode(Attr node, String indent) {
System.out.println(indent + "ATTRIBUTE " + node.getName() + "=\"" + node.getValue() + "\"");
}
/* Display the contents of a CDATA_SECTION_NODE */
private static void dumpCDATASectionNode(CDATASection node, String indent) {
System.out.println(indent + "CDATA SECTION length=" + node.getLength());
System.out.println(indent + "\"" + node.getData() + "\"");
}
/* Display the contents of a COMMENT_NODE */
private static void dumpCommentNode(Comment node, String indent) {
System.out.println(indent + "COMMENT length=" + node.getLength());
System.out.println(indent + " " + node.getData());
}
/* Display the contents of a DOCUMENT_NODE */
private static void dumpDocument(Document node, String indent) {
System.out.println(indent + "DOCUMENT");
}
/* Display the contents of a DOCUMENT_FRAGMENT_NODE */
private static void dumpDocumentFragment(DocumentFragment node, String indent) {
System.out.println(indent + "DOCUMENT FRAGMENT");
}
/* Display the contents of a DOCUMENT_TYPE_NODE */
private static void dumpDocumentType(DocumentType node, String indent) {
System.out.println(indent + "DOCUMENT_TYPE: " + node.getName());
if (node.getPublicId() != null)
System.out.println(indent + " Public ID: " + node.getPublicId());
if (node.getSystemId() != null)
System.out.println(indent + " System ID: " + node.getSystemId());
NamedNodeMap entities = node.getEntities();
if (entities.getLength() > 0) {
for (int i = 0; i < entities.getLength(); i++) {
dumpLoop(entities.item(i), indent + " ");
}
}
NamedNodeMap notations = node.getNotations();
if (notations.getLength() > 0) {
for (int i = 0; i < notations.getLength(); i++)
dumpLoop(notations.item(i), indent + " ");
}
}
/* Display the contents of a ELEMENT_NODE */
private static void dumpElement(Element node, String indent) {
System.out.println(indent + "ELEMENT: " + node.getTagName());
NamedNodeMap nm = node.getAttributes();
for (int i = 0; i < nm.getLength(); i++)
dumpLoop(nm.item(i), indent + " ");
}
/* Display the contents of a ENTITY_NODE */
private static void dumpEntityNode(Entity node, String indent) {
System.out.println(indent + "ENTITY: " + node.getNodeName());
}
/* Display the contents of a ENTITY_REFERENCE_NODE */
private static void dumpEntityReferenceNode(EntityReference node, String indent) {
System.out.println(indent + "ENTITY REFERENCE: " + node.getNodeName());
}
/* Display the contents of a NOTATION_NODE */
private static void dumpNotationNode(Notation node, String indent) {
System.out.println(indent + "NOTATION");
System.out.print(indent + " " + node.getNodeName() + "=");
if (node.getPublicId() != null)
System.out.println(node.getPublicId());
else
System.out.println(node.getSystemId());
}
/* Display the contents of a PROCESSING_INSTRUCTION_NODE */
private static void dumpProcessingInstructionNode(ProcessingInstruction node, String indent) {
System.out.println(indent + "PI: target=" + node.getTarget());
System.out.println(indent + " " + node.getData());
}
/* Display the contents of a TEXT_NODE */
private static void dumpTextNode(Text node, String indent) {
System.out.println(indent + "TEXT length=" + node.getLength());
System.out.println(indent + " " + node.getData());
}
}
/**
 * SAX error handler that prints every reported problem to standard output
 * and then rethrows it, regardless of severity.
 */
class MyErrorHandler implements ErrorHandler {
    /** Prints the warning and rethrows it. */
    public void warning(SAXParseException e) throws SAXException {
        throw report("Warning", e);
    }

    /** Prints the recoverable error and rethrows it. */
    public void error(SAXParseException e) throws SAXException {
        throw report("Error", e);
    }

    /** Prints the fatal error and rethrows it. */
    public void fatalError(SAXParseException e) throws SAXException {
        throw report("Fatal Error", e);
    }

    /**
     * Writes the severity label, message, position and system id of a problem.
     *
     * @param severity label printed in front of the message
     * @param problem the exception being reported
     * @return the same exception, so the caller can rethrow it
     */
    private SAXParseException report(String severity, SAXParseException problem) {
        String headline = severity + ": " + problem.getMessage();
        String position = "Line " + problem.getLineNumber() + " Column " + problem.getColumnNumber();
        String origin = "System ID: " + problem.getSystemId();
        System.out.println(headline);
        System.out.println(position);
        System.out.println(origin);
        return problem;
    }
}
DOCUMENT COMMENT length=45 This document is both well formed and valid DOCUMENT_TYPE: folks ELEMENT: folks ELEMENT: person ELEMENT: name TEXT length=15 Bertha D. Blues ELEMENT: phone TEXT length=12 999 555-8888 ELEMENT: email TEXT length=14 b@xyz.net
Parse an XML string: Using DOM and a StringReader.
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.CharacterData;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/** Parses an XML string held in memory and prints selected element text. */
public class Main {
    /**
     * Parses a small employee record and prints each employee's name and title.
     *
     * @param arg command-line arguments (not used)
     * @throws Exception if the XML cannot be parsed
     */
    public static void main(String arg[]) throws Exception{
        String xmlRecords = "<data><employee><name>A</name>"
            + "<title>Manager</title></employee></data>";
        DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        InputSource is = new InputSource();
        is.setCharacterStream(new StringReader(xmlRecords));
        Document doc = db.parse(is);
        NodeList nodes = doc.getElementsByTagName("employee");
        for (int i = 0; i < nodes.getLength(); i++) {
            Element element = (Element) nodes.item(i);
            // item(0) is null when the employee lacks the child element; the
            // helper below turns that into an empty string.
            NodeList name = element.getElementsByTagName("name");
            Element line = (Element) name.item(0);
            System.out.println("Name: " + getCharacterDataFromElement(line));
            NodeList title = element.getElementsByTagName("title");
            line = (Element) title.item(0);
            System.out.println("Title: " + getCharacterDataFromElement(line));
        }
    }

    /**
     * Returns the character data of an element's first child.
     *
     * @param e the element to read; may be {@code null}
     * @return the data of the first child when it is character data; an empty
     *         string when {@code e} is {@code null}, has no children, or its
     *         first child is not character data
     */
    public static String getCharacterDataFromElement(Element e) {
        if (e == null) {
            return "";
        }
        Node child = e.getFirstChild();
        if (child instanceof CharacterData) {
            CharacterData cd = (CharacterData) child;
            return cd.getData();
        }
        return "";
    }
}
Read XML as DOM
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Reads XML from a stream into a DOM tree without resolving external entities.
 *
 * @author Costin Manolache
 */
public class Main {
    /** Entity resolver that answers every lookup with an empty document. */
    public static class NullResolver implements EntityResolver {
        /**
         * Ignores both identifiers and supplies an empty character stream.
         *
         * @param publicId public identifier of the entity (ignored)
         * @param systemId system identifier of the entity (ignored)
         * @return an input source wrapping an empty string
         */
        public InputSource resolveEntity(String publicId, String systemId)
                throws SAXException, IOException {
            StringReader nothing = new StringReader("");
            return new InputSource(nothing);
        }
    }

    /**
     * Read XML as DOM. The parser is non-validating, keeps comments, is asked
     * to ignore element-content whitespace, and resolves entities through
     * {@link NullResolver}.
     *
     * @param is stream holding the XML document
     * @return the parsed document
     * @throws SAXException if the document cannot be parsed
     * @throws IOException if the stream cannot be read
     * @throws ParserConfigurationException if no parser can be created
     */
    public static Document readXml(InputStream is)
            throws SAXException, IOException, ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(false);
        factory.setIgnoringComments(false);
        factory.setIgnoringElementContentWhitespace(true);

        DocumentBuilder parser = factory.newDocumentBuilder();
        parser.setEntityResolver(new NullResolver());
        return parser.parse(is);
    }
}
Remove the element from parent
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
/** Detaches an element from its parent in a parsed document. */
public class Main {
    /**
     * Parses {@code filename}, removes the first {@code b} element from its
     * parent and normalizes the parent afterwards.
     *
     * @param argv command-line arguments (not used)
     * @throws Exception if the file cannot be parsed
     */
    public static void main(String[] argv) throws Exception {
        DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
        parserFactory.setValidating(true);
        parserFactory.setExpandEntityReferences(false);

        File input = new File("filename");
        Document document = parserFactory.newDocumentBuilder().parse(input);

        // NOTE(review): item(0) is null when the document has no <b> element.
        Element victim = (Element) document.getElementsByTagName("b").item(0);
        Node owner = victim.getParentNode();
        owner.removeChild(victim);
        // Tidy the text nodes left behind around the removed element.
        owner.normalize();
    }
}
Source To InputSource
/*
* Copyright 2003-2008 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.OutputStream;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
/** Converts JAXP {@link Source} objects into SAX {@link InputSource} objects. */
public class Main {
    /**
     * Serializes an element to a stream with a default transformer.
     * This is best-effort: any failure is swallowed and the stream is left
     * with whatever had been written up to that point.
     *
     * @param element the element to serialize
     * @param out the stream that receives the serialized form
     */
    public static void ElementToStream(Element element, OutputStream out) {
        try {
            Transformer identity = TransformerFactory.newInstance().newTransformer();
            identity.transform(new DOMSource(element), new StreamResult(out));
        } catch (Exception ex) {
            // Deliberately ignored: serialization is best-effort here.
        }
    }

    /**
     * Produces an {@link InputSource} equivalent to the given source.
     * A SAX source hands back its own input source, a DOM source is serialized
     * to bytes, a stream source has its streams and identifiers copied, and
     * any other source is represented by its system id alone.
     *
     * @param source the source to convert
     * @return an input source for the same content
     */
    public static InputSource sourceToInputSource(Source source) {
        if (source instanceof SAXSource) {
            return ((SAXSource) source).getInputSource();
        }
        if (source instanceof DOMSource) {
            return serializeDom((DOMSource) source);
        }
        if (source instanceof StreamSource) {
            return copyStream((StreamSource) source);
        }
        return getInputSourceFromURI(source.getSystemId());
    }

    /** Serializes the DOM source's node (or its document element) into an in-memory byte stream. */
    private static InputSource serializeDom(DOMSource domSource) {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        Node target = domSource.getNode();
        if (target instanceof Document) {
            target = ((Document) target).getDocumentElement();
        }
        ElementToStream((Element) target, buffer);
        InputSource converted = new InputSource(domSource.getSystemId());
        converted.setByteStream(new ByteArrayInputStream(buffer.toByteArray()));
        return converted;
    }

    /** Copies system id, byte stream, reader and public id from a stream source. */
    private static InputSource copyStream(StreamSource streamSource) {
        InputSource converted = new InputSource(streamSource.getSystemId());
        converted.setByteStream(streamSource.getInputStream());
        converted.setCharacterStream(streamSource.getReader());
        converted.setPublicId(streamSource.getPublicId());
        return converted;
    }

    /**
     * Wraps a URI in an input source.
     * Does NOT handle authenticated URLs,
     * use getInputSourceFromURI(uri, username, password)
     *
     * @param uri the resource to get
     * @return an input source whose system id is {@code uri}
     */
    public static InputSource getInputSourceFromURI(String uri) {
        return new InputSource(uri);
    }
}
Use DOM L3 DOMBuilder, DOMBuilderFilter DOMWriter and other DOM L3 functionality to preparse, revalidate and save document.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.w3c.dom.DOMConfiguration;
import org.w3c.dom.DOMError;
import org.w3c.dom.DOMErrorHandler;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSOutput;
import org.w3c.dom.ls.LSParser;
import org.w3c.dom.ls.LSParserFilter;
import org.w3c.dom.ls.LSSerializer;
import org.w3c.dom.traversal.NodeFilter;
/**
 * Sample showing DOM Level 3 Load and Save: parse a document with an
 * {@link LSParser} and parser filter, revalidate it through
 * {@code normalizeDocument()}, and save it with an {@link LSSerializer}.
 */
public class DOM3 implements DOMErrorHandler, LSParserFilter {
    /** Default namespaces support (true). */
    protected static final boolean DEFAULT_NAMESPACES = true;
    /** Default validation support (false). */
    protected static final boolean DEFAULT_VALIDATION = false;
    /** Default Schema validation support (false). */
    protected static final boolean DEFAULT_SCHEMA_VALIDATION = false;
    /** Parser created by {@link #main(String[])}. */
    static LSParser builder;

    /**
     * Parses the document named by the first argument, normalizes it with
     * schema validation switched on and comments removed, then writes it to
     * standard output without an XML declaration.
     *
     * @param argv command-line arguments; {@code argv[0]} is the URI to parse
     */
    public static void main(String[] argv) {
        if (argv.length == 0) {
            printUsage();
            System.exit(1);
        }
        try {
            // Obtain a Load-and-Save capable DOM implementation through the registry.
            System.setProperty(DOMImplementationRegistry.PROPERTY,
                "org.apache.xerces.dom.DOMXSImplementationSourceImpl");
            DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
            DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");

            // Parser, with separate DOM3 instances acting as error handler and filter.
            builder = impl.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null);
            DOMConfiguration parserConfig = builder.getDomConfig();
            DOMErrorHandler errorHandler = new DOM3();
            LSParserFilter filter = new DOM3();
            builder.setFilter(filter);
            enableSchemaValidation(parserConfig, errorHandler, "personal.xsd");

            System.out.println("Parsing " + argv[0] + "...");
            Document doc = builder.parseURI(argv[0]);

            // Revalidate the in-memory tree, dropping comments along the way.
            DOMConfiguration documentConfig = doc.getDomConfig();
            enableSchemaValidation(documentConfig, errorHandler, "data/personal.xsd");
            documentConfig.setParameter("comments", Boolean.FALSE);
            System.out.println("Normalizing document... ");
            doc.normalizeDocument();

            // Serialize to standard output.
            LSSerializer domWriter = impl.createLSSerializer();
            System.out.println("Serializing document... ");
            DOMConfiguration writerConfig = domWriter.getDomConfig();
            writerConfig.setParameter("xml-declaration", Boolean.FALSE);
            LSOutput dOut = impl.createLSOutput();
            dOut.setByteStream(System.out);
            domWriter.write(doc, dOut);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Sets the error handler, turns validation on, and selects XML Schema with
     * the given schema location on a DOM configuration.
     */
    private static void enableSchemaValidation(DOMConfiguration config,
            DOMErrorHandler errorHandler, String schemaLocation) {
        config.setParameter("error-handler", errorHandler);
        config.setParameter("validate", Boolean.TRUE);
        config.setParameter("schema-type", "http://www.w3.org/2001/XMLSchema");
        config.setParameter("schema-location", schemaLocation);
    }

    /** Prints the command-line usage to standard error. */
    private static void printUsage() {
        System.err.println("usage: java dom.DOM3 uri ...");
        System.err.println();
        System.err.println("NOTE: You can only validate DOM tree against XML Schemas.");
    }

    /**
     * Prints errors and warnings; other severities are not printed.
     *
     * @param error the reported problem
     * @return always {@code true}
     */
    public boolean handleError(DOMError error) {
        switch (error.getSeverity()) {
            case DOMError.SEVERITY_ERROR:
                System.out.println("[dom3-error]: " + error.getMessage());
                break;
            case DOMError.SEVERITY_WARNING:
                System.out.println("[dom3-warning]: " + error.getMessage());
                break;
            default:
                break;
        }
        return true;
    }

    /**
     * @see org.w3c.dom.ls.LSParserFilter#acceptNode(Node)
     */
    public short acceptNode(Node enode) {
        return NodeFilter.FILTER_ACCEPT;
    }

    /**
     * @see org.w3c.dom.ls.LSParserFilter#getWhatToShow()
     */
    public int getWhatToShow() {
        return NodeFilter.SHOW_ELEMENT;
    }

    /**
     * @see org.w3c.dom.ls.LSParserFilter#startElement(Element)
     */
    public short startElement(Element elt) {
        return LSParserFilter.FILTER_ACCEPT;
    }
}
Visiting All the Elements in a DOM Document
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
/** Visits every element of a parsed document. */
public class Main {
    /**
     * Parses {@code filename} and walks over all of its elements, obtained
     * with the {@code "*"} wildcard tag name.
     *
     * @param argv command-line arguments (not used)
     * @throws Exception if the file cannot be parsed
     */
    public static void main(String[] argv) throws Exception{
        DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
        parserFactory.setValidating(true);
        parserFactory.setExpandEntityReferences(false);

        File input = new File("filename");
        Document document = parserFactory.newDocumentBuilder().parse(input);

        NodeList everyElement = document.getElementsByTagName("*");
        int index = 0;
        while (index < everyElement.getLength()) {
            Element element = (Element) everyElement.item(index);
            index++;
        }
    }
}