Java Tutorial/XML/DOM Parser

Contents

1 A DOM Error Checker: Using DOM for Syntax Checking
2 A DOM Parse Tree Lister
3 Converting an XML Fragment into a DOM Fragment
4 Create DOM Document out of string
5 DOM Objects That Make Up the Parse Tree
6 Getting a DOM Element by Id
7 Getting a Node Relative to Another Node in a DOM Document
8 Getting the Declared Entities in a DOM Document
9 Getting the Notations in a DOM Document
10 Getting the Root Element in a DOM Document
11 Getting the Value of an Entity Reference in a DOM Document
12 Ignorable Whitespace and Element Content
13 Listing the Contents of Parse Tree Nodes: Using the DOM Parser to Extract XML Document Data
14 Parse an XML string: Using DOM and a StringReader.
15 Read XML as DOM
16 Remove the element from parent
17 Source To InputSource
18 Use DOM L3 DOMBuilder, DOMBuilderFilter, DOMWriter and other DOM L3 functionality to preparse, revalidate and save document.
19 Visiting All the Elements in a DOM Document

A DOM Error Checker: Using DOM for Syntax Checking

import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * Syntax checker: parses person.xml with a validating, namespace-aware DOM
 * parser and lets MyErrorHandler report whatever the parser complains about.
 */
public class DOMCheck {
  /**
   * Configures the parser, runs it over person.xml and prints any exception
   * that stops the parse.
   *
   * @param arg command-line arguments (not used)
   */
  public static void main(String[] arg) {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    factory.setNamespaceAware(true);
    try {
      DocumentBuilder parser = factory.newDocumentBuilder();
      parser.setErrorHandler(new MyErrorHandler());
      // Only the diagnostics matter here, so the resulting tree is not kept.
      parser.parse(new InputSource("person.xml"));
    } catch (ParserConfigurationException e) {
      System.err.println(e);
    } catch (IOException e) {
      System.err.println(e);
    } catch (SAXException e) {
      // Parse problems are written to standard output, not standard error.
      System.out.println(e);
    }
  }
}
/**
 * Error handler that prints every parser diagnostic to standard output and
 * then rethrows it to the caller, whatever its severity.
 */
class MyErrorHandler implements ErrorHandler {
  /** Prints the warning and rethrows it. */
  @Override
  public void warning(SAXParseException e) throws SAXException {
    report("Warning", e);
    throw e;
  }

  /** Prints the recoverable error and rethrows it. */
  @Override
  public void error(SAXParseException e) throws SAXException {
    report("Error", e);
    throw e;
  }

  /** Prints the fatal error and rethrows it. */
  @Override
  public void fatalError(SAXParseException e) throws SAXException {
    report("Fatal Error", e);
    throw e;
  }

  /**
   * Writes three lines: severity with message, line/column position, and
   * the system ID the exception carries.
   */
  private void report(String severity, SAXParseException problem) {
    String headline = severity + ": " + problem.getMessage();
    String position = "Line " + problem.getLineNumber() + " Column " + problem.getColumnNumber();
    String origin = "System ID: " + problem.getSystemId();
    System.out.println(headline);
    System.out.println(position);
    System.out.println(origin);
  }
}

Error: Document is invalid: no grammar found.
Line 3 Column 7
System ID: file:///C:/Java_Dev/eclipse31/Eclipse/person.xml
org.xml.sax.SAXParseException: Document is invalid: no grammar found.

A DOM Parse Tree Lister

Using the DOM Parser to Build a Document Tree
A Class That Walks through a DOM Parse Tree

import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
public class DOMDump {
  static public void main(String[] arg) {
    boolean validate = true;
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setValidating(validate);
    dbf.setNamespaceAware(true);
    dbf.setIgnoringElementContentWhitespace(true);
    // Parse the input to produce a parse tree with its root
    // in the form of a Document object
    Document doc = null;
    try {
      DocumentBuilder builder = dbf.newDocumentBuilder();
      builder.setErrorHandler(new MyErrorHandler());
      InputSource is = new InputSource("personWithDTD.xml");
      doc = builder.parse(is);
    } catch (SAXException e) {
      System.exit(1);
    } catch (ParserConfigurationException e) {
      System.err.println(e);
      System.exit(1);
    } catch (IOException e) {
      System.err.println(e);
      System.exit(1);
    }
    dump(doc);
  }
  private static void dump(Document doc) {
    dumpLoop((Node) doc, "");
  }
  private static void dumpLoop(Node node, String indent) {
    switch (node.getNodeType()) {
    case Node.CDATA_SECTION_NODE:
      System.out.println(indent + "CDATA_SECTION_NODE");
      break;
    case Node.ruMENT_NODE:
      System.out.println(indent + "COMMENT_NODE");
      break;
    case Node.DOCUMENT_FRAGMENT_NODE:
      System.out.println(indent + "DOCUMENT_FRAGMENT_NODE");
      break;
    case Node.DOCUMENT_NODE:
      System.out.println(indent + "DOCUMENT_NODE");
      break;
    case Node.DOCUMENT_TYPE_NODE:
      System.out.println(indent + "DOCUMENT_TYPE_NODE");
      break;
    case Node.ELEMENT_NODE:
      System.out.println(indent + "ELEMENT_NODE");
      break;
    case Node.ENTITY_NODE:
      System.out.println(indent + "ENTITY_NODE");
      break;
    case Node.ENTITY_REFERENCE_NODE:
      System.out.println(indent + "ENTITY_REFERENCE_NODE");
      break;
    case Node.NOTATION_NODE:
      System.out.println(indent + "NOTATION_NODE");
      break;
    case Node.PROCESSING_INSTRUCTION_NODE:
      System.out.println(indent + "PROCESSING_INSTRUCTION_NODE");
      break;
    case Node.TEXT_NODE:
      System.out.print(indent + "TEXT_NODE");
      System.out.println(" : "+node.getTextContent());
      break;
    default:
      System.out.println(indent + "Unknown node");
      break;
    }
    NodeList list = node.getChildNodes();
    for (int i = 0; i < list.getLength(); i++){
      dumpLoop(list.item(i), indent + "   ");
    }
  }
}

/**
 * Reports each parser diagnostic on standard output and then hands the same
 * exception back to the caller by rethrowing it.
 */
class MyErrorHandler implements ErrorHandler {
  /** Reports a warning, then rethrows it. */
  @Override
  public void warning(SAXParseException e) throws SAXException {
    report("Warning", e);
    throw e;
  }

  /** Reports a recoverable error, then rethrows it. */
  @Override
  public void error(SAXParseException e) throws SAXException {
    report("Error", e);
    throw e;
  }

  /** Reports a fatal error, then rethrows it. */
  @Override
  public void fatalError(SAXParseException e) throws SAXException {
    report("Fatal Error", e);
    throw e;
  }

  /**
   * Prints the severity and message, the line/column position, and the
   * system ID recorded in the exception, each on its own line.
   */
  private void report(String severity, SAXParseException problem) {
    String headline = severity + ": " + problem.getMessage();
    String position = "Line " + problem.getLineNumber() + " Column " + problem.getColumnNumber();
    String origin = "System ID: " + problem.getSystemId();
    System.out.println(headline);
    System.out.println(position);
    System.out.println(origin);
  }
}

DOCUMENT_NODE
   COMMENT_NODE
   DOCUMENT_TYPE_NODE
   ELEMENT_NODE
      ELEMENT_NODE
         ELEMENT_NODE
            TEXT_NODE : B D
         ELEMENT_NODE
            TEXT_NODE : 999 555-8888
         ELEMENT_NODE
            TEXT_NODE : b@xyz.net

Converting an XML Fragment into a DOM Fragment

import java.io.File;
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
/**
 * Parses a small XML fragment and grafts its content onto the root element
 * of the document read from infilename.xml.
 */
public class Main {
  /**
   * Reads the target document, parses the fragment text, and appends the
   * fragment's children to the target's root element (in memory only).
   *
   * @param argv command-line arguments (not used)
   * @throws Exception if a parser cannot be created or either parse fails
   */
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory validatingFactory = DocumentBuilderFactory.newInstance();
    validatingFactory.setValidating(true);
    File targetFile = new File("infilename.xml");
    Document target = validatingFactory.newDocumentBuilder().parse(targetFile);

    // The fragment text is parsed as a stand-alone document by a fresh
    // factory on which validation has not been switched on.
    String fragment = "<fragment>aaa</fragment>";
    DocumentBuilderFactory plainFactory = DocumentBuilderFactory.newInstance();
    InputSource fragmentSource = new InputSource(new StringReader(fragment));
    Document parsedFragment = plainFactory.newDocumentBuilder().parse(fragmentSource);

    // Copy the wrapper element into the target document ...
    Node wrapper = target.importNode(parsedFragment.getDocumentElement(), true);
    // ... then move its children, one at a time, into a DocumentFragment.
    DocumentFragment content = target.createDocumentFragment();
    for (Node kid = wrapper.getFirstChild(); kid != null; kid = wrapper.getFirstChild()) {
      content.appendChild(wrapper.removeChild(kid));
    }

    Element root = target.getDocumentElement();
    root.appendChild(content);
  }
}

Create DOM Document out of string

import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;

/** Helpers for turning XML text into a DOM Document. */
public class Main {
  /**
   * Parses XML held in a string.
   *
   * @param xml the XML text to parse
   * @return the parsed document
   * @throws Exception if the parser cannot be created or the text cannot be parsed
   */
  public static Document load(String xml) throws Exception {
    DocumentBuilder parser = getDocumentBuilder();
    InputSource text = new InputSource(new StringReader(xml));
    return parser.parse(text);
  }

  /**
   * Creates a DocumentBuilder from a factory with namespace awareness enabled.
   *
   * @return a new namespace-aware builder
   * @throws ParserConfigurationException if the factory cannot create a builder
   */
  public static DocumentBuilder getDocumentBuilder() throws ParserConfigurationException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);
    return factory.newDocumentBuilder();
  }
}

DOM Objects That Make Up the Parse Tree

A DOM parser loads the entire document into a memory-resident tree structure so that the nodes of the tree can be randomly accessed by an application program. The nodes are all linked together in parent/child relationships that are representative of the relationships in the original document.

DOM Objects That Make Up the Parse Tree

Name - Description
Attr - An attribute consisting of a name (sometimes called a key) and a value to be associated with the name.
CDATASection - A block of text in an escape format to allow for the inclusion of special characters.
Comment - The text of a comment.
Document - The root node of the entire document tree.
DocumentFragment - A lightweight form of Document primarily used for editing a parse tree by extracting and inserting parts of the tree.
DocumentType - The node in the tree that contains descriptive information about the format of the elements (it is the schema or DTD information).
Element - A tag used to mark up a section of text.
Entity - An entity, either parsed or unparsed. This is the entity itself, not the declaration.
EntityReference - An unexpanded entity. A parser may choose to expand all entity references, omitting objects of this type.
Notation - A notation declared as part of the DTD or schema. It is either an unparsed entity or processing instruction.
ProcessingInstruction - A processing instruction is a processor-specific instruction included in the document.
Text - Character data.

Getting a DOM Element by Id

import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
 * Looks up an element by its ID in the document read from "filename" and
 * reads that element's "value" attribute.
 */
public class Main {
  /**
   * Parses the file with a validating parser and fetches the element whose
   * ID is "key1".
   *
   * @param argv command-line arguments (not used)
   * @throws Exception if the parser cannot be created or the file cannot be parsed
   */
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    factory.setExpandEntityReferences(false);
    Document doc = factory.newDocumentBuilder().parse(new File("filename"));
    Element element = doc.getElementById("key1");
    // getElementById returns null when no element carries that ID (per the
    // DOM API, only attributes declared with type ID count), so check before
    // dereferencing instead of failing with a NullPointerException.
    if (element == null) {
      System.err.println("No element with ID \"key1\" found in the document");
      return;
    }
    String attrValue = element.getAttribute("value");
  }
}

Getting a Node Relative to Another Node in a DOM Document

import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Demonstrates the navigation methods of Node (parent, children, siblings)
 * on an element copied from one document into another.
 */
public class Main {
  /**
   * Takes the first "entry" element of the document in "filename", imports
   * it into the document in "infilename2.xml", and walks its relatives.
   *
   * @param argv command-line arguments (not used)
   * @throws Exception if a parser cannot be created or a file cannot be parsed
   */
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    factory.setExpandEntityReferences(false);
    Document doc1 = factory.newDocumentBuilder().parse(new File("filename"));
    NodeList list = doc1.getElementsByTagName("entry");
    Element element = (Element) list.item(0);
    Document doc2 = factory.newDocumentBuilder().parse(new File("infilename2.xml"));
    // item(0) is null when the first document has no "entry" element at all.
    if (element == null) {
      System.err.println("No \"entry\" element found in the first document");
      return;
    }
    // Make a copy of the element subtree suitable for inserting into doc2
    Node node = doc2.importNode(element, true);
    // Get the parent; null here, because an imported node stays detached
    // until it is actually inserted into the tree
    Node parent = node.getParentNode();
    // Get children
    NodeList children = node.getChildNodes();
    // Get first child; null if no children
    Node child = node.getFirstChild();
    // Get last child; null if no children
    child = node.getLastChild();
    // Get next sibling; null if node is last child
    Node sibling = node.getNextSibling();
    // Get previous sibling; null if node is first child
    sibling = node.getPreviousSibling();
    // The first/last sibling are reached through the parent, so they only
    // exist for a node that has one (previously this threw a
    // NullPointerException for the detached node).
    if (parent != null) {
      // Get first sibling
      sibling = parent.getFirstChild();
      // Get last sibling
      sibling = parent.getLastChild();
    }
  }
}

Getting the Declared Entities in a DOM Document

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Entity;
import org.w3c.dom.EntityReference;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Lists the entities declared in a document's DOCTYPE together with an
 * entity reference node found for each name in the document body.
 */
public class Main {
  /**
   * Parses "filename" without expanding entity references, collects the
   * entity reference nodes, and prints each declared entity's node, name,
   * public ID, system ID and the collected reference node (null if none).
   *
   * @param argv command-line arguments (not used)
   * @throws Exception if the parser cannot be created or the file cannot be parsed
   */
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    factory.setExpandEntityReferences(false);
    Document doc = factory.newDocumentBuilder().parse(new File("filename"));
    Map<String, Node> entityValues = new HashMap<String, Node>();
    getEntityValues(doc, entityValues);
    // getDoctype() is null for a document without a DOCTYPE declaration;
    // check first instead of failing with a NullPointerException.
    if (doc.getDoctype() == null) {
      System.err.println("Document has no DOCTYPE, so no entities are declared");
      return;
    }
    NamedNodeMap entities = doc.getDoctype().getEntities();
    for (int i = 0; i < entities.getLength(); i++) {
      Entity entity = (Entity) entities.item(i);
      System.out.println(entity);
      String entityName = entity.getNodeName();
      System.out.println(entityName);
      String entityPublicId = entity.getPublicId();
      System.out.println(entityPublicId);
      String entitySystemId = entity.getSystemId();
      System.out.println(entitySystemId);
      Node entityValue = entityValues.get(entityName);
      System.out.println(entityValue);
    }
  }

  /**
   * Walks the subtree rooted at {@code node} and records every
   * EntityReference node in {@code map} under its node name. When the same
   * name occurs more than once, the reference visited last wins.
   *
   * @param node the root of the subtree to search
   * @param map  receives node name to EntityReference node entries; the
   *             wildcard bounds keep existing callers (raw maps and maps with
   *             wider key/value types) compiling
   */
  public static void getEntityValues(Node node, Map<? super String, ? super Node> map) {
    if (node instanceof EntityReference) {
      map.put(node.getNodeName(), node);
    }
    NodeList list = node.getChildNodes();
    for (int i = 0; i < list.getLength(); i++) {
      getEntityValues(list.item(i), map);
    }
  }
}

Getting the Notations in a DOM Document

import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Notation;
/** Reads the notations declared in the DOCTYPE of the document in "filename". */
public class Main {
  /**
   * Parses the file and visits every declared notation, reading its name,
   * public ID and system ID.
   *
   * @param argv command-line arguments (not used)
   * @throws Exception if the parser cannot be created or the file cannot be parsed
   */
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    factory.setExpandEntityReferences(false);
    Document doc = factory.newDocumentBuilder().parse(new File("filename"));
    // getDoctype() is null for a document without a DOCTYPE declaration;
    // check first instead of failing with a NullPointerException.
    if (doc.getDoctype() == null) {
      System.err.println("Document has no DOCTYPE, so no notations are declared");
      return;
    }
    NamedNodeMap notations = doc.getDoctype().getNotations();
    for (int i = 0; i < notations.getLength(); i++) {
      Notation notation = (Notation) notations.item(i);
      // The values are only read to show the accessors; nothing is printed.
      String notationName = notation.getNodeName();
      String notationPublicId = notation.getPublicId();
      String notationSystemId = notation.getSystemId();
    }
  }
}

Getting the Root Element in a DOM Document

import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
/** Shows two ways of obtaining the root element of the document in "filename". */
public class Main {
  /**
   * Parses the file, finds the root element by scanning the document's
   * children, then obtains it again through getDocumentElement().
   *
   * @param argv command-line arguments (not used)
   * @throws Exception if the parser cannot be created or the file cannot be parsed
   */
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    factory.setExpandEntityReferences(false);
    File source = new File("filename");
    Document doc = factory.newDocumentBuilder().parse(source);

    // First way: scan the document's direct children and stop at the
    // first one that is an Element.
    NodeList topLevel = doc.getChildNodes();
    Element root = null;
    int index = 0;
    while (root == null && index < topLevel.getLength()) {
      if (topLevel.item(index) instanceof Element) {
        root = (Element) topLevel.item(index);
      }
      index++;
    }

    // Second way: ask the document directly.
    root = doc.getDocumentElement();
  }
}

Getting the Value of an Entity Reference in a DOM Document

import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.EntityReference;
import org.w3c.dom.Text;
/**
 * Reads the child nodes of an unexpanded entity reference in the document
 * stored in "filename".
 */
public class Main {
  /**
   * Parses the file with entity-reference expansion switched off and picks
   * the entity reference's children apart.
   *
   * @param argv command-line arguments (not used)
   * @throws Exception if the parser cannot be created or the file cannot be parsed
   */
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    factory.setExpandEntityReferences(false);
    File input = new File("filename");
    Document doc = factory.newDocumentBuilder().parse(input);
    Element docElement = doc.getDocumentElement();
    // NOTE(review): the casts assume the root's first child is an entity
    // reference whose children are a comment, then an element, and whose
    // last child is text; any other input fails with a ClassCastException.
    EntityReference reference = (EntityReference) docElement.getFirstChild();
    Comment leading = (Comment) reference.getFirstChild();
    Element middle = (Element) reference.getFirstChild().getNextSibling();
    Text trailing = (Text) reference.getLastChild();
  }
}

Ignorable Whitespace and Element Content

// Parser features switched on for this example. builderFactory is declared
// outside this fragment (presumably a DocumentBuilderFactory; confirm).
builderFactory.setNamespaceAware(true);        // Set namespace aware
   builderFactory.setValidating(true);            // and validating parser features
   // Also ask for ignorable whitespace in element content to be left out of the tree
   builderFactory.setIgnoringElementContentWhitespace(true);

<?xml version="1.0" standalone="yes"?>
<folks>
    <person>
        <name>
            B D
        </name>
        <phone>
            999 555-8888
        </phone>
        <email>
            b@xyz.net
        </email>
    </person>
</folks>

Listing the Contents of Parse Tree Nodes: Using the DOM Parser to Extract XML Document Data

/*
Code revised from
Java, XML, and JAXP by Arthur Griffith John Wiley & Sons 2002
*/

import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Entity;
import org.w3c.dom.EntityReference;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Notation;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
public class DOMDump {
  static public void main(String[] arg) {
    boolean validate = true;
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setValidating(validate);
    dbf.setNamespaceAware(true);
    dbf.setIgnoringElementContentWhitespace(true);
    // Parse the input to produce a parse tree with its root
    // in the form of a Document object
    Document doc = null;
    try {
      DocumentBuilder builder = dbf.newDocumentBuilder();
      builder.setErrorHandler(new MyErrorHandler());
      InputSource is = new InputSource("personWithDTD.xml");
      doc = builder.parse(is);
    } catch (SAXException e) {
      System.exit(1);
    } catch (ParserConfigurationException e) {
      System.err.println(e);
      System.exit(1);
    } catch (IOException e) {
      System.err.println(e);
      System.exit(1);
    }
    dump(doc);
  }
  private static void dump(Document doc) {
    dumpLoop((Node) doc, "");
  }
  private static void dumpLoop(Node node, String indent) {
    switch (node.getNodeType()) {
    case Node.ATTRIBUTE_NODE:
      dumpAttributeNode((Attr) node, indent);
      break;
    case Node.CDATA_SECTION_NODE:
      dumpCDATASectionNode((CDATASection) node, indent);
      break;
    case Node.ruMENT_NODE:
      dumpCommentNode((Comment) node, indent);
      break;
    case Node.DOCUMENT_NODE:
      dumpDocument((Document) node, indent);
      break;
    case Node.DOCUMENT_FRAGMENT_NODE:
      dumpDocumentFragment((DocumentFragment) node, indent);
      break;
    case Node.DOCUMENT_TYPE_NODE:
      dumpDocumentType((DocumentType) node, indent);
      break;
    case Node.ELEMENT_NODE:
      dumpElement((Element) node, indent);
      break;
    case Node.ENTITY_NODE:
      dumpEntityNode((Entity) node, indent);
      break;
    case Node.ENTITY_REFERENCE_NODE:
      dumpEntityReferenceNode((EntityReference) node, indent);
      break;
    case Node.NOTATION_NODE:
      dumpNotationNode((Notation) node, indent);
      break;
    case Node.PROCESSING_INSTRUCTION_NODE:
      dumpProcessingInstructionNode((ProcessingInstruction) node, indent);
      break;
    case Node.TEXT_NODE:
      dumpTextNode((Text) node, indent);
      break;
    default:
      System.out.println(indent + "Unknown node");
      break;
    }
    NodeList list = node.getChildNodes();
    for (int i = 0; i < list.getLength(); i++)
      dumpLoop(list.item(i), indent + "   ");
  }
  /* Display the contents of a ATTRIBUTE_NODE */
  private static void dumpAttributeNode(Attr node, String indent) {
    System.out.println(indent + "ATTRIBUTE " + node.getName() + "=\"" + node.getValue() + "\"");
  }
  /* Display the contents of a CDATA_SECTION_NODE */
  private static void dumpCDATASectionNode(CDATASection node, String indent) {
    System.out.println(indent + "CDATA SECTION length=" + node.getLength());
    System.out.println(indent + "\"" + node.getData() + "\"");
  }
  /* Display the contents of a COMMENT_NODE */
  private static void dumpCommentNode(Comment node, String indent) {
    System.out.println(indent + "COMMENT length=" + node.getLength());
    System.out.println(indent + "  " + node.getData());
  }
  /* Display the contents of a DOCUMENT_NODE */
  private static void dumpDocument(Document node, String indent) {
    System.out.println(indent + "DOCUMENT");
  }
  /* Display the contents of a DOCUMENT_FRAGMENT_NODE */
  private static void dumpDocumentFragment(DocumentFragment node, String indent) {
    System.out.println(indent + "DOCUMENT FRAGMENT");
  }
  /* Display the contents of a DOCUMENT_TYPE_NODE */
  private static void dumpDocumentType(DocumentType node, String indent) {
    System.out.println(indent + "DOCUMENT_TYPE: " + node.getName());
    if (node.getPublicId() != null)
      System.out.println(indent + " Public ID: " + node.getPublicId());
    if (node.getSystemId() != null)
      System.out.println(indent + " System ID: " + node.getSystemId());
    NamedNodeMap entities = node.getEntities();
    if (entities.getLength() > 0) {
      for (int i = 0; i < entities.getLength(); i++) {
        dumpLoop(entities.item(i), indent + "  ");
      }
    }
    NamedNodeMap notations = node.getNotations();
    if (notations.getLength() > 0) {
      for (int i = 0; i < notations.getLength(); i++)
        dumpLoop(notations.item(i), indent + "  ");
    }
  }
  /* Display the contents of a ELEMENT_NODE */
  private static void dumpElement(Element node, String indent) {
    System.out.println(indent + "ELEMENT: " + node.getTagName());
    NamedNodeMap nm = node.getAttributes();
    for (int i = 0; i < nm.getLength(); i++)
      dumpLoop(nm.item(i), indent + "  ");
  }
  /* Display the contents of a ENTITY_NODE */
  private static void dumpEntityNode(Entity node, String indent) {
    System.out.println(indent + "ENTITY: " + node.getNodeName());
  }
  /* Display the contents of a ENTITY_REFERENCE_NODE */
  private static void dumpEntityReferenceNode(EntityReference node, String indent) {
    System.out.println(indent + "ENTITY REFERENCE: " + node.getNodeName());
  }
  /* Display the contents of a NOTATION_NODE */
  private static void dumpNotationNode(Notation node, String indent) {
    System.out.println(indent + "NOTATION");
    System.out.print(indent + "  " + node.getNodeName() + "=");
    if (node.getPublicId() != null)
      System.out.println(node.getPublicId());
    else
      System.out.println(node.getSystemId());
  }
  /* Display the contents of a PROCESSING_INSTRUCTION_NODE */
  private static void dumpProcessingInstructionNode(ProcessingInstruction node, String indent) {
    System.out.println(indent + "PI: target=" + node.getTarget());
    System.out.println(indent + "  " + node.getData());
  }
  /* Display the contents of a TEXT_NODE */
  private static void dumpTextNode(Text node, String indent) {
    System.out.println(indent + "TEXT length=" + node.getLength());
    System.out.println(indent + "  " + node.getData());
  }
}
/**
 * Error handler that writes every parser diagnostic to standard output and
 * then rethrows the same exception to the caller.
 */
class MyErrorHandler implements ErrorHandler {
  /** Writes the warning out, then rethrows it. */
  @Override
  public void warning(SAXParseException e) throws SAXException {
    report("Warning", e);
    throw e;
  }

  /** Writes the recoverable error out, then rethrows it. */
  @Override
  public void error(SAXParseException e) throws SAXException {
    report("Error", e);
    throw e;
  }

  /** Writes the fatal error out, then rethrows it. */
  @Override
  public void fatalError(SAXParseException e) throws SAXException {
    report("Fatal Error", e);
    throw e;
  }

  /**
   * Emits three lines: severity with message, line/column position, and the
   * system ID stored in the exception.
   */
  private void report(String severity, SAXParseException problem) {
    String headline = severity + ": " + problem.getMessage();
    String position = "Line " + problem.getLineNumber() + " Column " + problem.getColumnNumber();
    String origin = "System ID: " + problem.getSystemId();
    System.out.println(headline);
    System.out.println(position);
    System.out.println(origin);
  }
}

DOCUMENT
COMMENT length=45
   This document is both well formed and valid 
DOCUMENT_TYPE: folks
ELEMENT: folks
   ELEMENT: person
      ELEMENT: name
         TEXT length=15
           Bertha D. Blues
      ELEMENT: phone
         TEXT length=12
           999 555-8888
      ELEMENT: email
         TEXT length=14
           b@xyz.net

Parse an XML string: Using DOM and a StringReader.

import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.CharacterData;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/** Parses an XML string with DOM and prints each employee's name and title. */
public class Main {
  /**
   * Parses the in-memory XML through a StringReader and prints, for every
   * "employee" element, the text of its first "name" and "title" children.
   *
   * @param arg command-line arguments (not used)
   * @throws Exception if the parser cannot be created or the text cannot be parsed
   */
  public static void main(String[] arg) throws Exception {
    String xmlRecords = "<data><employee><name>A</name>"
        + "<title>Manager</title></employee></data>";
    DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    InputSource is = new InputSource();
    is.setCharacterStream(new StringReader(xmlRecords));
    Document doc = db.parse(is);
    NodeList nodes = doc.getElementsByTagName("employee");
    for (int i = 0; i < nodes.getLength(); i++) {
      Element element = (Element) nodes.item(i);
      NodeList name = element.getElementsByTagName("name");
      // item(0) is null when the employee has no such child; the helper
      // below turns that into an empty string.
      Element line = (Element) name.item(0);
      System.out.println("Name: " + getCharacterDataFromElement(line));
      NodeList title = element.getElementsByTagName("title");
      line = (Element) title.item(0);
      System.out.println("Title: " + getCharacterDataFromElement(line));
    }
  }

  /**
   * Returns the character data held by the first child of an element.
   *
   * @param e the element to read; may be null
   * @return the data of the first child when that child is character data
   *         (text, CDATA or comment); otherwise, or when {@code e} is null,
   *         the empty string
   */
  public static String getCharacterDataFromElement(Element e) {
    // A missing element used to cause a NullPointerException here.
    if (e == null) {
      return "";
    }
    Node child = e.getFirstChild();
    if (child instanceof CharacterData) {
      CharacterData cd = (CharacterData) child;
      return cd.getData();
    }
    return "";
  }
}

Read XML as DOM

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 *  
 *
 * @author Costin Manolache
 */
public class Main {
  /**
   * Entity resolver that answers every lookup with an empty character
   * stream, whatever public or system ID is asked for.
   */
  public static class NullResolver implements EntityResolver {
    public InputSource resolveEntity(String publicId, String systemId)
        throws SAXException, IOException {
      StringReader nothing = new StringReader("");
      return new InputSource(nothing);
    }
  }

  /**
   * Reads XML from a stream into a DOM Document.
   *
   * <p>The parser is non-validating, keeps comments, is asked to ignore
   * element-content whitespace, and resolves entities through
   * {@link NullResolver}. Coalescing, entity-reference expansion and the
   * error handler are left at the parser's defaults.
   *
   * @param is the stream to parse
   * @return the parsed document
   * @throws SAXException if the input cannot be parsed
   * @throws IOException if reading the stream fails
   * @throws ParserConfigurationException if no parser can be created
   */
  public static Document readXml(InputStream is)
      throws SAXException, IOException, ParserConfigurationException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(false);
    factory.setIgnoringComments(false);
    factory.setIgnoringElementContentWhitespace(true);

    DocumentBuilder parser = factory.newDocumentBuilder();
    parser.setEntityResolver(new NullResolver());
    return parser.parse(is);
  }
}

Remove the element from parent

import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
public class Main {
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    factory.setExpandEntityReferences(false);
    Document doc = factory.newDocumentBuilder().parse(new File("filename"));
    Element element = (Element) doc.getElementsByTagName("b").item(0);
    Node parent = element.getParentNode();
    parent.removeChild(element);
    parent.normalize();
  }
}

Source To InputSource

/*
 * Copyright  2003-2008 The Apache Software Foundation.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.OutputStream;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
/** Conversions from JAXP {@link Source} objects to SAX {@link InputSource} objects. */
public class Main {
  /**
   * Serializes an element to a stream with a default (stylesheet-less)
   * Transformer. The operation is best effort: any exception raised while
   * serializing is dropped and the method returns normally.
   *
   * @param element the element to write
   * @param out the stream receiving the serialized element
   */
  public static void ElementToStream(Element element, OutputStream out) {
    try {
      TransformerFactory.newInstance()
          .newTransformer()
          .transform(new DOMSource(element), new StreamResult(out));
    } catch (Exception ignored) {
      // Intentionally ignored: callers get whatever reached the stream.
    }
  }

  /**
   * Converts a Source into an InputSource.
   *
   * <ul>
   * <li>SAXSource: its own InputSource is returned.</li>
   * <li>DOMSource: the node (or, for a Document, its document element) is
   *     serialized to bytes and exposed as a byte stream under the source's
   *     system ID.</li>
   * <li>StreamSource: system ID, byte stream, reader and public ID are
   *     copied over.</li>
   * <li>Anything else: an InputSource for the source's system ID.</li>
   * </ul>
   *
   * @param source the source to convert
   * @return the corresponding InputSource
   */
  public static InputSource sourceToInputSource(Source source) {
    if (source instanceof SAXSource) {
      SAXSource sax = (SAXSource) source;
      return sax.getInputSource();
    }

    if (source instanceof DOMSource) {
      Node root = ((DOMSource) source).getNode();
      if (root instanceof Document) {
        root = ((Document) root).getDocumentElement();
      }
      ByteArrayOutputStream serialized = new ByteArrayOutputStream();
      ElementToStream((Element) root, serialized);
      InputSource fromDom = new InputSource(source.getSystemId());
      fromDom.setByteStream(new ByteArrayInputStream(serialized.toByteArray()));
      return fromDom;
    }

    if (source instanceof StreamSource) {
      StreamSource stream = (StreamSource) source;
      InputSource fromStream = new InputSource(stream.getSystemId());
      fromStream.setByteStream(stream.getInputStream());
      fromStream.setCharacterStream(stream.getReader());
      fromStream.setPublicId(stream.getPublicId());
      return fromStream;
    }

    return getInputSourceFromURI(source.getSystemId());
  }

  /**
   * Wraps a URI in an InputSource. Nothing is opened or fetched here, and
   * no credentials are attached for authenticated URLs.
   *
   * @param uri the system ID of the resource
   * @return an InputSource whose system ID is {@code uri}
   */
  public static InputSource getInputSourceFromURI(String uri) {
    return new InputSource(uri);
  }
}

Use DOM L3 DOMBuilder, DOMBuilderFilter, DOMWriter and other DOM L3 functionality to preparse, revalidate and save document.

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import org.w3c.dom.DOMConfiguration;
import org.w3c.dom.DOMError;
import org.w3c.dom.DOMErrorHandler;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSOutput;
import org.w3c.dom.ls.LSParser;
import org.w3c.dom.ls.LSParserFilter;
import org.w3c.dom.ls.LSSerializer;
import org.w3c.dom.traversal.NodeFilter;
/**
 * Sample program for the DOM Level 3 Load and Save API. It parses a document
 * with an {@link LSParser} that has an {@link LSParserFilter} attached,
 * revalidates the resulting tree with {@link Document#normalizeDocument()},
 * and saves it to standard output with an {@link LSSerializer}.
 *
 * <p>These types were called DOMBuilder, DOMBuilderFilter and DOMWriter in
 * earlier drafts of the specification.
 *
 * <p>The class serves as both the error handler and the parser filter; it
 * holds no instance state.
 */
public class DOM3 implements DOMErrorHandler, LSParserFilter {
  /** Default namespaces support (true). */
  protected static final boolean DEFAULT_NAMESPACES = true;
  /** Default validation support (false). */
  protected static final boolean DEFAULT_VALIDATION = false;
  /** Default Schema validation support (false). */
  protected static final boolean DEFAULT_SCHEMA_VALIDATION = false;
  /** Name of the DOMConfiguration parameter that holds the error handler. */
  private static final String ERROR_HANDLER_PARAM = "error-handler";
  /** Value of the "schema-type" parameter used by this sample. */
  private static final String XML_SCHEMA_TYPE = "http://www.w3.org/2001/XMLSchema";
  /** The parser created by {@link #main(String[])}. */
  static LSParser builder;

  /**
   * Parses, revalidates and serializes the document named by the first
   * argument. Prints the usage text and exits with status 1 when no argument
   * is given. Any exception raised while processing is printed as a stack
   * trace.
   *
   * @param argv command-line arguments; only {@code argv[0]} (a URI) is used
   */
  public static void main(String[] argv) {
    if (argv.length == 0) {
      printUsage();
      System.exit(1);
    }
    try {
      // Look the implementation up through the DOM registry, using the Xerces source.
      System.setProperty(DOMImplementationRegistry.PROPERTY,
          "org.apache.xerces.dom.DOMXSImplementationSourceImpl");
      DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
      DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");
      if (impl == null) {
        // The registry returns null when nothing matches; fail with a clear message
        // instead of a NullPointerException on the next call.
        throw new IllegalStateException(
            "No DOM implementation supporting the \"LS\" feature was found");
      }

      // One stateless instance acts as both the parser filter and the error handler.
      DOM3 handler = new DOM3();

      // Create and configure the parser.
      builder = impl.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null);
      DOMConfiguration config = builder.getDomConfig();
      builder.setFilter(handler);
      config.setParameter(ERROR_HANDLER_PARAM, handler);
      config.setParameter("validate", Boolean.TRUE);
      config.setParameter("schema-type", XML_SCHEMA_TYPE);
      // NOTE(review): the parser uses "personal.xsd" while the revalidation step
      // below uses "data/personal.xsd" -- confirm both locations are intended.
      config.setParameter("schema-location", "personal.xsd");

      System.out.println("Parsing " + argv[0] + "...");
      Document doc = builder.parseURI(argv[0]);

      // Configure the document itself so normalizeDocument() revalidates it.
      config = doc.getDomConfig();
      config.setParameter(ERROR_HANDLER_PARAM, handler);
      config.setParameter("validate", Boolean.TRUE);
      config.setParameter("schema-type", XML_SCHEMA_TYPE);
      config.setParameter("schema-location", "data/personal.xsd");
      // Strip comments from the document during normalization.
      config.setParameter("comments", Boolean.FALSE);
      System.out.println("Normalizing document... ");
      doc.normalizeDocument();

      // Serialize the normalized document to standard output without an XML declaration.
      LSSerializer domWriter = impl.createLSSerializer();
      System.out.println("Serializing document... ");
      config = domWriter.getDomConfig();
      config.setParameter("xml-declaration", Boolean.FALSE);
      LSOutput dOut = impl.createLSOutput();
      dOut.setByteStream(System.out);
      domWriter.write(doc, dOut);
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }

  /** Prints the command-line usage text to standard error. */
  private static void printUsage() {
    System.err.println("usage: java dom.DOM3 uri ...");
    System.err.println();
    System.err.println("NOTE: You can only validate DOM tree against XML Schemas.");
  } // printUsage()

  /**
   * Reports a problem raised while parsing, normalizing or serializing.
   * Warnings, errors and fatal errors are each printed to standard output
   * with a severity prefix; any other severity value is ignored.
   *
   * @param error the problem being reported
   * @return always {@code true}, leaving it to the implementation to continue
   *         where the severity allows
   */
  @Override
  public boolean handleError(DOMError error) {
    switch (error.getSeverity()) {
      case DOMError.SEVERITY_WARNING:
        System.out.println("[dom3-warning]: " + error.getMessage());
        break;
      case DOMError.SEVERITY_ERROR:
        System.out.println("[dom3-error]: " + error.getMessage());
        break;
      case DOMError.SEVERITY_FATAL_ERROR:
        // Previously dropped silently, which hid e.g. well-formedness failures.
        System.out.println("[dom3-fatal-error]: " + error.getMessage());
        break;
      default:
        break;
    }
    return true;
  }

  /**
   * Accepts every node offered to the filter.
   *
   * @see org.w3c.dom.ls.LSParserFilter#acceptNode(Node)
   */
  @Override
  public short acceptNode(Node enode) {
    return LSParserFilter.FILTER_ACCEPT;
  }

  /**
   * Restricts the filter to element nodes.
   *
   * @see org.w3c.dom.ls.LSParserFilter#getWhatToShow()
   */
  @Override
  public int getWhatToShow() {
    return NodeFilter.SHOW_ELEMENT;
  }

  /**
   * Accepts every element as soon as its start tag has been seen.
   *
   * @see org.w3c.dom.ls.LSParserFilter#startElement(Element)
   */
  @Override
  public short startElement(Element elt) {
    return LSParserFilter.FILTER_ACCEPT;
  }
}

Visiting All the Elements in a DOM Document

import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
/**
 * Walks over every element of an XML document. The document is read from the
 * file named "filename" with a validating parser that leaves entity
 * references unexpanded; each element is fetched in turn but nothing further
 * is done with it, so the loop body is the place to add per-element work.
 */
public class Main {
  /**
   * Parses the file and iterates over all of its elements.
   *
   * @param argv ignored
   * @throws Exception if the parser cannot be created or the file cannot be parsed
   */
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    builderFactory.setExpandEntityReferences(false);
    builderFactory.setValidating(true);

    File input = new File("filename");
    Document parsed = builderFactory.newDocumentBuilder().parse(input);

    // "*" matches elements with any tag name.
    NodeList everyElement = parsed.getElementsByTagName("*");
    int index = 0;
    while (index < everyElement.getLength()) {
      Element current = (Element) everyElement.item(index);
      index++;
    }
  }
}

Java Tutorial/XML/DOM Parser

Содержание

A DOM Error Checker: Using DOM for Syntax Checking

A DOM Parse Tree Lister

Converting an XML Fragment into a DOM Fragment

Create DOM Document out of string

DOM Objects That Make Up the Parse Tree

Getting a DOM Element by Id

Getting a Node Relative to Another Node in a DOM Document

Getting the Declared Entities in a DOM Document

Getting the Notations in a DOM Document

Getting the Root Element in a DOM Document

Getting the Value of an Entity Reference in a DOM Document

Ignorable Whitespace and Element Content

Listing the Contents of Parse Tree Nodes: Using the DOM Parser to Extract XML Document Data

Parse an XML string: Using DOM and a StringReader.

Read XML as DOM

Remove the element from parent

Source To InputSource

Use DOM L3 DOMBuilder, DOMBuilderFilter, DOMWriter and other DOM L3 functionality to preparse, revalidate and save a document.

Visiting All the Elements in a DOM Document

Навигация

Персональные инструменты

Пространства имён

Варианты

Просмотры

Ещё

Поиск

Разделы

Навигация

Инструменты