Java Tutorial/Swing/HTML Document — различия между версиями
Admin (обсуждение | вклад) м (1 версия)  | 
				Admin (обсуждение | вклад)  м (1 версия)  | 
				
(нет различий) 
 | |
Текущая версия на 15:31, 31 мая 2010
Содержание
ElementIterator Class
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Downloads a web page, parses it into an {@link HTMLDocument} and prints, for
 * every anchor ({@code <a>}) element, its HREF attribute followed by the text
 * covered by the anchor.
 */
public class MainClass {
  /**
   * Fetches {@code http://www.google.ru}, loads it into an HTML document and
   * lists its links on standard output.
   *
   * @param args ignored
   * @throws Exception if the page cannot be fetched, read or parsed
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    // NOTE(review): the reader decodes with the platform default charset;
    // confirm whether the charset of the response should be used instead.
    BufferedReader br = new BufferedReader(new InputStreamReader(is));

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    try {
      HTMLEditorKit.Parser parser = new ParserDelegator();
      HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
      parser.parse(br, callback, true);
      // The reader buffers parsed tokens; flush() must be the last call on it
      // so that pending content is actually inserted into the document.
      callback.flush();
    } finally {
      // Always release the network stream, even when parsing fails.
      br.close();
    }

    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A); iterator.isValid(); iterator
        .next()) {
      AttributeSet attributes = iterator.getAttributes();
      // HREF is absent (null) for anchors that only carry a name.
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      int length = endOffset - startOffset;
      String text = htmlDoc.getText(startOffset, length);
      System.out.println(" - " + text);
    }
  }
}
   
   
url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News /maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar /intl/en/options/ - more » /advanced_search?hl=en - Advanced Search /preferences?hl=en - Preferences /language_tools?hl=en - Language Tools
Finding the elements of interest
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Loads a remote HTML page into an {@link HTMLDocument} and reports each
 * anchor ({@code <a>}) element found: its HREF attribute and the text it spans.
 */
public class MainClass {
  /**
   * Reads {@code http://www.google.ru}, parses it and prints one line per link
   * to standard output.
   *
   * @param args ignored
   * @throws Exception if the page cannot be fetched, read or parsed
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    InputStreamReader isr = new InputStreamReader(is);
    BufferedReader br = new BufferedReader(isr);
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
    try {
      parser.parse(br, callback, true);
      // flush() is the final call expected on the reader: it pushes any
      // still-buffered parsed content into the document.
      callback.flush();
    } finally {
      // Close the stream regardless of whether parsing succeeded.
      br.close();
    }
    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A); iterator.isValid(); iterator
        .next()) {
      AttributeSet attributes = iterator.getAttributes();
      // May be null when the anchor has no HREF attribute.
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      int length = endOffset - startOffset;
      String text = htmlDoc.getText(startOffset, length);
      System.out.println(" - " + text);
    }
  }
}
   
   
url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News /maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar /intl/en/options/ - more » /advanced_search?hl=en - Advanced Search /preferences?hl=en - Preferences /language_tools?hl=en - Language Tools
HTML Tag Constants
- A
 - DIR
 - IMG
 - SCRIPT
 - ADDRESS
 - DIV
 - IMPLIED
 - SELECT
 - APPLET
 - DL
 - INPUT
 - SMALL
 - AREA
 - DT
 - ISINDEX
 - SPAN
 - B
 - EM
 - KBD
 - STRIKE
 - BASE
 - FONT
 - LI
 - STRONG
 - BASEFONT
 - FORM
 - LINK
 - STYLE
 - BIG
 - FRAME
 - MAP
 - SUB
 - BLOCKQUOTE
 - FRAMESET
 - MENU
 - SUP
 - BODY
 - H1
 - META
 - TABLE
 - BR
 - H2
 - NOFRAMES
 - TD
 - CAPTION
 - H3
 - OBJECT
 - TEXTAREA
 - CENTER
 - H4
 - OL
 - TH
 - CITE
 - H5
 - OPTION
 - TITLE
 - CODE
 - H6
 - P
 - TR
 - COMMENT
 - HEAD
 - PARAM
 - TT
 - CONTENT
 - HR
 - PRE
 - U
 - DD
 - HTML
 - S
 - UL
 - DFN
 - I
 - SAMP
 - VAR
 
HyperlinkListener Example
import java.io.IOException;
import java.net.URL;
import javax.swing.JEditorPane;
import javax.swing.JFrame;
import javax.swing.JScrollPane;
import javax.swing.event.HyperlinkEvent;
import javax.swing.event.HyperlinkListener;
import javax.swing.text.Document;
/**
 * Hyperlink listener that logs links the pointer enters and navigates the
 * associated editor pane when a link is activated.
 */
class ActivatedHyperlinkListener implements HyperlinkListener {
  /** Pane whose page is replaced when a link is activated. */
  JEditorPane editorPane;

  /**
   * @param editorPane the pane that follows activated links
   */
  public ActivatedHyperlinkListener(JEditorPane editorPane) {
    this.editorPane = editorPane;
  }

  /**
   * Prints the target URL for ENTERED events; for ACTIVATED events tries to
   * load the target into the pane and puts the previous document back if
   * loading fails. Other event types are ignored.
   *
   * @param hyperlinkEvent the event delivered by the editor pane
   */
  public void hyperlinkUpdate(HyperlinkEvent hyperlinkEvent) {
    final HyperlinkEvent.EventType kind = hyperlinkEvent.getEventType();
    final URL target = hyperlinkEvent.getURL();

    if (kind == HyperlinkEvent.EventType.ENTERED) {
      System.out.println("URL: " + target);
      return;
    }
    if (kind != HyperlinkEvent.EventType.ACTIVATED) {
      return;
    }

    System.out.println("Activated");
    // Remember the current document so it can be restored on failure.
    final Document previous = editorPane.getDocument();
    try {
      editorPane.setPage(target);
    } catch (IOException loadFailure) {
      System.out.println("Error following link, Invalid link");
      editorPane.setDocument(previous);
    }
  }
}
/**
 * Shows a read-only {@link JEditorPane} displaying a web page inside a
 * scrollable frame, with an {@code ActivatedHyperlinkListener} attached so
 * that links can be followed.
 */
public class EditorPaneSample {
  /**
   * Schedules construction of the user interface on the event dispatch
   * thread, which is where Swing components are expected to be created and
   * shown.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    javax.swing.SwingUtilities.invokeLater(new Runnable() {
      public void run() {
        createAndShowGui();
      }
    });
  }

  /** Builds the frame, loads the start page into the editor pane and shows the frame. */
  private static void createAndShowGui() {
    JFrame frame = new JFrame("EditorPane Example");
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    try {
      JEditorPane editorPane = new JEditorPane("http://www.google.ru");
      editorPane.setEditable(false);
      HyperlinkListener hyperlinkListener = new ActivatedHyperlinkListener(editorPane);
      editorPane.addHyperlinkListener(hyperlinkListener);
      JScrollPane scrollPane = new JScrollPane(editorPane);
      frame.add(scrollPane);
    } catch (IOException e) {
      // The frame is still shown below, just without the editor pane.
      System.err.println("Unable to load: " + e);
    }
    frame.setSize(640, 480);
    frame.setVisible(true);
  }
}
   
   
Iterating Across HTML Documents for Links
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Iterates over the anchor ({@code <a>}) elements of a downloaded HTML page
 * and prints each link's HREF attribute together with its text.
 */
public class MainClass {
  /**
   * Downloads {@code http://www.google.ru}, parses it into an
   * {@link HTMLDocument} and writes {@code href - text} lines to standard
   * output.
   *
   * @param args ignored
   * @throws Exception if the page cannot be fetched, read or parsed
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    InputStreamReader isr = new InputStreamReader(is);
    BufferedReader br = new BufferedReader(isr);
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
    try {
      parser.parse(br, callback, true);
      // Without this final flush(), content still buffered by the reader is
      // never inserted into the document and trailing links are lost.
      callback.flush();
    } finally {
      // Release the connection's stream whether or not parsing succeeded.
      br.close();
    }
    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A); iterator.isValid(); iterator
        .next()) {
      AttributeSet attributes = iterator.getAttributes();
      // Null when the anchor carries no HREF attribute.
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      int length = endOffset - startOffset;
      String text = htmlDoc.getText(startOffset, length);
      System.out.println(" - " + text);
    }
  }
}
   
   
url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News /maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar /intl/en/options/ - more » /advanced_search?hl=en - Advanced Search /preferences?hl=en - Preferences /language_tools?hl=en - Language Tools
Look for specific tag types, such as HTML.Tag.H1, HTML.Tag.H2
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import javax.swing.text.StyleConstants;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Walks every element of a downloaded HTML document and prints the text
 * content found directly under H1, H2 and P elements.
 */
public class MainClass {
  /**
   * Downloads {@code http://www.google.ru}, parses it into an
   * {@link HTMLDocument} and prints the content of its H1, H2 and P elements
   * to standard output.
   *
   * @param args ignored
   * @throws Exception if the page cannot be fetched, read or parsed
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    InputStreamReader isr = new InputStreamReader(is);
    BufferedReader br = new BufferedReader(isr);
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
    try {
      parser.parse(br, callback, true);
      // flush() must be the last call on the reader so that content it still
      // buffers is inserted into the document before the document is walked.
      callback.flush();
    } finally {
      // Close the network stream even if parsing throws.
      br.close();
    }
    Element element;
    ElementIterator iterator = new ElementIterator(htmlDoc);
    while ((element = iterator.next()) != null) {
      AttributeSet attributes = element.getAttributes();
      Object name = attributes.getAttribute(StyleConstants.NameAttribute);

      // Identity comparison against the tag constants is sufficient; a
      // separate instanceof check adds nothing.
      if (name == HTML.Tag.H1 || name == HTML.Tag.H2 || name == HTML.Tag.P) {
        // Build up content text as it may be within multiple elements
        int count = element.getElementCount();
        for (int i = 0; i < count; i++) {
          Element child = element.getElement(i);
          AttributeSet childAttributes = child.getAttributes();
          if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) {
            int startOffset = child.getStartOffset();
            int endOffset = child.getEndOffset();
            int length = endOffset - startOffset;
            System.out.println(htmlDoc.getText(startOffset, length));
          }
        }
      }
    }
  }
}