Java Tutorial/Swing/HTML Document

Материал из Java эксперт
Версия от 15:31, 31 мая 2010; Admin (обсуждение | вклад) (1 версия)
(разн.) ← Предыдущая | Текущая версия (разн.) | Следующая → (разн.)
Перейти к: навигация, поиск

ElementIterator Class

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Downloads a web page, parses it into an {@link HTMLDocument} and prints the
 * {@code href} attribute and the link text of every anchor ({@code <a>}) tag.
 */
public class MainClass {
  /**
   * Fetches the page, loads it into an HTML document and writes one line per
   * anchor to standard output in the form {@code href - text}.
   *
   * @param args command-line arguments (unused)
   * @throws Exception if the page cannot be fetched, parsed or read back
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    // No charset is passed, so the bytes are decoded with the platform default.
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    try {
      HTMLEditorKit.Parser parser = new ParserDelegator();
      HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
      // The last argument is the parser's ignoreCharSet flag.
      parser.parse(br, callback, true);
      // Push any changes still pending in the callback into the document;
      // without this the tail of the page may never reach htmlDoc.
      callback.flush();
    } finally {
      // Closing the reader also closes the underlying connection stream.
      br.close();
    }
    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A); iterator.isValid(); iterator
        .next()) {
      AttributeSet attributes = iterator.getAttributes();
      // An anchor without an href yields null here, which is printed as "null".
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      int length = endOffset - startOffset;
      String text = htmlDoc.getText(startOffset, length);
      System.out.println(" - " + text);
    }
  }
}



url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page
https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in
  http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images
  http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups
  http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News
  /maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps
  http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar
  /intl/en/options/ - more »
  /advanced_search?hl=en - Advanced Search
  /preferences?hl=en - Preferences
  /language_tools?hl=en - Language Tools


Finding the elements of interest

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Loads a web page into an {@link HTMLDocument} and lists the anchors it
 * contains: the {@code href} attribute followed by the text of each link.
 */
public class MainClass {
  /**
   * Fetches and parses the page, then prints one {@code href - text} line per
   * {@code <a>} tag found in the resulting document.
   *
   * @param args command-line arguments (unused)
   * @throws Exception if the page cannot be fetched, parsed or read back
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    // No charset is passed, so the bytes are decoded with the platform default.
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    try {
      HTMLEditorKit.Parser parser = new ParserDelegator();
      HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
      // The last argument is the parser's ignoreCharSet flag.
      parser.parse(br, callback, true);
      // Push any changes still pending in the callback into the document;
      // without this the tail of the page may never reach htmlDoc.
      callback.flush();
    } finally {
      // Closing the reader also closes the underlying connection stream.
      br.close();
    }
    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A); iterator.isValid(); iterator
        .next()) {
      AttributeSet attributes = iterator.getAttributes();
      // An anchor without an href yields null here, which is printed as "null".
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      int length = endOffset - startOffset;
      String text = htmlDoc.getText(startOffset, length);
      System.out.println(" - " + text);
    }
  }
}



url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page
https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in
  http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images
  http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups
  http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News
  /maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps
  http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar
  /intl/en/options/ - more »
  /advanced_search?hl=en - Advanced Search
  /preferences?hl=en - Preferences
  /language_tools?hl=en - Language Tools


HTML Tag Constants

  1. A
  2. DIR
  3. IMG
  4. SCRIPT
  5. ADDRESS
  6. DIV
  7. IMPLIED
  8. SELECT
  9. APPLET
  10. DL
  11. INPUT
  12. SMALL
  13. AREA
  14. DT
  15. ISINDEX
  16. SPAN
  17. B
  18. EM
  19. KBD
  20. STRIKE
  21. BASE
  22. FONT
  23. LI
  24. STRONG
  25. BASEFONT
  26. FORM
  27. LINK
  28. STYLE
  29. BIG
  30. FRAME
  31. MAP
  32. SUB
  33. BLOCKQUOTE
  34. FRAMESET
  35. MENU
  36. SUP
  37. BODY
  38. H1
  39. META
  40. TABLE
  41. BR
  42. H2
  43. NOFRAMES
  44. TD
  45. CAPTION
  46. H3
  47. OBJECT
  48. TEXTAREA
  49. CENTER
  50. H4
  51. OL
  52. TH
  53. CITE
  54. H5
  55. OPTION
  56. TITLE
  57. CODE
  58. H6
  59. P
  60. TR
  61. COMMENT
  62. HEAD
  63. PARAM
  64. TT
  65. CONTENT
  66. HR
  67. PRE
  68. U
  69. DD
  70. HTML
  71. S
  72. UL
  73. DFN
  74. I
  75. SAMP
  76. VAR


HyperlinkListener Example

import java.io.IOException;
import java.net.URL;
import javax.swing.JEditorPane;
import javax.swing.JFrame;
import javax.swing.JScrollPane;
import javax.swing.SwingUtilities;
import javax.swing.event.HyperlinkEvent;
import javax.swing.event.HyperlinkListener;
import javax.swing.text.Document;
/**
 * Hyperlink listener that logs the link under the mouse and, when a link is
 * activated, loads its target into the editor pane it was created with.
 */
class ActivatedHyperlinkListener implements HyperlinkListener {
  JEditorPane editorPane;

  /**
   * @param editorPane the pane whose page is replaced when a link is activated
   */
  public ActivatedHyperlinkListener(JEditorPane editorPane) {
    this.editorPane = editorPane;
  }

  /**
   * Prints the URL for ENTERED events, follows the link for ACTIVATED events
   * and ignores every other event type.
   *
   * @param hyperlinkEvent the event describing the hyperlink interaction
   */
  public void hyperlinkUpdate(HyperlinkEvent hyperlinkEvent) {
    final URL target = hyperlinkEvent.getURL();
    final HyperlinkEvent.EventType kind = hyperlinkEvent.getEventType();

    if (kind == HyperlinkEvent.EventType.ENTERED) {
      System.out.println("URL: " + target);
      return;
    }
    if (kind != HyperlinkEvent.EventType.ACTIVATED) {
      return;
    }

    System.out.println("Activated");
    followLink(target);
  }

  /**
   * Loads the target into the pane; if loading fails, reports it and puts
   * back the document that was showing before the attempt.
   */
  private void followLink(URL target) {
    Document previous = editorPane.getDocument();
    try {
      editorPane.setPage(target);
    } catch (IOException failure) {
      System.out.println("Error following link, Invalid link");
      editorPane.setDocument(previous);
    }
  }
}
/**
 * Opens a window containing a read-only {@link JEditorPane} that displays a
 * web page and follows the hyperlinks the user activates.
 */
public class EditorPaneSample {
  /**
   * Schedules construction of the user interface on the event dispatch
   * thread, where Swing components are expected to be created and modified.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String args[]) {
    SwingUtilities.invokeLater(new Runnable() {
      public void run() {
        createAndShowGui();
      }
    });
  }

  /**
   * Builds the frame, tries to load the start page into a scrollable editor
   * pane and shows the window. If the page cannot be loaded the error is
   * reported on standard error and the frame is shown without the pane.
   */
  private static void createAndShowGui() {
    JFrame frame = new JFrame("EditorPane Example");
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    try {
      JEditorPane editorPane = new JEditorPane("http://www.google.ru");
      // Made non-editable so the pane acts as a viewer; hyperlink events are
      // delivered to the listener registered below.
      editorPane.setEditable(false);
      HyperlinkListener hyperlinkListener = new ActivatedHyperlinkListener(editorPane);
      editorPane.addHyperlinkListener(hyperlinkListener);
      JScrollPane scrollPane = new JScrollPane(editorPane);
      frame.add(scrollPane);
    } catch (IOException e) {
      System.err.println("Unable to load: " + e);
    }
    frame.setSize(640, 480);
    frame.setVisible(true);
  }
}





Iterating Across HTML Documents for Links

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Iterates over the anchors of a downloaded HTML page and prints, for each
 * one, its {@code href} attribute and the text it encloses.
 */
public class MainClass {
  /**
   * Fetches the page, parses it into an {@link HTMLDocument} and writes one
   * {@code href - text} line per {@code <a>} tag to standard output.
   *
   * @param args command-line arguments (unused)
   * @throws Exception if the page cannot be fetched, parsed or read back
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    // No charset is passed, so the bytes are decoded with the platform default.
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    try {
      HTMLEditorKit.Parser parser = new ParserDelegator();
      HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
      // The last argument is the parser's ignoreCharSet flag.
      parser.parse(br, callback, true);
      // Push any changes still pending in the callback into the document;
      // without this the tail of the page may never reach htmlDoc.
      callback.flush();
    } finally {
      // Closing the reader also closes the underlying connection stream.
      br.close();
    }
    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A); iterator.isValid(); iterator
        .next()) {
      AttributeSet attributes = iterator.getAttributes();
      // An anchor without an href yields null here, which is printed as "null".
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      int length = endOffset - startOffset;
      String text = htmlDoc.getText(startOffset, length);
      System.out.println(" - " + text);
    }
  }
}



url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page
https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in
  http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images
  http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups
  http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News
  /maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps
  http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar
  /intl/en/options/ - more »
  /advanced_search?hl=en - Advanced Search
  /preferences?hl=en - Preferences
  /language_tools?hl=en - Language Tools


Look for specific tag types, such as HTML.Tag.H1, HTML.Tag.H2

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import javax.swing.text.StyleConstants;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Walks every element of a downloaded HTML page and prints the text found
 * directly inside {@code <h1>}, {@code <h2>} and {@code <p>} elements.
 */
public class MainClass {
  /**
   * Fetches the page, parses it into an {@link HTMLDocument}, then visits all
   * elements and prints each text run that is a direct child of a heading
   * (H1/H2) or paragraph element, one run per output line.
   *
   * @param args command-line arguments (unused)
   * @throws Exception if the page cannot be fetched, parsed or read back
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    // No charset is passed, so the bytes are decoded with the platform default.
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    try {
      HTMLEditorKit.Parser parser = new ParserDelegator();
      HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
      // The last argument is the parser's ignoreCharSet flag.
      parser.parse(br, callback, true);
      // Push any changes still pending in the callback into the document;
      // without this the tail of the page may never reach htmlDoc.
      callback.flush();
    } finally {
      // Closing the reader also closes the underlying connection stream.
      br.close();
    }
    Element element;
    ElementIterator iterator = new ElementIterator(htmlDoc);
    while ((element = iterator.next()) != null) {
      AttributeSet attributes = element.getAttributes();
      Object name = attributes.getAttribute(StyleConstants.NameAttribute);

      // Identity comparison against the tag constants is sufficient; a name
      // that is not an HTML.Tag can never be equal to one of them.
      if (name == HTML.Tag.H1 || name == HTML.Tag.H2 || name == HTML.Tag.P) {
        // The text may be split across several child elements; each CONTENT
        // child is printed on its own line.
        int count = element.getElementCount();
        for (int i = 0; i < count; i++) {
          Element child = element.getElement(i);
          AttributeSet childAttributes = child.getAttributes();
          if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) {
            int startOffset = child.getStartOffset();
            int endOffset = child.getEndOffset();
            int length = endOffset - startOffset;
            System.out.println(htmlDoc.getText(startOffset, length));
          }
        }
      }
    }
  }
}