Java Tutorial/Swing/HTML Document
Содержание
ElementIterator Class
<source lang="java">
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;

import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Downloads a web page, parses it into an {@code HTMLDocument} and prints the
 * HREF attribute and the text of every anchor (A) tag in the document.
 */
public class MainClass {

  /**
   * Fetches the page, loads it into an HTML document and lists its links.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if the page cannot be fetched, parsed or read back
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();

    InputStream is = connection.getInputStream();
    try {
      // NOTE: no charset is passed, so the bytes are decoded with the platform default.
      InputStreamReader isr = new InputStreamReader(is);
      BufferedReader br = new BufferedReader(isr);

      HTMLEditorKit.Parser parser = new ParserDelegator();
      HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
      parser.parse(br, callback, true);
      // flush() must be the last call on the callback: it pushes any parsed
      // content that is still pending into the document.
      callback.flush();
    } finally {
      // The document is fully built in memory at this point; release the connection stream.
      is.close();
    }

    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A);
        iterator.isValid();
        iterator.next()) {
      AttributeSet attributes = iterator.getAttributes();
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);

      // The text of the link is the document content covered by the tag.
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      int length = endOffset - startOffset;
      String text = htmlDoc.getText(startOffset, length);
      System.out.println(" - " + text);
    }
  }
}</source>
url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News /maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar /intl/en/options/ - more » /advanced_search?hl=en - Advanced Search /preferences?hl=en - Preferences /language_tools?hl=en - Language Tools
Finding the elements of interest
<source lang="java">
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;

import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Finds the elements of interest in a downloaded page: every anchor (A) tag is
 * located with {@code HTMLDocument.getIterator} and its HREF and text are printed.
 */
public class MainClass {

  /**
   * Loads the page into an {@code HTMLDocument} and walks over its anchor tags.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if the page cannot be fetched, parsed or read back
   */
  public static void main(String args[]) throws Exception {
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();

    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    try {
      // NOTE: the reader uses the platform default charset because none is specified.
      BufferedReader br = new BufferedReader(new InputStreamReader(is));
      HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
      HTMLEditorKit.Parser parser = new ParserDelegator();
      parser.parse(br, callback, true);
      // Without this final flush(), content still pending in the callback
      // would never be inserted into the document.
      callback.flush();
    } finally {
      // Parsing is finished (or failed); the stream is no longer needed.
      is.close();
    }

    HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A);
    while (iterator.isValid()) {
      AttributeSet attributes = iterator.getAttributes();
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);

      // Read back the document text spanned by the current tag.
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      String text = htmlDoc.getText(startOffset, endOffset - startOffset);
      System.out.println(" - " + text);

      iterator.next();
    }
  }
}</source>
url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News /maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar /intl/en/options/ - more » /advanced_search?hl=en - Advanced Search /preferences?hl=en - Preferences /language_tools?hl=en - Language Tools
HTML Tag Constants
- A
- DIR
- IMG
- SCRIPT
- ADDRESS
- DIV
- IMPLIED
- SELECT
- APPLET
- DL
- INPUT
- SMALL
- AREA
- DT
- ISINDEX
- SPAN
- B
- EM
- KBD
- STRIKE
- BASE
- FONT
- LI
- STRONG
- BASEFONT
- FORM
- LINK
- STYLE
- BIG
- FRAME
- MAP
- SUB
- BLOCKQUOTE
- FRAMESET
- MENU
- SUP
- BODY
- H1
- META
- TABLE
- BR
- H2
- NOFRAMES
- TD
- CAPTION
- H3
- OBJECT
- TEXTAREA
- CENTER
- H4
- OL
- TH
- CITE
- H5
- OPTION
- TITLE
- CODE
- H6
- P
- TR
- COMMENT
- HEAD
- PARAM
- TT
- CONTENT
- HR
- PRE
- U
- DD
- HTML
- S
- UL
- DFN
- I
- SAMP
- VAR
HyperlinkListener Example
<source lang="java">
import java.awt.EventQueue;
import java.io.IOException;
import java.net.URL;

import javax.swing.JEditorPane;
import javax.swing.JFrame;
import javax.swing.JScrollPane;
import javax.swing.event.HyperlinkEvent;
import javax.swing.event.HyperlinkListener;
import javax.swing.text.Document;

/**
 * Hyperlink listener that logs the URL when the mouse enters a link and
 * navigates the attached editor pane to the URL when the link is activated.
 */
class ActivatedHyperlinkListener implements HyperlinkListener {

  /** The pane whose page is replaced when a link is activated. */
  JEditorPane editorPane;

  /**
   * Creates a listener bound to the given editor pane.
   *
   * @param editorPane the pane in which activated links are opened
   */
  public ActivatedHyperlinkListener(JEditorPane editorPane) {
    this.editorPane = editorPane;
  }

  /**
   * Reacts to ENTERED events by printing the URL and to ACTIVATED events by
   * loading the URL into the editor pane. Other event types are ignored.
   *
   * @param hyperlinkEvent the event describing the link interaction
   */
  public void hyperlinkUpdate(HyperlinkEvent hyperlinkEvent) {
    HyperlinkEvent.EventType type = hyperlinkEvent.getEventType();
    final URL url = hyperlinkEvent.getURL();
    if (type == HyperlinkEvent.EventType.ENTERED) {
      System.out.println("URL: " + url);
    } else if (type == HyperlinkEvent.EventType.ACTIVATED) {
      System.out.println("Activated");
      // Keep the current document so it can be restored if the new page fails to load.
      Document doc = editorPane.getDocument();
      try {
        editorPane.setPage(url);
      } catch (IOException ioException) {
        System.out.println("Error following link, Invalid link");
        editorPane.setDocument(doc);
      }
    }
  }
}

/**
 * Shows a read-only {@code JEditorPane} displaying a web page, with an
 * {@code ActivatedHyperlinkListener} installed so links can be followed.
 */
public class EditorPaneSample {

  /**
   * Schedules creation of the user interface on the event dispatch thread.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String args[]) {
    // Swing components must be created and shown on the event dispatch thread,
    // not on the thread that runs main().
    EventQueue.invokeLater(new Runnable() {
      public void run() {
        createAndShowGui();
      }
    });
  }

  /** Builds the frame, loads the start page into the editor pane and shows the window. */
  private static void createAndShowGui() {
    JFrame frame = new JFrame("EditorPane Example");
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    try {
      JEditorPane editorPane = new JEditorPane("http://www.google.ru");
      // Hyperlink events are only delivered while the pane is not editable.
      editorPane.setEditable(false);
      HyperlinkListener hyperlinkListener = new ActivatedHyperlinkListener(editorPane);
      editorPane.addHyperlinkListener(hyperlinkListener);
      JScrollPane scrollPane = new JScrollPane(editorPane);
      frame.add(scrollPane);
    } catch (IOException e) {
      // The (empty) frame is still shown below, as in the original example.
      System.err.println("Unable to load: " + e);
    }
    frame.setSize(640, 480);
    frame.setVisible(true);
  }
}</source>
Iterating Across HTML Documents for Links
<source lang="java">
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;

import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Iterates across an HTML document looking for links: prints the HREF and the
 * link text of each anchor (A) tag, one per line, separated by " - ".
 */
public class MainClass {

  /**
   * Downloads the page, parses it and prints every link it contains.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if the page cannot be fetched, parsed or read back
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);

    InputStream is = connection.getInputStream();
    try {
      // NOTE: decoding relies on the platform default charset (none is given).
      InputStreamReader isr = new InputStreamReader(is);
      BufferedReader br = new BufferedReader(isr);
      parser.parse(br, callback, true);
      // Give the callback the chance to push its pending changes into the
      // document; flush() is meant to be the last call made on it.
      callback.flush();
    } finally {
      // Always release the network stream, even when parsing fails.
      is.close();
    }

    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A);
        iterator.isValid();
        iterator.next()) {
      AttributeSet attributes = iterator.getAttributes();
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);

      // Extract the document text between the start and end offsets of the tag.
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      int length = endOffset - startOffset;
      String text = htmlDoc.getText(startOffset, length);
      System.out.println(" - " + text);
    }
  }
}</source>
url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News /maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar /intl/en/options/ - more » /advanced_search?hl=en - Advanced Search /preferences?hl=en - Preferences /language_tools?hl=en - Language Tools
Look for specific tag types, such as HTML.Tag.H1, HTML.Tag.H2
<source lang="java">
import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; import javax.swing.text.AttributeSet; import javax.swing.text.Element; import javax.swing.text.ElementIterator; import javax.swing.text.StyleConstants; import javax.swing.text.html.HTML; import javax.swing.text.html.HTMLDocument; import javax.swing.text.html.HTMLEditorKit; import javax.swing.text.html.parser.ParserDelegator; public class MainClass {
public static void main(String args[]) throws Exception { URL url = new URL("http://www.google.ru"); URLConnection connection = url.openConnection(); InputStream is = connection.getInputStream(); InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr); HTMLEditorKit htmlKit = new HTMLEditorKit(); HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument(); HTMLEditorKit.Parser parser = new ParserDelegator(); HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0); parser.parse(br, callback, true); Element element; ElementIterator iterator = new ElementIterator(htmlDoc); while ((element = iterator.next()) != null) { AttributeSet attributes = element.getAttributes(); Object name = attributes.getAttribute(StyleConstants.NameAttribute); if ((name instanceof HTML.Tag) && (name == HTML.Tag.H1 || name == HTML.Tag.H2 || name == HTML.Tag.P )) { // Build up content text as it may be within multiple elements int count = element.getElementCount(); for (int i = 0; i < count; i++) { Element child = element.getElement(i); AttributeSet childAttributes = child.getAttributes(); if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) { int startOffset = child.getStartOffset(); int endOffset = child.getEndOffset(); int length = endOffset - startOffset; System.out.println(htmlDoc.getText(startOffset, length)); } } } } }
}</source>