Java Tutorial/Swing/HTML Document
Версия от 17:44, 31 мая 2010; (обсуждение)
Содержание
ElementIterator Class
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Downloads a web page, parses it into an {@link HTMLDocument} and prints every
 * anchor ({@code <a>}) element as "href - link text", one per line.
 */
public class MainClass {
  /**
   * Fetches http://www.google.ru, parses it and lists its links on standard output.
   *
   * @param args ignored
   * @throws Exception if the page cannot be fetched, read or parsed
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    // NOTE: the single-argument InputStreamReader decodes with the platform default charset.
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    try {
      HTMLEditorKit.Parser parser = new ParserDelegator();
      HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
      // true: ignore charset directives found in the page instead of aborting the parse.
      parser.parse(br, callback, true);
      // The reader buffers parsed tokens; flush() pushes the pending ones into the
      // document. Without it the tail of the page may never reach htmlDoc.
      callback.flush();
    } finally {
      // Closing the reader also closes the underlying network stream.
      br.close();
    }
    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A); iterator.isValid();
        iterator.next()) {
      AttributeSet attributes = iterator.getAttributes();
      // Anchors without an href attribute yield null here and are printed as "null".
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);
      // The iterator reports the document offsets covered by the anchor's text.
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      int length = endOffset - startOffset;
      String text = htmlDoc.getText(startOffset, length);
      System.out.println(" - " + text);
    }
  }
}
url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page
https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in
http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images
http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups
http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News
/maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps
http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar
/intl/en/options/ - more »
/advanced_search?hl=en - Advanced Search
/preferences?hl=en - Preferences
/language_tools?hl=en - Language Tools
Finding the elements of interest
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Loads a web page into an {@link HTMLDocument} and reports each anchor
 * ({@code <a>}) element found in it as "href - link text".
 */
public class MainClass {
  /**
   * Fetches http://www.google.ru, parses it and prints one line per link.
   *
   * @param args ignored
   * @throws Exception if the page cannot be fetched, read or parsed
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    HTMLDocument htmlDoc = loadDocument(url);
    printLinks(htmlDoc);
  }

  /** Downloads {@code url} and parses its content into a new HTML document. */
  private static HTMLDocument loadDocument(URL url) throws Exception {
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    // NOTE: the single-argument InputStreamReader decodes with the platform default charset.
    InputStreamReader isr = new InputStreamReader(is);
    BufferedReader br = new BufferedReader(isr);
    try {
      HTMLEditorKit htmlKit = new HTMLEditorKit();
      HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
      HTMLEditorKit.Parser parser = new ParserDelegator();
      HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
      // true: ignore charset directives found in the page instead of aborting the parse.
      parser.parse(br, callback, true);
      // Push any tokens still buffered by the reader into the document; without
      // this call the end of the page may be missing from htmlDoc.
      callback.flush();
      return htmlDoc;
    } finally {
      // Closing the reader also closes the underlying network stream.
      br.close();
    }
  }

  /** Prints "href - text" for every anchor element of {@code htmlDoc}. */
  private static void printLinks(HTMLDocument htmlDoc) throws Exception {
    HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A);
    while (iterator.isValid()) {
      AttributeSet attributes = iterator.getAttributes();
      // Anchors without an href attribute yield null here and are printed as "null".
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);
      int startOffset = iterator.getStartOffset();
      int length = iterator.getEndOffset() - startOffset;
      System.out.println(" - " + htmlDoc.getText(startOffset, length));
      iterator.next();
    }
  }
}
url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page
https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in
http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images
http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups
http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News
/maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps
http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar
/intl/en/options/ - more »
/advanced_search?hl=en - Advanced Search
/preferences?hl=en - Preferences
/language_tools?hl=en - Language Tools
HTML Tag Constants
- A
- DIR
- IMG
- SCRIPT
- ADDRESS
- DIV
- IMPLIED
- SELECT
- APPLET
- DL
- INPUT
- SMALL
- AREA
- DT
- ISINDEX
- SPAN
- B
- EM
- KBD
- STRIKE
- BASE
- FONT
- LI
- STRONG
- BASEFONT
- FORM
- LINK
- STYLE
- BIG
- FRAME
- MAP
- SUB
- BLOCKQUOTE
- FRAMESET
- MENU
- SUP
- BODY
- H1
- META
- TABLE
- BR
- H2
- NOFRAMES
- TD
- CAPTION
- H3
- OBJECT
- TEXTAREA
- CENTER
- H4
- OL
- TH
- CITE
- H5
- OPTION
- TITLE
- CODE
- H6
- P
- TR
- COMMENT
- HEAD
- PARAM
- TT
- CONTENT
- HR
- PRE
- U
- DD
- HTML
- S
- UL
- DFN
- I
- SAMP
- VAR
HyperlinkListener Example
import java.io.IOException;
import java.net.URL;
import javax.swing.JEditorPane;
import javax.swing.JFrame;
import javax.swing.JScrollPane;
import javax.swing.event.HyperlinkEvent;
import javax.swing.event.HyperlinkListener;
import javax.swing.text.Document;
/**
 * Hyperlink listener for a {@link JEditorPane}: logs the target when the pointer
 * enters a link and navigates the pane to the target when the link is activated.
 */
class ActivatedHyperlinkListener implements HyperlinkListener {
  /** The pane whose page is replaced when a link is activated. */
  JEditorPane editorPane;

  /**
   * @param editorPane the pane to navigate on link activation
   */
  public ActivatedHyperlinkListener(JEditorPane editorPane) {
    this.editorPane = editorPane;
  }

  /**
   * Prints the link target on ENTERED, follows the link on ACTIVATED and ignores
   * every other event type.
   *
   * @param hyperlinkEvent the event reported by the editor pane
   */
  public void hyperlinkUpdate(HyperlinkEvent hyperlinkEvent) {
    final HyperlinkEvent.EventType kind = hyperlinkEvent.getEventType();
    final URL target = hyperlinkEvent.getURL();
    if (HyperlinkEvent.EventType.ENTERED == kind) {
      System.out.println("URL: " + target);
      return;
    }
    if (HyperlinkEvent.EventType.ACTIVATED != kind) {
      return;
    }
    System.out.println("Activated");
    followLink(target);
  }

  /** Loads {@code target} into the pane, restoring the previous document on failure. */
  private void followLink(URL target) {
    // Remember the current document so it can be put back if loading fails.
    Document previous = editorPane.getDocument();
    try {
      editorPane.setPage(target);
    } catch (IOException loadFailure) {
      System.out.println("Error following link, Invalid link");
      editorPane.setDocument(previous);
    }
  }
}
/**
 * Shows a read-only {@link JEditorPane} displaying http://www.google.ru inside a
 * scrollable frame, with an {@code ActivatedHyperlinkListener} attached so links
 * can be followed.
 */
public class EditorPaneSample {
  /**
   * Schedules construction of the user interface on the event dispatch thread.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    // Swing components must be created and shown on the event dispatch thread,
    // not on the thread that runs main().
    javax.swing.SwingUtilities.invokeLater(new Runnable() {
      public void run() {
        createAndShowGui();
      }
    });
  }

  /** Builds the frame, loads the start page into the editor pane and shows the window. */
  private static void createAndShowGui() {
    JFrame frame = new JFrame("EditorPane Example");
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    try {
      JEditorPane editorPane = new JEditorPane("http://www.google.ru");
      // Set the pane non-editable so link events reach the hyperlink listener.
      editorPane.setEditable(false);
      HyperlinkListener hyperlinkListener = new ActivatedHyperlinkListener(editorPane);
      editorPane.addHyperlinkListener(hyperlinkListener);
      JScrollPane scrollPane = new JScrollPane(editorPane);
      frame.add(scrollPane);
    } catch (IOException e) {
      // The (empty) frame is still shown below when the page cannot be loaded.
      System.err.println("Unable to load: " + e);
    }
    frame.setSize(640, 480);
    frame.setVisible(true);
  }
}
Iterating Across HTML Documents for Links
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Iterates over the anchor ({@code <a>}) elements of a downloaded HTML page and
 * prints each one as "href - link text".
 */
public class MainClass {
  /**
   * Fetches http://www.google.ru, parses it and lists its links on standard output.
   *
   * @param args ignored
   * @throws Exception if the page cannot be fetched, read or parsed
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    // NOTE: the single-argument InputStreamReader decodes with the platform default charset.
    InputStreamReader isr = new InputStreamReader(is);
    BufferedReader br = new BufferedReader(isr);
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    try {
      HTMLEditorKit.Parser parser = new ParserDelegator();
      HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
      // true: ignore charset directives found in the page instead of aborting the parse.
      parser.parse(br, callback, true);
      // flush() must be the last call on the reader: it inserts the tokens that are
      // still buffered, so links near the end of the page are not lost.
      callback.flush();
    } finally {
      // Closing the reader also closes the underlying network stream.
      br.close();
    }
    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A); iterator.isValid();
        iterator.next()) {
      AttributeSet attributes = iterator.getAttributes();
      // Anchors without an href attribute yield null here and are printed as "null".
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);
      // The iterator reports the document offsets covered by the anchor's text.
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      int length = endOffset - startOffset;
      String text = htmlDoc.getText(startOffset, length);
      System.out.println(" - " + text);
    }
  }
}
url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= - Personalize this page
https://www.google.ru/accounts/Login?continue=http://www.google.ca/&hl=en - Sign in
http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi - Images
http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg - Groups
http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn - News
/maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl - Maps
http://scholar.google.ru/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws - Scholar
/intl/en/options/ - more »
/advanced_search?hl=en - Advanced Search
/preferences?hl=en - Preferences
/language_tools?hl=en - Language Tools
Look for specific tag types, such as HTML.Tag.H1, HTML.Tag.H2
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import javax.swing.text.StyleConstants;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Walks the element tree of a downloaded HTML page and prints the text found
 * directly inside its {@code <h1>}, {@code <h2>} and {@code <p>} elements.
 */
public class MainClass {
  /**
   * Fetches http://www.google.ru, parses it and prints heading and paragraph text.
   *
   * @param args ignored
   * @throws Exception if the page cannot be fetched, read or parsed
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.ru");
    URLConnection connection = url.openConnection();
    InputStream is = connection.getInputStream();
    // NOTE: the single-argument InputStreamReader decodes with the platform default charset.
    InputStreamReader isr = new InputStreamReader(is);
    BufferedReader br = new BufferedReader(isr);
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    try {
      HTMLEditorKit.Parser parser = new ParserDelegator();
      HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
      // true: ignore charset directives found in the page instead of aborting the parse.
      parser.parse(br, callback, true);
      // Insert the tokens still buffered by the reader; without this call the
      // element tree walked below may be missing the end of the page.
      callback.flush();
    } finally {
      // Closing the reader also closes the underlying network stream.
      br.close();
    }
    Element element;
    ElementIterator iterator = new ElementIterator(htmlDoc);
    while ((element = iterator.next()) != null) {
      AttributeSet attributes = element.getAttributes();
      // The element's tag is stored under the NameAttribute key.
      Object name = attributes.getAttribute(StyleConstants.NameAttribute);
      if (name == HTML.Tag.H1 || name == HTML.Tag.H2 || name == HTML.Tag.P) {
        printContent(htmlDoc, element);
      }
    }
  }

  /**
   * Prints the text of every direct CONTENT child of {@code element}, one line per
   * child, since the element's text may be split across several children.
   */
  private static void printContent(HTMLDocument htmlDoc, Element element) throws Exception {
    int count = element.getElementCount();
    for (int i = 0; i < count; i++) {
      Element child = element.getElement(i);
      AttributeSet childAttributes = child.getAttributes();
      if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) {
        int startOffset = child.getStartOffset();
        int endOffset = child.getEndOffset();
        int length = endOffset - startOffset;
        System.out.println(htmlDoc.getText(startOffset, length));
      }
    }
  }
}