Java Tutorial/PDF/HTML Parser
HtmlParser from iText
import java.io.FileOutputStream;
import com.lowagie.text.Document;
import com.lowagie.text.html.HtmlParser;
import com.lowagie.text.pdf.PdfWriter;
public class MainClass {
public static void main(String[] args) throws Exception {
Document document = new Document();
PdfWriter.getInstance(document, new FileOutputStream("html1.pdf"));
HtmlParser.parse(document, "example.html");
}
}
Parsing Html
import java.io.FileOutputStream;
import java.io.FileReader;
import java.util.ArrayList;
import com.lowagie.text.Document;
import com.lowagie.text.Element;
import com.lowagie.text.html.simpleparser.HTMLWorker;
import com.lowagie.text.html.simpleparser.StyleSheet;
import com.lowagie.text.pdf.PdfWriter;
public class MainClass {
public static void main(String[] args) throws Exception {
Document document = new Document();
StyleSheet st = new StyleSheet();
st.loadTagStyle("body", "leading", "16,0");
PdfWriter.getInstance(document, new FileOutputStream("html2.pdf"));
document.open();
ArrayList p = HTMLWorker.parseToList(new FileReader("example.html"), st);
for (int k = 0; k < p.size(); ++k)
document.add((Element) p.get(k));
document.close();
}
}
Parsing Html Snippets
import java.io.FileOutputStream;
import java.io.FileReader;
import java.util.ArrayList;
import com.lowagie.text.Document;
import com.lowagie.text.Element;
import com.lowagie.text.html.simpleparser.HTMLWorker;
import com.lowagie.text.html.simpleparser.StyleSheet;
import com.lowagie.text.pdf.PdfWriter;
public class MainClass {
public static void main(String[] args) throws Exception {
Document document = new Document();
StyleSheet styles = new StyleSheet();
styles.loadTagStyle("ol", "leading", "16,0");
PdfWriter.getInstance(document, new FileOutputStream("html3.pdf"));
document.open();
ArrayList objects;
objects = HTMLWorker.parseToList(new FileReader("data.html"), styles);
for (int k = 0; k < objects.size(); ++k)
document.add((Element) objects.get(k));
document.close();
}
}