Java - Extract text from PDF files -
I need to extract text (word by word) from a PDF file.
import java.io.*;
import com.itextpdf.text.*;
import com.itextpdf.text.pdf.*;
import com.itextpdf.text.pdf.parser.*;

public class pdf {
    private static String inputfile = "http://ontology.buffalo.edu/ontology%28pic%29.pdf" ;
    private static String outputfile = "c:/new3.pdf";

    public static void main(String[] args) throws DocumentException, IOException {
        Document document = new Document();
        PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(outputfile));
        document.open();
        PdfReader reader = new PdfReader(inputfile);
        int n = reader.getNumberOfPages();
        PdfImportedPage page;
        // go through pages
        for (int i = 1; i <= n; i++) {
            page = writer.getImportedPage(reader, i);
            System.out.println(i);
            Image instance = Image.getInstance(page);
            document.add(instance);
        }
        document.close();
        PdfReader readern = new PdfReader(outputfile);
        PdfTextExtractor parse = new PdfTextExtractor();
        for (int i = 1; i <= n; i++)
            System.out.println(parser.getTextFromPage(reader,i));
    }

When I compile this code, I get the following error:
The constructor PdfTextExtractor() is undefined
How do I fix this?
In iText, PdfTextExtractor contains only static methods, and its constructor is private, so it cannot be instantiated.
You can call it like so:
String myLine = PdfTextExtractor.getTextFromPage(reader, pageNumber);
Comments
Post a Comment