Thursday, November 24, 2011

Java code to read hyperlinks from PDF

**** Java code to read PDF links
**** Java code to extract hyperlinks from PDF
**** Extract Annotation link from PDF
**** Read Annotation type as link in PDF

Here is the code to read or extract hyperlink target reference values from a PDF.
The code uses the PDFBox API, provided by Apache, version 1.6.0.


import java.io.File;
import java.util.Iterator;
import java.util.List;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.interactive.action.type.PDAction;
import org.apache.pdfbox.pdmodel.interactive.action.type.PDActionLaunch;
import org.apache.pdfbox.pdmodel.interactive.action.type.PDActionURI;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
import org.apache.pdfbox.util.PDFTextStripper;





/**
 * Proof of concept that prints the target of every link annotation in a PDF.
 *
 * <p>For each page of the document, every annotation that is a
 * {@link PDAnnotationLink} is inspected and the target of its action is
 * written to standard output as {@code "PDF Link: <target>"}.
 */
public class POC {

    /** Path used when no file is given on the command line. */
    private static final String DEFAULT_PDF_PATH = "D:/POC/sample.pdf";

    /**
     * Loads a PDF and prints the target of each link annotation it contains.
     *
     * @param args optional; if {@code args[0]} is present it is used as the PDF
     *             path, otherwise {@link #DEFAULT_PDF_PATH} is used
     * @throws Exception if closing the document fails; failures while loading
     *                   or reading the document are caught and their stack
     *                   trace is printed instead
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;

        PDDocument pd = null;
        try {
            pd = PDDocument.load(new File(path));

            List<?> pages = pd.getDocumentCatalog().getAllPages();
            for (Object pageObject : pages) {
                PDPage page = (PDPage) pageObject;

                List<?> annotations = page.getAnnotations();
                for (Object annotationObject : annotations) {
                    if (!(annotationObject instanceof PDAnnotationLink)) {
                        continue;
                    }
                    PDAnnotationLink link = (PDAnnotationLink) annotationObject;

                    String target = describeTarget(link.getAction());
                    if (target != null) {
                        System.out.println("PDF Link: " + target);
                    }
                }
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            // Close in finally so the document is released even when reading fails.
            if (pd != null) {
                pd.close();
            }
        }
    }

    /**
     * Returns the printable target of a link action.
     *
     * <p>The action type is checked before casting: a link may carry a URI
     * action, a launch action, some other action type, or no action at all,
     * so a blind cast to {@link PDActionLaunch} is not safe.
     *
     * @param action the action attached to a link annotation; may be {@code null}
     * @return the URI for a {@link PDActionURI}, the file specification string
     *         for a {@link PDActionLaunch}, or {@code null} when the action is
     *         absent, of another type, or has no target value
     */
    private static String describeTarget(PDAction action) {
        if (action instanceof PDActionURI) {
            return ((PDActionURI) action).getURI();
        }
        if (action instanceof PDActionLaunch) {
            return ((PDActionLaunch) action).getF();
        }
        return null;
    }
}

1 comment:

  1. i like but sir please convert it into c# iam really thankful of you.

    ReplyDelete