Extract pages from pdf

abenitez77

Board Regular
Joined
Dec 30, 2004
Messages
149
This code runs and creates a pdf file, but it is skipping the pages that don't have the word "Information" on it.

I am searching for the word "Information" and every time it finds it, it should print that page up to the next page where "information" shows up. 95% of the time it will be on every page of the pdf document, but 5% of the time, it could have an extra page or two in between where "information" is not on the page. When that happens I want the pdf file to print the page that has the word "Information" on it and all the following pages up to the page that next has the word "Information" on it.

Code:
Sub SearchWordInPDF()
      
    '----------------------------------------------------------------------------------------
    'This macro can be used to find a specific WORD in a PDF document (one word ONLY -> in
    'case you search two words for example it doesn't find anything, just opens the file).
    'The macro opens the PDF, finds the first appearance of the specified word, scrolls
    'so that it is visible and highlights it.

    'The code uses late binding, so no reference to external library is required.
    'However, the code works ONLY with Adobe Professional, so don't try to use it with
    'Adobe Reader because you will get an "ActiveX component can't create object" error.
    
    '--------------------------------------------------------------------------------------

    'Declaring the necessary variables.
    Dim WordToFind  As String
    Dim PDFPath     As String
    Dim App         As Object
    Dim AVDoc       As Object
    Dim PDDoc       As Object
    Dim JSO         As Object
    Dim i           As Long
    Dim j           As Long
    Dim Word        As Variant
    Dim Result      As Integer
    Dim x           As Long
    Dim foundnew    As Boolean
    Dim PDFNewName  As String
    Dim page        As Long
    Dim newPDF      As Acrobat.CAcroPDDoc
    Dim lngPages    As Long
    Dim NewName     As String

    'Added the below declaration
    Dim SpecFl As String

    'Specify the text you want to search.
    WordToFind = "Information"
    
    'Specify the path of the sample PDF form.
    'Full path example:
    PDFPath = "C:\Audit Strategy\Audits\Federated Coop\PDF Extraction\Promo 41_Contracts_2018-03-06_03-53-39-PM.pdf"
    PDFNewName = Mid(PDFPath, 1, Len(PDFPath) - 4)
    
    'Check if the file exists.
    If Dir(PDFPath) = "" Then
        MsgBox "Cannot find the PDF file!" & vbCrLf & "Check the PDF path and retry.", _
                vbCritical, "File Path Error"
        Exit Sub
    End If
   
    'Check if the input file is a PDF file.
    If LCase(Right(PDFPath, 3)) <> "pdf" Then
        MsgBox "The input file is not a PDF file!", vbCritical, "File Type Error"
        Exit Sub
    End If
    
    On Error Resume Next
    
    'Initialize Acrobat by creating the App object.
    Set App = CreateObject("AcroExch.App")
    
    'Check if the object was created. In case of error release the objects and exit.
    If Err.Number <> 0 Then
        MsgBox "Could not create the Adobe Application object!", vbCritical, "Object Error"
        Set App = Nothing
        Exit Sub
    End If
    
    'Create the AVDoc object.
    Set AVDoc = CreateObject("AcroExch.AVDoc")
    
    
    'Check if the object was created. In case of error release the objects and exit.
    If Err.Number <> 0 Then
        MsgBox "Could not create the AVDoc object!", vbCritical, "Object Error"
        Set AVDoc = Nothing
        Set App = Nothing
        Exit Sub
    End If
    
    On Error GoTo 0
    
    'Open the PDF file.
    If AVDoc.Open(PDFPath, "") = True Then
        
        'Open successful, bring the PDF document to the front.
        AVDoc.BringToFront
        
        'Set the PDDoc object.
        Set PDDoc = AVDoc.GetPDDoc
        
        'Set the JS Object - Java Script Object.
        Set JSO = PDDoc.GetJSObject
        
        'Search for the word.
        If Not JSO Is Nothing Then
            x = 0
            
            'Loop through all the pages of the PDF.
            For i = 0 To JSO.numPages - 1
                lngPages = 1
                'Loop through all the words of each page.
                For j = 0 To JSO.GetPageNumWords(i) - 1
                    
                    'Get a single word.
                    Word = JSO.getPageNthWord(i, j)
                    
                    'If the word is string...
                    If VarType(Word) = vbString Then
                        
                        'Compare the word with the text to be found.
                        Result = StrComp(Word, WordToFind, vbTextCompare)
                        
                        'If both strings are the same.
                        If Result = 0 Then
                            'Select the word and exit.
                            Call JSO.selectPageNthWord(i, j)
                            
                            Set newPDF = CreateObject("AcroExch.pdDoc")
                            newPDF.Create
                            
                            NewName = PDFNewName & "_" & RTrim(LTrim(Str(i))) & ".pdf"
                            newPDF.Open (PDFPath)
                            newPDF.InsertPages -1, PDDoc, i, 1, 0
                            newPDF.Save 1, NewName
                            newPDF.Close
                            Set newPDF = Nothing
                            
                            'lngPages = 1
                            GoTo NextPage
                            'Exit Sub
                        Else
                            lngPages = lngPages + 1
                        End If
                        
                    End If
NextWord:
                Next j
NextPage:
            Next i
            
            'Word was not found, close the PDF file without saving the changes.
            AVDoc.Close True
            
            'Close the Acrobat application.
            App.Exit
               
            'Release the objects.
            Set JSO = Nothing
            Set PDDoc = Nothing
            Set AVDoc = Nothing
            Set App = Nothing
            
            'Inform the user.
            MsgBox "The word '" & WordToFind & "' could not be found in the PDF file!", vbInformation, "Search Error"
            
        End If
        
    Else
                
        'Unable to open the PDF file, close the Acrobat application.
        App.Exit

        'Release the objects.
        Set AVDoc = Nothing
        Set App = Nothing
        
        'Inform the user.
        MsgBox "Could not open the PDF file!", vbCritical, "File error"
        
    End If
    
End Sub
 

Excel Facts

Format cells as date
Select range and press Ctrl+Shift+3 to format cells as date. (Shift 3 is the # sign which sort of looks like a small calendar).

Forum statistics

Threads
1,216,075
Messages
6,128,667
Members
449,462
Latest member
Chislobog

We've detected that you are using an adblocker.

We have a great community of people providing Excel help here, but the hosting costs are enormous. You can help keep this site running by allowing ads on MrExcel.com.
Allow Ads at MrExcel

Which adblocker are you using?

Disable AdBlock

Follow these easy steps to disable AdBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the icon in the browser’s toolbar.
2)Click on the "Pause on this site" option.
Go back

Disable AdBlock Plus

Follow these easy steps to disable AdBlock Plus

1)Click on the icon in the browser’s toolbar.
2)Click on the toggle to disable it for "mrexcel.com".
Go back

Disable uBlock Origin

Follow these easy steps to disable uBlock Origin

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back

Disable uBlock

Follow these easy steps to disable uBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back
Back
Top