Extract the file name and specific paragraph from a word file and save in a excel sheet

nbuddhi

New Member
Joined
Jun 23, 2020
Messages
29
Office Version
  1. 365
Platform
  1. Windows
Dear Team,

I have thousands of word files in a folder called "E:\LAB_22_AUG_21\FILTERED_DOCX" and I have to extract the 1st instant of full paragraph content "REPORT NO" texts. (It will be always one line paragraph with the format of "REPORT NO : (ABC12)0123-012345" will be repeated on each pages, so that have to find only the 1st instant). Once "Report No" data captured, same to be included into a new excel workbook/sheet, column "A" and particular word document name to be included into next column after the report number column. Once recorded the the "Report No" and "file name", has to go to next file and this process will be repeated till last file.

Thanks & Best Rgds,
Nuwan Buddhika.
 

Attachments

  • Image.jpg
    Image.jpg
    76.2 KB · Views: 10

Excel Facts

Move date out one month or year
Use =EDATE(A2,1) for one month later. Use EDATE(A2,12) for one year later.
Hi nbuddhi. You can trial this untested code. Change the sheet name to suit. Good luck. Dave
ps. It's going to take a long time to open, extract and close all them files.
Code:
Sub test()
Dim FSO As Object, FolDir As Object, FileNm As Object, StrFlPath As String
Dim WdApp As Object, Cnt As Integer, Counter As Integer, Last As Integer
'Check if Word is already opened
On Error Resume Next
Set WdApp = GetObject(, "Word.Application")
If Err.Number <> 0 Then
'If it isn't, open a new instance of it
Set WdApp = CreateObject("Word.Application")
End If
On Error GoTo 0
WdApp.Visible = False

Set FSO = CreateObject("scripting.filesystemobject")
Set FolDir = FSO.GetFolder("E:\LAB_22_AUG_21\FILTERED_DOCX")
'loop files
For Each FileNm In FolDir.Files
If FileNm.Name Like ".docx" Then
StrFlPath = FileNm
WdApp.Documents.Open Filename:=FileNm.Name, ReadOnly:=True
Last = WdApp.ActiveDocument.Paragraphs.Count
Counter = 1
'loop paragraphs
For Cnt = 1 To Last
'don't search blank paras
If WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text <> Chr(13) Then
If InStr(WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text, "REPORT NO") Then
Counter = Counter + 1
Sheets("Sheet1").Range("A" & Counter) = WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text
Sheets("Sheet1").Range("B" & Counter) = StrFlPath
WdApp.ActiveDocument.Close savechanges:=False
End If
End If
Next Cnt
End If
Next FileNm
WdApp.Quit
Set WdApp = Nothing
Set FolDir = Nothing
Set FSO = Nothing
End Sub
 
Upvote 0
Need an "Exit For" in there... Trial #2
Code:
Sub test()
Dim FSO As Object, FolDir As Object, FileNm As Object, StrFlPath As String
Dim WdApp As Object, Cnt As Integer, Counter As Integer, Last As Integer
'Check if Word is already opened
On Error Resume Next
Set WdApp = GetObject(, "Word.Application")
If Err.Number <> 0 Then
'If it isn't, open a new instance of it
Set WdApp = CreateObject("Word.Application")
End If
On Error GoTo 0
WdApp.Visible = False

Set FSO = CreateObject("scripting.filesystemobject")
Set FolDir = FSO.GetFolder("E:\LAB_22_AUG_21\FILTERED_DOCX")
'loop files
For Each FileNm In FolDir.Files
If FileNm.Name Like ".docx" Then
StrFlPath = FileNm
WdApp.Documents.Open Filename:=FileNm.Name, ReadOnly:=True
Last = WdApp.ActiveDocument.Paragraphs.Count
Counter = 1
'loop paragraphs
For Cnt = 1 To Last
'don't search blank paras
If WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text <> Chr(13) Then
If InStr(WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text, "REPORT NO") Then
Counter = Counter + 1
Sheets("Sheet1").Range("A" & Counter) = WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text
Sheets("Sheet1").Range("B" & Counter) = StrFlPath
WdApp.ActiveDocument.Close savechanges:=False
Exit For
End If
End If
Next Cnt
End If
Next FileNm
WdApp.Quit
Set WdApp = Nothing
Set FolDir = Nothing
Set FSO = Nothing
End Sub
 
Upvote 0
Last revision ....
Code:
Sub test()
Dim FSO As Object, FolDir As Object, FileNm As Object, StrFlPath As String
Dim WdApp As Object, Cnt As Integer, Counter As Integer, Last As Integer
'Check if Word is already opened
On Error Resume Next
Set WdApp = GetObject(, "Word.Application")
If Err.Number <> 0 Then
'If it isn't, open a new instance of it
Set WdApp = CreateObject("Word.Application")
End If
On Error GoTo 0
WdApp.Visible = False

Set FSO = CreateObject("scripting.filesystemobject")
Set FolDir = FSO.GetFolder("E:\LAB_22_AUG_21\FILTERED_DOCX")
'loop files
Counter = 1
For Each FileNm In FolDir.Files
If FileNm.Name Like ".docx" Then
StrFlPath = FileNm
WdApp.Documents.Open Filename:=FileNm.Name, ReadOnly:=True
Last = WdApp.ActiveDocument.Paragraphs.Count
'loop paragraphs
For Cnt = 1 To Last
'don't search blank paras
If WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text <> Chr(13) Then
If InStr(WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text, "REPORT NO") Then
Counter = Counter + 1
Sheets("Sheet1").Range("A" & Counter) = WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text
Sheets("Sheet1").Range("B" & Counter) = StrFlPath
WdApp.ActiveDocument.Close savechanges:=False
Exit For
End If
End If
Next Cnt
End If
Next FileNm
WdApp.Quit
Set WdApp = Nothing
Set FolDir = Nothing
Set FSO = Nothing
End Sub
Dave
 
Upvote 0
Last revision ....
Code:
Sub test()
Dim FSO As Object, FolDir As Object, FileNm As Object, StrFlPath As String
Dim WdApp As Object, Cnt As Integer, Counter As Integer, Last As Integer
'Check if Word is already opened
On Error Resume Next
Set WdApp = GetObject(, "Word.Application")
If Err.Number <> 0 Then
'If it isn't, open a new instance of it
Set WdApp = CreateObject("Word.Application")
End If
On Error GoTo 0
WdApp.Visible = False

Set FSO = CreateObject("scripting.filesystemobject")
Set FolDir = FSO.GetFolder("E:\LAB_22_AUG_21\FILTERED_DOCX")
'loop files
Counter = 1
For Each FileNm In FolDir.Files
If FileNm.Name Like ".docx" Then
StrFlPath = FileNm
WdApp.Documents.Open Filename:=FileNm.Name, ReadOnly:=True
Last = WdApp.ActiveDocument.Paragraphs.Count
'loop paragraphs
For Cnt = 1 To Last
'don't search blank paras
If WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text <> Chr(13) Then
If InStr(WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text, "REPORT NO") Then
Counter = Counter + 1
Sheets("Sheet1").Range("A" & Counter) = WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text
Sheets("Sheet1").Range("B" & Counter) = StrFlPath
WdApp.ActiveDocument.Close savechanges:=False
Exit For
End If
End If
Next Cnt
End If
Next FileNm
WdApp.Quit
Set WdApp = Nothing
Set FolDir = Nothing
Set FSO = Nothing
End Sub
Dave
Dear Dave,

Many Thanks for your valuable time spent helping me. I tried the last version of code with limited number of files and it's running without any error, but no record giving back to the excel sheet as desired. Kindly help me reviewing the code if your time permit to do so.

Thanks & Best Rgds,
Nuwan.
 
Upvote 0
Hi Nuwan. My apologies, I was at the lake. Anyways, "Change the sheet name to suit" .... do you have a sheet1? Also, is it "REPORT NO" or "Report No" that your searching for? Anyways, U can trial this to see if anything is actually being extracted from the docs. HTH. Dave
Code:
Sub test()
Dim FSO As Object, FolDir As Object, FileNm As Object, StrFlPath As String
Dim WdApp As Object, Cnt As Integer, Counter As Integer, Last As Integer
'Check if Word is already opened
On Error Resume Next
Set WdApp = GetObject(, "Word.Application")
If Err.Number <> 0 Then
'If it isn't, open a new instance of it
Set WdApp = CreateObject("Word.Application")
End If
On Error GoTo 0
WdApp.Visible = False

Set FSO = CreateObject("scripting.filesystemobject")
Set FolDir = FSO.GetFolder("E:\LAB_22_AUG_21\FILTERED_DOCX")
'loop files
Counter = 1
For Each FileNm In FolDir.Files
If FileNm.Name Like ".docx" Then
StrFlPath = FileNm
WdApp.Documents.Open Filename:=FileNm.Name, ReadOnly:=True
Last = WdApp.ActiveDocument.Paragraphs.Count
'loop paragraphs
For Cnt = 1 To Last
'don't search blank paras
If WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text <> Chr(13) Then
If InStr(WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text, "REPORT NO") Then
Counter = Counter + 1
Sheets("Sheet1").Range("A" & Counter) = WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text
Sheets("Sheet1").Range("B" & Counter) = StrFlPath

MsgBox Sheets("Sheet1").Range("A" & Counter) & vbCrLf _
& Sheets("Sheet1").Range("B" & Counter)

WdApp.ActiveDocument.Close savechanges:=False
Exit For
End If
End If
Next Cnt
End If
Next FileNm
WdApp.Quit
Set WdApp = Nothing
Set FolDir = Nothing
Set FSO = Nothing
End Sub
 
Upvote 0
Hi Nuwan. My apologies, I was at the lake. Anyways, "Change the sheet name to suit" .... do you have a sheet1? Also, is it "REPORT NO" or "Report No" that your searching for? Anyways, U can trial this to see if anything is actually being extracted from the docs. HTH. Dave
Code:
Sub test()
Dim FSO As Object, FolDir As Object, FileNm As Object, StrFlPath As String
Dim WdApp As Object, Cnt As Integer, Counter As Integer, Last As Integer
'Check if Word is already opened
On Error Resume Next
Set WdApp = GetObject(, "Word.Application")
If Err.Number <> 0 Then
'If it isn't, open a new instance of it
Set WdApp = CreateObject("Word.Application")
End If
On Error GoTo 0
WdApp.Visible = False

Set FSO = CreateObject("scripting.filesystemobject")
Set FolDir = FSO.GetFolder("E:\LAB_22_AUG_21\FILTERED_DOCX")
'loop files
Counter = 1
For Each FileNm In FolDir.Files
If FileNm.Name Like ".docx" Then
StrFlPath = FileNm
WdApp.Documents.Open Filename:=FileNm.Name, ReadOnly:=True
Last = WdApp.ActiveDocument.Paragraphs.Count
'loop paragraphs
For Cnt = 1 To Last
'don't search blank paras
If WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text <> Chr(13) Then
If InStr(WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text, "REPORT NO") Then
Counter = Counter + 1
Sheets("Sheet1").Range("A" & Counter) = WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text
Sheets("Sheet1").Range("B" & Counter) = StrFlPath

MsgBox Sheets("Sheet1").Range("A" & Counter) & vbCrLf _
& Sheets("Sheet1").Range("B" & Counter)

WdApp.ActiveDocument.Close savechanges:=False
Exit For
End If
End If
Next Cnt
End If
Next FileNm
WdApp.Quit
Set WdApp = Nothing
Set FolDir = Nothing
Set FSO = Nothing
End Sub
Dear Dave,

Sheet1 is there and searchable word is exactly "REPORT NO"; I did try, but no data extracted into excel. I just removed the first error handler "On Error Resume Next" and run the code, then getting the error "Run-time error 91: Object variable not set", but I can't find the exact reason. Thanks again for your time.

Best Rgds,
Nuwan
 
Upvote 0
Hi Nuwan and again I offer my apologies. This time for not having trialled the code before I posted. This now seems to work with my limitted testing. HTH. Dave
Code:
Sub test()
Dim FSO As Object, FolDir As Object, FileNm As Object, StrFlPath As String
Dim WdApp As Object, Cnt As Integer, Counter As Integer, Last As Integer
'Check if Word is already opened
On Error Resume Next
Set WdApp = GetObject(, "Word.Application")
If Err.Number <> 0 Then
'If it isn't, open a new instance of it
Set WdApp = CreateObject("Word.Application")
End If
On Error GoTo 0
WdApp.Visible = False

Set FSO = CreateObject("scripting.filesystemobject")
Set FolDir = FSO.GetFolder("E:\LAB_22_AUG_21\FILTERED_DOCX")
'loop files
Counter = 1
For Each FileNm In FolDir.Files
If FileNm.Name Like "*" & ".docx" Then
StrFlPath = FileNm
WdApp.Documents.Open Filename:=StrFlPath, ReadOnly:=True
Last = WdApp.ActiveDocument.Paragraphs.Count
'loop paragraphs
For Cnt = 1 To Last
'don't search blank paras
If WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text <> Chr(13) Then
If InStr(WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text, "REPORT NO") Then
Counter = Counter + 1
Sheets("Sheet1").Range("A" & Counter) = WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text
Sheets("Sheet1").Range("B" & Counter) = StrFlPath
WdApp.ActiveDocument.Close savechanges:=False
Exit For
End If
End If
Next Cnt
End If
Next FileNm

WdApp.Quit
Set WdApp = Nothing
Set FolDir = Nothing
Set FSO = Nothing
End Sub
 
Upvote 0
Solution
Hi Nuwan and again I offer my apologies. This time for not having trialled the code before I posted. This now seems to work with my limitted testing. HTH. Dave
Code:
Sub test()
Dim FSO As Object, FolDir As Object, FileNm As Object, StrFlPath As String
Dim WdApp As Object, Cnt As Integer, Counter As Integer, Last As Integer
'Check if Word is already opened
On Error Resume Next
Set WdApp = GetObject(, "Word.Application")
If Err.Number <> 0 Then
'If it isn't, open a new instance of it
Set WdApp = CreateObject("Word.Application")
End If
On Error GoTo 0
WdApp.Visible = False

Set FSO = CreateObject("scripting.filesystemobject")
Set FolDir = FSO.GetFolder("E:\LAB_22_AUG_21\FILTERED_DOCX")
'loop files
Counter = 1
For Each FileNm In FolDir.Files
If FileNm.Name Like "*" & ".docx" Then
StrFlPath = FileNm
WdApp.Documents.Open Filename:=StrFlPath, ReadOnly:=True
Last = WdApp.ActiveDocument.Paragraphs.Count
'loop paragraphs
For Cnt = 1 To Last
'don't search blank paras
If WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text <> Chr(13) Then
If InStr(WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text, "REPORT NO") Then
Counter = Counter + 1
Sheets("Sheet1").Range("A" & Counter) = WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text
Sheets("Sheet1").Range("B" & Counter) = StrFlPath
WdApp.ActiveDocument.Close savechanges:=False
Exit For
End If
End If
Next Cnt
End If
Next FileNm

WdApp.Quit
Set WdApp = Nothing
Set FolDir = Nothing
Set FSO = Nothing
End Sub
Dear Dave,

It's perfect and worked like charm. Thank you very much for your great support. Cheers.

Best Rgds,
Nuwan.
 
Upvote 0
You are welcome Nuwan. Thanks for posting your outcome. You may want to trial the following code to remove the "REPORT NO:" part of the return. Dave
Code:
Sheets("Sheet1").Range("A" & Counter) = _
Right(WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text, _
   Len(WdApp.ActiveDocument.Paragraphs(Cnt).Range.Text) - 10)
 
Upvote 0

Forum statistics

Threads
1,215,326
Messages
6,124,260
Members
449,149
Latest member
mwdbActuary

We've detected that you are using an adblocker.

We have a great community of people providing Excel help here, but the hosting costs are enormous. You can help keep this site running by allowing ads on MrExcel.com.
Allow Ads at MrExcel

Which adblocker are you using?

Disable AdBlock

Follow these easy steps to disable AdBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the icon in the browser’s toolbar.
2)Click on the "Pause on this site" option.
Go back

Disable AdBlock Plus

Follow these easy steps to disable AdBlock Plus

1)Click on the icon in the browser’s toolbar.
2)Click on the toggle to disable it for "mrexcel.com".
Go back

Disable uBlock Origin

Follow these easy steps to disable uBlock Origin

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back

Disable uBlock

Follow these easy steps to disable uBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back
Back
Top