Hi and thanks in advance,
I am using Excel 2010.
I have a data sheet with about 100,000 rows and 30 columns. There may be up to 200,000 rows in the future. I have written some VBA that searches 12 of those 30 columns for a whole word match with one of 350 separate words which I have listed on another sheet. If a match is found in one of those 12 columns then the whole row of data is copied to a results sheet and the code moves to the next row and repeats. I am using regex to replace non alpha numeric characters with a space and then searching for a whole word delimited by spaces. This seems to work ok but takes a very long time to run. For 100,000 rows it is taking around 16 minutes. Wondering if there is a much quicker way to do the same thing?
Thanks
Deutz
I am using Excel 2010.
I have a data sheet with about 100,000 rows and 30 columns. There may be up to 200,000 rows in the future. I have written some VBA that searches 12 of those 30 columns for a whole word match with one of 350 separate words which I have listed on another sheet. If a match is found in one of those 12 columns then the whole row of data is copied to a results sheet and the code moves to the next row and repeats. I am using regex to replace non alpha numeric characters with a space and then searching for a whole word delimited by spaces. This seems to work ok but takes a very long time to run. For 100,000 rows it is taking around 16 minutes. Wondering if there is a much quicker way to do the same thing?
Code:
Sub TestIt()
Dim arrData As Variant
Dim arrWords As Variant
arrData = Sheets("Data").Range("A2:AD100000")
arrWords = Sheets("WordList").Range("A2:A350")
GetMatchingRows arrData, arrWords
End Sub
Sub GetMatchingRows(ByVal arrData As Variant, ByVal arrWords As Variant)
Dim n As Long
Dim i As Long
Dim j As Long
Dim x As Long
Dim arr() As String
Dim arrCols() As String
Dim wsSource As Worksheet
Dim wsDest As Worksheet
Dim RegEx As Object
'Dim StartTime As Double
'Dim MinutesElapsed As String
Set RegEx = CreateObject("VBScript.RegExp")
'StartTime = Timer
With RegEx
.Pattern = "[^a-zA-Z0-9]"
.Global = True
.MultiLine = True
End With
Set wsSource = Worksheets("Data")
Set wsDest = Worksheets("Result")
arr = ("2,4,5,8,9,11,12,14,16,20")
' Assign column numbers to array
arrCols = Split(arr, ",")
x = 10
' Loop data array for each column number
For i = LBound(arrData) To UBound(arrData) ' Row
For j = LBound(arrCols) To UBound(arrCols) ' Col
' Replace all non alpha numeric characters in the array element with a space
If RegEx.test(arrData(i, arrCols(j))) Then
arrData(i, arrCols(j)) = RegEx.Replace(arrData(i, arrCols(j)), " ")
arrData(i, arrCols(j)) = " " & arrData(i, arrCols(j)) & " "
End If
' Loop words to match on array
For n = LBound(arrWords) To UBound(arrWords)
' Test if whole word found in data array element (space & word & space)
If InStr(1, UCase(arrData(i, arrCols(j))), " " & UCase(arrWords(n, 1)) & " ") > 0 Then
x = x + 1
' If whole word found then add the whole data row to the destination sheet and exit the column
' For loop and continue with the next row For loop
wsDest.Range("A" & x & ":AD" & x).Value = wsSource.Range("A" & i + 1 & ":" & "AD" & i + 1).Value
GoTo ExitColFor
End If
Next n
Next j
ExitColFor:
Next i
wsDest.Range("A7").Value = x - 10
MsgBox "Done"
'MinutesElapsed = Format((Timer - StartTime) / 86400, "hh:mm:ss")
'MsgBox "This code ran successfully in " & MinutesElapsed & " minutes", vbInformation
Set RegEx = Nothing
End Sub
Thanks
Deutz