Finding a string difference.

RyDowte

New Member
Joined
Jun 23, 2015
Messages
2
Dear All,

My <acronym title="visual basic for applications" style="border-width: 0px 0px 1px; border-bottom-style: dotted; border-bottom-color: rgb(0, 0, 0); cursor: help; color: rgb(51, 51, 51); background-color: rgb(250, 250, 250);">VBA skills are not strong</acronym> so please bear with me. These codes work perfectly about half the time, both having different problems. The inputs below are two example strings that give me an issue for both codes.

Inputs:
string1= Part II Long / Non Producer F2F / Part III BMI / Part III BP / MIB / BCP-Protein / BCP-KF / BCP-Glucose / BCP-Chol / BCP-LF / BCP-HIV / HOS / I\ADL Qs / APS / MVR

string2= Part II Long / Non Producer F2F / Part III BMI / Part III BP / MIB / BCP-Protein / BCP-KF / BCP-Glucose / BCP-Chol / BCP-LF / BCP-HIV / NT-proBNP / PSA / HOS / APS / MVR

Code 1:
Code:
Code:
Sub exampleString()
'This is the code i am working on
'This code should be used moving forward


Dim r As Integer
Dim w As Integer
For r = 4 To 12


For w = 4 To 10




Cells(r + 26, w).Clear
   Dim arr() As Long
    
    Dim String1 As String
    Dim String2 As String
    Dim Target As Range
    
    String1 = Cells(r, w)
    String2 = Cells(r + 13, w)
    
    arr = longestCommonSubsequence(String1, String2)
    
    Dim s As String, t As String
    s = backTraceUp(arr, String1, String2, Len(String1), Len(String2))
    t = backTraceLeft(arr, String1, String2, Len(String1), Len(String2))
    Dim a As String, B As String
    a = getDiff(arr, String1, String2, Len(String1), Len(String2))
    
    
    Set Target = ActiveSheet.Cells(r + 26, w)
    
    Dim i As Integer
    For i = 2 To Len(a) Step 2
        Target.Value2 = Target.Value2 & Mid(a, i - 1, 1)
    Next i
    For i = 2 To Len(a) Step 2
        Select Case Mid(a, i, 1)
        Case "-"
            With Target.Characters(i / 2, 1).Font
                .Strikethrough = True
                .Color = RGB(255, 0, 0)
                .Bold = False
            End With
        Case "+"
            With Target.Characters(i / 2, 1).Font
                .ColorIndex = 4
                .Bold = False
                .Underline = xlUnderlineStyleSingle
            End With
        End Select
    Next i
    Dim brr() As Long
    Next w
    Next r
    
End Sub
Public Function longestCommonSubsequence(ByRef String1 As String, ByRef String2 As String) As Long()
    If String1 = vbNullString Or String2 = vbNullString Then
        Exit Function
    End If
 
    Dim num() As Long
    
    'define the array, note rows of zeros get added to front automatically
    ReDim num(Len(String1), Len(String2))
    
    Dim i As Long, j As Long
    
    For i = 1 To Len(String1)
        For j = 1 To Len(String2)
            If Mid$(String1, i, 1) = Mid$(String2, j, 1) Then
                num(i, j) = num(i - 1, j - 1) + 1
            Else
                num(i, j) = max(num(i - 1, j), num(i, j - 1))
            End If
        Next j
    Next i




    longestCommonSubsequence = num
End Function
'back traces c, defaulting in the up direction
Public Function backTraceUp(ByRef C() As Long, ByRef String1 As String, ByRef String2 As String, ByRef i As Long, ByRef j As Long) As String
    If i < 1 Or j < 1 Then
        backTraceUp = vbNullString
    ElseIf Mid$(String1, i, 1) = Mid$(String2, j, 1) Then
        'equal characters, save it and then go up and left
        backTraceUp = backTraceUp(C, String1, String2, i - 1, j - 1) & Mid$(String1, i, 1)
    Else
        'go in the direction of the highest number, defaulting to up
        If (C(i, j - 1) > C(i - 1, j)) Then
            backTraceUp = backTraceUp(C, String1, String2, i, j - 1)
        Else
            backTraceUp = backTraceUp(C, String1, String2, i - 1, j)
        End If
    End If
End Function
'back traces c, defaulting in the left direction
Public Function backTraceLeft(ByRef C() As Long, ByRef String1 As String, ByRef String2 As String, ByRef i As Long, ByRef j As Long) As String
    If i < 1 Or j < 1 Then
        backTraceLeft = vbNullString
    ElseIf Mid$(String1, i, 1) = Mid$(String2, j, 1) Then
        'equal characters, save it and then go up and left
        backTraceLeft = backTraceLeft(C, String1, String2, i - 1, j - 1) & Mid$(String1, i, 1)
    Else
        'go in the direction of the highest number, defaulting to left
        If (C(i, j - 1) >= C(i - 1, j)) Then
            backTraceLeft = backTraceLeft(C, String1, String2, i, j - 1)
        Else
            backTraceLeft = backTraceLeft(C, String1, String2, i - 1, j)
        End If
    End If
End Function
'the following function returns a string with indication to what was deleted or added
'proceding character can be;
' = no change
' - deletion
' + addition
Public Function getDiff(ByRef C() As Long, ByRef stringOld As String, ByRef stringNew As String, ByRef i As Long, ByRef j As Long) As String
    If i > 0 Then
        If j > 0 Then 'both are greater than zero
            'can only do the following comparison when i and j are greater than zero
            If Mid$(stringOld, i, 1) = Mid$(stringNew, j, 1) Then
                getDiff = getDiff(C, stringOld, stringNew, i - 1, j - 1) & Mid$(stringOld, i, 1) & "="
            Else
                If i = 0 Then
                    getDiff = getDiff(C, stringOld, stringNew, i, j - 1) & Mid$(stringNew, j, 1) & "+"
                ElseIf C(i, j - 1) >= C(i - 1, j) Then
                    getDiff = getDiff(C, stringOld, stringNew, i, j - 1) & Mid$(stringNew, j, 1) & "+"
                ElseIf j = 0 Then
                    getDiff = getDiff(C, stringOld, stringNew, i - 1, j) & Mid$(stringOld, i, 1) & "-"
                ElseIf C(i, j - 1) < C(i - 1, j) Then
                    getDiff = getDiff(C, stringOld, stringNew, i - 1, j) & Mid$(stringOld, i, 1) & "-"
                Else
                    getDiff = vbNullString
                End If
            End If
        Else 'i is is greater than zero
                If j = 0 Then
                    getDiff = getDiff(C, stringOld, stringNew, i - 1, j) & Mid$(stringOld, i, 1) & "-"
                ElseIf C(i, j - 1) < C(i - 1, j) Then
                    getDiff = getDiff(C, stringOld, stringNew, i - 1, j) & Mid$(stringOld, i, 1) & "-"
                Else
                    getDiff = vbNullString
                End If
        End If
    Else
        If j > 0 Then 'j is  greater than zero
                If i = 0 Then
                    getDiff = getDiff(C, stringOld, stringNew, i, j - 1) & Mid$(stringNew, j, 1) & "+"
                ElseIf C(i, j - 1) >= C(i - 1, j) Then
                    getDiff = getDiff(C, stringOld, stringNew, i, j - 1) & Mid$(stringNew, j, 1) & "+"
                Else
                    getDiff = vbNullString
                End If
        Else 'none are greater than zero
                getDiff = vbNullString
        End If
    End If
End Function
Public Function max(ByRef a As Long, ByRef B As Long) As Long
    If a >= B Then
        max = a
    Else
        max = B
    End If
End Function
[/COLOR][COLOR=#333333]


Code 2:
Code:
Code:
Sub test_UW()
    Dim aStr As String
    Dim bStr As String
    Dim r As Integer
    Dim s As Integer
    For r = 4 To 12
    
    
    For s = 4 To 10
    
    
    aStr = Cells(r, s)
    bStr = Cells(r + 13, s)
    
    Call CompareAndDisplay(aStr, bStr, Cells(r + 26, s))
    
    Next s
    Next r
    
End Sub


Sub CompareAndDisplay(strOne As String, strTwo As String, outCell As Range, Optional Delimiter As String = " ")
    Dim strResult As String
    Dim olStart As Variant, olLength As Variant
    Dim nwStart As Variant, nwLength As Variant
    Dim i As Long
    
    strResult = ComparedText(strOne, strTwo, olStart, olLength, nwStart, nwLength)
    
    With outCell.Cells(1, 1)
        .Clear
        .Value = strResult
        For i = LBound(olStart) To UBound(olStart)
            If olStart(i) <> 0 Then
                
                With .Characters(olStart(i), olLength(i)).Font
                    .ColorIndex = 3
                    .Strikethrough = True
                End With
            End If
        Next i
        For i = LBound(nwStart) To UBound(nwStart)
            If nwStart(i) <> 0 Then
                With .Characters(nwStart(i), nwLength(i)).Font
                    .ColorIndex = 4
                    .Underline = True
                End With
            End If
        Next i
    End With
End Sub




Function ComparedText(aString As String, bString As String, _
                                        Optional ByRef oStart As Variant, Optional oLength As Variant, _
                                        Optional ByRef nStart As Variant, Optional ByRef nLength As Variant, _
                                        Optional Delimiter As String = " ") As String
    Dim aWords As Variant, aWord As String
    Dim bWords As Variant
    Dim outWords() As String
    Dim aPoint As Long, bPoint As Long, outPoint As Long
    Dim matchPoint As Variant, outLength As Long
    Dim High As Long
    Dim oldStart() As Long, oldLength() As Long, oldPoint As Long
    Dim newStart() As Long, newLength() As Long, newPoint As Long
    
    Rem remove double delimiters
        'to be done
        
    aWords = Split(aString, Delimiter)
    bWords = Split(bString, Delimiter)
    High = UBound(aWords)
    ReDim outWords(0 To High + UBound(bWords))
    ReDim oldStart(0 To High + UBound(bWords)): ReDim oldLength(0 To High + UBound(bWords))
    ReDim newStart(0 To High + UBound(bWords)): ReDim newLength(0 To High + UBound(bWords))
    oldPoint = -1: newPoint = -1
    outLength = Len(Delimiter)
    aPoint = 0: bPoint = 0: outPoint = LBound(outWords) - 1
    
    Do
        aWord = aWords(aPoint)
        If LCase(aWord) = LCase(bWords(bPoint)) Then
            Rem is word in common
            outPoint = outPoint + 1
            outWords(outPoint) = aWord
            outLength = outLength + Len(aWord) + Len(Delimiter)
            bWords(bPoint) = vbNullString
            aPoint = aPoint + 1
            bPoint = bPoint + 1
        Else
            Rem word divergence
            matchPoint = Application.Match(aWord, bWords, 0)
            If IsError(matchPoint) Then
                Rem old word is not in new string
                outPoint = outPoint + 1
                outWords(outPoint) = aWord
                
                oldPoint = oldPoint + 1
                oldStart(oldPoint) = outLength: oldLength(oldPoint) = Len(aWord)
                
                outLength = outLength + Len(aWord) + Len(Delimiter)
                aPoint = aPoint + 1
            Else
                Rem old word is in new string, i.e. is a common word
                
                Do Until LCase(bWords(bPoint)) = LCase(aWord)
                    outPoint = outPoint + 1
                    outWords(outPoint) = bWords(bPoint)
                    
                    newPoint = newPoint + 1
                    newStart(newPoint) = outLength: newLength(newPoint) = Len(bWords(bPoint))
        
                    outLength = outLength + Len(outWords(outPoint)) + Len(Delimiter)
                    bWords(bPoint) = vbNullString
                    bPoint = bPoint + 1
                Loop
            End If
        End If
    Loop Until (High < aPoint) Or (UBound(bWords) < bPoint)
    
    Rem last new/different string
    Do Until UBound(bWords) < bPoint
        outPoint = outPoint + 1
        outWords(outPoint) = bWords(bPoint)
        
        newPoint = newPoint + 1
        newStart(newPoint) = outLength: newLength(newPoint) = Len(bWords(bPoint))
        
        outLength = outLength + Len(outWords(outPoint)) + Len(Delimiter)
        bPoint = bPoint + 1
    Loop
    
    Rem final common string
    Do Until High < aPoint
        outPoint = outPoint + 1
        outWords(outPoint) = aWords(aPoint)
        
        oldPoint = oldPoint + 1
        oldStart(oldPoint) = outLength: oldLength(oldPoint) = Len(aWords(aPoint))
        
        outLength = outLength + Len(outWords(outPoint)) + Len(Delimiter)
        aPoint = aPoint + 1
    Loop
    
    ReDim Preserve outWords(0 To outPoint)
    oStart = oldStart: oLength = oldLength
    nStart = newStart: nLength = newLength
    ComparedText = Join(outWords, Delimiter)
End Function
[/COLOR][COLOR=#333333]

Code 1 yields:
Part II Long / Non Producer F2F / Part III BMI / Part III BP / MIB / BCP-Protein / BCP-KF / BCP-Glucose / BCP-Chol / BCP-LF / BCP-HIV / <strike>HOS</strike>NT-proBNP / <strike>I\</strike>PSA<strike>DL</strike> <strike>Qs</strike>/ HOS / APS / MVR


Code 2 yields:
Part II Long / Non Producer F2F / Part III BMI / Part III BP / MIB / BCP-Protein / BCP-KF / BCP-Glucose / BCP-Chol / BCP-LF / BCP-HIV / NT-proBNP / PSA / HOS / <strike>I\ADL</strike> <strike>Qs</strike>APS / <strike>APS</strike> <strike>/</strike> MVR


I would like to strike through red what has been taken away from string1 and underline green what has been added from string2. So my desired output would be:
Part II Long / Non Producer F2F / Part III BMI / Part III BP / MIB / BCP-Protein / BCP-KF / BCP-Glucose / BCP-Chol / BCP-LF / BCP-HIV / NT-proBNP/PSA/ HOS / <strike>I\ADL</strike> <strike>Qs</strike> / APS / MVR



Like I said, sometimes these codes work perfectly and other times I run into these issues. Any help would be much appreciated. Thank You!! :confused::confused::confused:
 
Last edited:

Excel Facts

When they said...
When they said you are going to "Excel at life", they meant you "will be doing Excel your whole life".
A couple thoughts (can't test anything in VBA for you right now):
- I'm guessing the second code is the better one, as splitting the strings into arrays is probably the easiest.
- Pretty sure you can use " / " (3 characters) as a delimiter; that'll make your arrays clean and you can add the delimiter back in when you're recreating the final string.
- in your ComparedText Function, declare Optional ByRef oLength as Variant.
- If those fail, look up the InStr() function; it could make your code a lot simpler. I'm thinking something along the lines of this:

Code:
Delimiter = " / "
aWords = Split(aString, Delimiter)
For i = 0 to UBound(aWords)
[INDENT]ThisInteger = InStr(1, bString, aWords(i)) 'InStr returns an integer with the starting position of the matched string
If ThisInteger <> 0 Then
[/INDENT]
[INDENT=2]'do nothing, snippet is found in second string
[/INDENT]
[INDENT]Else
[/INDENT]
[INDENT=2]'snippet isn't found, add snippet to red/strikethrough word array


[/INDENT]
[INDENT]End If[/INDENT]
Next i

You'd need an additional loop for adding snippets to the green/underline word array, and it might get weird trying to find the position of snippets within the final array. Don't mess with all the InStr crap until you've tried changing the delimiter, though, I think that one change might fix it all for you.
 
Upvote 0

Forum statistics

Threads
1,216,458
Messages
6,130,754
Members
449,588
Latest member
accountant606

We've detected that you are using an adblocker.

We have a great community of people providing Excel help here, but the hosting costs are enormous. You can help keep this site running by allowing ads on MrExcel.com.
Allow Ads at MrExcel

Which adblocker are you using?

Disable AdBlock

Follow these easy steps to disable AdBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the icon in the browser’s toolbar.
2)Click on the "Pause on this site" option.
Go back

Disable AdBlock Plus

Follow these easy steps to disable AdBlock Plus

1)Click on the icon in the browser’s toolbar.
2)Click on the toggle to disable it for "mrexcel.com".
Go back

Disable uBlock Origin

Follow these easy steps to disable uBlock Origin

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back

Disable uBlock

Follow these easy steps to disable uBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back
Back
Top