Web Scraping Add problem in list

fredrerik84

Active Member
Joined
Feb 26, 2017
Messages
383
Hi I would like to start this thread to thank the community here and especially Domenic for helping me together the code bellow:

Rich (BB code):
Rich (BB code):
    Set HTMLDoc = IE.document
Sub testing2()




    Dim IE As New SHDocVw.InternetExplorer
    Dim HTMLDoc As New MSHTML.HTMLDocument
    Dim HTMLRows As MSHTML.IHTMLElementCollection
    Dim HTMLRow As MSHTML.IHTMLElement
    Dim League As String
    Dim sheet As Worksheet
    Dim r As Long
    Dim i As Long
    Dim j As Long
    Dim K As Long
    Dim vMatch As Variant
    
    
    Set sheet = ActiveWorkbook.Sheets("Conversion")
    IE.Navigate "http://www.xscores.com/soccer/scheduled_games/24-05"
    IE.Visible = True
    
With IE
        Do While .Busy Or .readyState <> READYSTATE_COMPLETE
            DoEvents
        Loop
    End With
    
    Set HTMLDoc = IE.document
    Set HTMLRows = HTMLDoc.getElementById("scoretable").getElementsByTagName("tr")
    
    Columns("B:F").ClearContents
     Columns("B:F").Borders.LineStyle = xlNone
    
    r = 2
    For i = 0 To HTMLRows.Length - 3
        If HTMLRows(i).Cells.Length - 1 >= 10 Then
            Cells(r, "B").Value = HTMLRows(i).Cells(0).innerText
            Cells(r, "B").Borders(xlEdgeLeft).Color = RGB(91, 155, 213)
            League = HTMLRows(i).Cells(IIf(i > 0, 4, 3)).innerText
            Cells(r, "C").Value = League
            vMatch = Application.VLookup(League, sheet.Range("V1:X62"), 3, False)
            If Not IsError(vMatch) Then
                Cells(r, "C").Value = vMatch
            Else
                Cells(r, "C").Value = League
            End If
            Cells(r, "D").Value = HTMLRows(i).Cells(IIf(i > 0, 5, 4)).innerText & " (" & HTMLRows(i).Cells(IIf(i > 0, 6, 5)).innerText & ")"
            If i > 0 Then
                Cells(r, "E").Value = GetCountry(HTMLRows(i).Cells(3).getElementsByTagName("a")(0).getAttribute("title"))
            Else
                Cells(r, "E").Value = "Country"
            End If
            Cells(r, "F").Value = HTMLRows(i).Cells(IIf(i > 0, 9, 7)).innerText & " (" & HTMLRows(i).Cells(IIf(i > 0, 10, 8)).innerText & ")"
            Cells(r, "F").Borders(xlEdgeRight).Color = RGB(91, 155, 213)
            r = r + 1
        End If
    Next i
    Cells(r - 1, 2).Resize(, 5).Borders(xlEdgeBottom).Color = RGB(91, 155, 213)
    MsgBox i - 4 & " Matches has been Loaded.", vbInformation
    IE.Quit
    Set HTMLDoc = Nothing
    Set HTMLRows = Nothing
    Set HTMLRow = Nothing
    
End Sub

but I have made some changes to this code as this page has also "all_games" link so the line in red above will be: IE.Navigate "http://www.xscores.com/soccer/all_games/24-05" but this gives following problem: as this is just as summerize page for Finished_games, Live_games and up_comming games so this site handles this with adding an Add in the middle of the list. , Is there an way/code I can try to jump over this without going into infinite loop Any help would be greatly appreciated , my sulltion is to run the code from 3 links but I relly hope there is an better way

 
Last edited:

Excel Facts

How can you automate Excel?
Press Alt+F11 from Windows Excel to open the Visual Basic for Applications (VBA) editor.
I think the way we can deal with ads that may occur within the table and anything else that may be added at the end of the table is by processing only rows where the row is either the first row or where the first cell in the row contains a time. See if this helps (changes are in red)...

Code:
Sub testing2()




    Dim IE As New SHDocVw.InternetExplorer
    Dim HTMLDoc As New MSHTML.HTMLDocument
    Dim HTMLRows As MSHTML.IHTMLElementCollection
    Dim HTMLRow As MSHTML.IHTMLElement
    Dim League As String
    Dim sheet As Worksheet
    Dim r As Long
    Dim i As Long
    Dim j As Long
    Dim K As Long
    Dim vMatch As Variant
    [COLOR=#ff0000]Dim sKO As String[/COLOR]
    
    
    Set sheet = ActiveWorkbook.Sheets("Conversion")
    IE.Navigate "http://www.xscores.com/soccer/scheduled_games/24-05"
    IE.Visible = True
    
With IE
        Do While .Busy Or .readyState <> READYSTATE_COMPLETE
            DoEvents
        Loop
    End With
    
    Set HTMLDoc = IE.document
    Set HTMLRows = HTMLDoc.getElementById("scoretable").getElementsByTagName("tr")
    
    Columns("B:F").ClearContents
     Columns("B:F").Borders.LineStyle = xlNone
    
    r = 2
    [COLOR=#ff0000]For i = 0 To HTMLRows.Length - 1
        sKO = HTMLRows(i).Cells(0).innerText[/COLOR]
       [COLOR=#ff0000] If i = 0 Or IsDate(sKO) Then[/COLOR]
           [COLOR=#ff0000] Cells(r, "B").Value = sKO[/COLOR]
            Cells(r, "B").Borders(xlEdgeLeft).Color = RGB(91, 155, 213)
            League = HTMLRows(i).Cells(IIf(i > 0, 4, 3)).innerText
            Cells(r, "C").Value = League
            vMatch = Application.VLookup(League, sheet.Range("V1:X62"), 3, False)
            If Not IsError(vMatch) Then
                Cells(r, "C").Value = vMatch
            Else
                Cells(r, "C").Value = League
            End If
            Cells(r, "D").Value = HTMLRows(i).Cells(IIf(i > 0, 5, 4)).innerText & " (" & HTMLRows(i).Cells(IIf(i > 0, 6, 5)).innerText & ")"
            If i > 0 Then
                Cells(r, "E").Value = GetCountry(HTMLRows(i).Cells(3).getElementsByTagName("a")(0).getAttribute("title"))
            Else
                Cells(r, "E").Value = "Country"
            End If
            Cells(r, "F").Value = HTMLRows(i).Cells(IIf(i > 0, 9, 7)).innerText & " (" & HTMLRows(i).Cells(IIf(i > 0, 10, 8)).innerText & ")"
            Cells(r, "F").Borders(xlEdgeRight).Color = RGB(91, 155, 213)
            r = r + 1
        End If
    Next i
    Cells(r - 1, 2).Resize(, 5).Borders(xlEdgeBottom).Color = RGB(91, 155, 213)
    [COLOR=#ff0000]MsgBox r - 3 & " Matches has been Loaded.", vbInformation[/COLOR]
    IE.Quit
    Set HTMLDoc = Nothing
    Set HTMLRows = Nothing
    Set HTMLRow = Nothing
    
End Sub
 
Upvote 0
Hi and good morning

- this is great stuff. I guess this new edit can also be used where there are no adds ?
 
Upvote 0
There is one problem with the code though - happens almost everytime I make a change to the code

this line:
Code:
    With IE
        Do While .Busy Or .readyState <> READYSTATE_COMPLETE
            DoEvents
        Loop
    End With

Run-time error 2147023170 (800706be) :
Automation error


If I change the date or from upcoming to live , etc this code goes into loop
is there an work around to kill the code with an error msg ,
or best of all make this error not happen at all

I have a similar code which uses sleep kernel but this slows the code down to much
 
Upvote 0
Try clearing the variable IE from memory at the end of the procedure...

Code:
Set IE = Nothing

Does this help?
 
Upvote 0
No it does not help as I already had this line. I was thinking if it would be possible to end the loop after 4 sec with an error msg if something goes wrong
 
Upvote 0
If it gets stuck in that loop and you manually end the procedure, the object variable IE, along with the others, don't cleared from memory. So maybe this is what's causing the error. In any case, yes, it's a good idea to set a timeout. And, it's also a good idea to add some error handling to ensure that IE quits, and all object variables are cleared from memory in the event of an error. I'll amend the code accordingly, when I get a chance.
 
Upvote 0
Okay, I've revised the code to include a specified wait time. I've set it to 30 seconds, but you can change it as desired. I've also included some error handling as well. This will ensure that in an event of a time out or an error, IE will be closed, and the object variables cleared from memory. I've also taken the liberty to make some minor changes and additions, but you can ignore them if you so wish.

Code:
[COLOR=darkblue]Option[/COLOR] [COLOR=darkblue]Explicit[/COLOR]

[COLOR=darkblue]Const[/COLOR] MAX_WAIT_SEC [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]Integer[/COLOR] = 30 [COLOR=green]'change wait time as desired[/COLOR]

[COLOR=darkblue]Sub[/COLOR] GetData()

    [COLOR=darkblue]Dim[/COLOR] IE [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]New[/COLOR] SHDocVw.InternetExplorer
    [COLOR=darkblue]Dim[/COLOR] HTMLDoc [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]New[/COLOR] MSHTML.HTMLDocument
    [COLOR=darkblue]Dim[/COLOR] HTMLRows [COLOR=darkblue]As[/COLOR] MSHTML.IHTMLElementCollection
    [COLOR=darkblue]Dim[/COLOR] HTMLRow [COLOR=darkblue]As[/COLOR] MSHTML.IHTMLElement
    [COLOR=darkblue]Dim[/COLOR] League [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]String[/COLOR]
    [COLOR=darkblue]Dim[/COLOR] sheet [COLOR=darkblue]As[/COLOR] Worksheet
    [COLOR=darkblue]Dim[/COLOR] r [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]Long[/COLOR]
    [COLOR=darkblue]Dim[/COLOR] i [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]Long[/COLOR]
    [COLOR=darkblue]Dim[/COLOR] j [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]Long[/COLOR]
    [COLOR=darkblue]Dim[/COLOR] K [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]Long[/COLOR]
    [COLOR=darkblue]Dim[/COLOR] vMatch [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]Variant[/COLOR]
    [COLOR=darkblue]Dim[/COLOR] sKO [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]String[/COLOR]
    [COLOR=darkblue]Dim[/COLOR] sngStartTime [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]Single[/COLOR]
    [COLOR=darkblue]Dim[/COLOR] strURL [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]String[/COLOR]
    [COLOR=darkblue]Dim[/COLOR] strErrMsg [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]String[/COLOR]
    [COLOR=darkblue]Dim[/COLOR] blnSuccessful [COLOR=darkblue]As[/COLOR] [COLOR=darkblue]Boolean[/COLOR]
    
    [COLOR=darkblue]If[/COLOR] TypeName(ActiveSheet) <> "Worksheet" [COLOR=darkblue]Then[/COLOR]
        MsgBox "Please make sure that a worksheet is the active sheet, " & _
            "and try again.", vbExclamation
        [COLOR=darkblue]Exit[/COLOR] [COLOR=darkblue]Sub[/COLOR]
    [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]If[/COLOR]
    
    [COLOR=darkblue]On[/COLOR] [COLOR=darkblue]Error[/COLOR] [COLOR=darkblue]GoTo[/COLOR] ErrHandler
    
    Columns("B:F").ClearContents
    Columns("B:F").Borders.LineStyle = xlNone
    
    [COLOR=darkblue]With[/COLOR] Application
        .DisplayAlerts = [COLOR=darkblue]False[/COLOR]
        .EnableEvents = [COLOR=darkblue]False[/COLOR]
        .ScreenUpdating = [COLOR=darkblue]False[/COLOR]
    [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]With[/COLOR]
    
    [COLOR=darkblue]Set[/COLOR] sheet = ActiveWorkbook.Sheets("Conversion")
    
    strURL = "http://www.xscores.com/soccer/scheduled_games/25-05"
    
    [COLOR=darkblue]With[/COLOR] IE
        .Navigate strURL
        .Visible = [COLOR=darkblue]False[/COLOR]
        sngStartTime = Timer
        [COLOR=darkblue]Do[/COLOR] [COLOR=darkblue]While[/COLOR] .Busy [COLOR=darkblue]Or[/COLOR] .readyState <> READYSTATE_COMPLETE
            DoEvents
            [COLOR=darkblue]If[/COLOR] Timer - sngStartTime > MAX_WAIT_SEC [COLOR=darkblue]Then[/COLOR]
                strErrMsg = "Unable to connect to :" & vbCrLf & vbCrLf & strURL
                [COLOR=darkblue]GoTo[/COLOR] ErrHandler
            [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]If[/COLOR]
        [COLOR=darkblue]Loop[/COLOR]
    [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]With[/COLOR]
    
    [COLOR=darkblue]Set[/COLOR] HTMLDoc = IE.document
    [COLOR=darkblue]Set[/COLOR] HTMLRows = HTMLDoc.getElementById("scoretable").getElementsByTagName("tr")
    
    r = 2
    [COLOR=darkblue]For[/COLOR] i = 0 [COLOR=darkblue]To[/COLOR] HTMLRows.Length - 1
        sKO = HTMLRows(i).Cells(0).innerText
        [COLOR=darkblue]If[/COLOR] i = 0 [COLOR=darkblue]Or[/COLOR] IsDate(sKO) [COLOR=darkblue]Then[/COLOR]
            Cells(r, "B").Value = sKO
            Cells(r, "B").Borders(xlEdgeLeft).Color = RGB(91, 155, 213)
            League = HTMLRows(i).Cells(IIf(i > 0, 4, 3)).innerText
            Cells(r, "C").Value = League
            vMatch = Application.VLookup(League, sheet.Range("V1:X62"), 3, [COLOR=darkblue]False[/COLOR])
            [COLOR=darkblue]If[/COLOR] [COLOR=darkblue]Not[/COLOR] IsError(vMatch) [COLOR=darkblue]Then[/COLOR]
                Cells(r, "C").Value = vMatch
            [COLOR=darkblue]Else[/COLOR]
                Cells(r, "C").Value = League
            [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]If[/COLOR]
            Cells(r, "D").Value = HTMLRows(i).Cells(IIf(i > 0, 5, 4)).innerText & " (" & HTMLRows(i).Cells(IIf(i > 0, 6, 5)).innerText & ")"
            [COLOR=darkblue]If[/COLOR] i > 0 [COLOR=darkblue]Then[/COLOR]
                Cells(r, "E").Value = GetCountry(HTMLRows(i).Cells(3).getElementsByTagName("a")(0).getAttribute("title"))
            [COLOR=darkblue]Else[/COLOR]
                Cells(r, "E").Value = "Country"
            [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]If[/COLOR]
            Cells(r, "F").Value = HTMLRows(i).Cells(IIf(i > 0, 9, 7)).innerText & " (" & HTMLRows(i).Cells(IIf(i > 0, 10, 8)).innerText & ")"
            Cells(r, "F").Borders(xlEdgeRight).Color = RGB(91, 155, 213)
            r = r + 1
        [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]If[/COLOR]
    [COLOR=darkblue]Next[/COLOR] i
    
    Cells(r - 1, 2).Resize(, 5).Borders(xlEdgeBottom).Color = RGB(91, 155, 213)
    
    blnSuccessful = [COLOR=darkblue]True[/COLOR]
    
ExitHandler:
    [COLOR=darkblue]If[/COLOR] [COLOR=darkblue]Not[/COLOR] IE [COLOR=darkblue]Is[/COLOR] [COLOR=darkblue]Nothing[/COLOR] [COLOR=darkblue]Then[/COLOR]
        IE.Quit
    [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]If[/COLOR]
    
    [COLOR=darkblue]With[/COLOR] Application
        .DisplayAlerts = [COLOR=darkblue]True[/COLOR]
        .EnableEvents = [COLOR=darkblue]True[/COLOR]
        .ScreenUpdating = [COLOR=darkblue]True[/COLOR]
    [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]With[/COLOR]
    
    [COLOR=darkblue]Set[/COLOR] IE = [COLOR=darkblue]Nothing[/COLOR]
    [COLOR=darkblue]Set[/COLOR] HTMLDoc = [COLOR=darkblue]Nothing[/COLOR]
    [COLOR=darkblue]Set[/COLOR] HTMLRows = [COLOR=darkblue]Nothing[/COLOR]
    [COLOR=darkblue]Set[/COLOR] HTMLRow = [COLOR=darkblue]Nothing[/COLOR]
    [COLOR=darkblue]Set[/COLOR] sheet = [COLOR=darkblue]Nothing[/COLOR]
    
    [COLOR=darkblue]If[/COLOR] blnSuccessful [COLOR=darkblue]Then[/COLOR]
        MsgBox r - 3 & " Matches has been Loaded.", vbInformation
    [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]If[/COLOR]
    
    [COLOR=darkblue]Exit[/COLOR] [COLOR=darkblue]Sub[/COLOR]
    
ErrHandler:
    [COLOR=darkblue]If[/COLOR] Len(strErrMsg) > 0 [COLOR=darkblue]Then[/COLOR]
        MsgBox strErrMsg, vbCritical, "Error"
        [COLOR=darkblue]GoTo[/COLOR] ExitHandler
    [COLOR=darkblue]Else[/COLOR]
        [COLOR=darkblue]If[/COLOR] Err <> 0 [COLOR=darkblue]Then[/COLOR]
            MsgBox "Error " & Err.Number & ":  " & Err.Description, vbCritical, "Error"
            [COLOR=darkblue]Resume[/COLOR] ExitHandler
        [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]If[/COLOR]
    [COLOR=darkblue]End[/COLOR] [COLOR=darkblue]If[/COLOR]
    
[COLOR=darkblue]End[/COLOR] [COLOR=darkblue]Sub[/COLOR]
 
Upvote 0

Forum statistics

Threads
1,215,425
Messages
6,124,824
Members
449,190
Latest member
rscraig11

We've detected that you are using an adblocker.

We have a great community of people providing Excel help here, but the hosting costs are enormous. You can help keep this site running by allowing ads on MrExcel.com.
Allow Ads at MrExcel

Which adblocker are you using?

Disable AdBlock

Follow these easy steps to disable AdBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the icon in the browser’s toolbar.
2)Click on the "Pause on this site" option.
Go back

Disable AdBlock Plus

Follow these easy steps to disable AdBlock Plus

1)Click on the icon in the browser’s toolbar.
2)Click on the toggle to disable it for "mrexcel.com".
Go back

Disable uBlock Origin

Follow these easy steps to disable uBlock Origin

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back

Disable uBlock

Follow these easy steps to disable uBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back
Back
Top