Excel vba scraper
I ran the code, but there's a problem. Input is omitted if a bookmaker does not exist.
If there are no bookmaker, the following information shall be entered.
< "Country", "League", "Date & Time", "Home", "Away", "Score", "Halfs", >
ex)
bookmaker yes : < "Country", "League", "Date & Time", "Home", "Away", "Score", "Halfs", "Bookmaker", "Total", "Over", "Under" >
bookmaker non : < "Country", "League", "Date & Time", "Home", "Away", "Score", "Halfs", >
Public Sub Extract_Data2()
Dim httpReq As WinHttp.WinHttpRequest
Dim HTMLdoc As HTMLDocument
Dim para As HTMLParaElement
Dim tRows As IHTMLElementCollection
Dim tRow As HTMLTableRow
Dim URLs As Range, URL As Range
Dim destSheet As Worksheet
Dim parts As Variant
Dim matchURL As String, matchOddsURL As String
Dim r As Long
Dim matchData(1 To 7) As Variant
Dim HTML As String
Dim startTime As Single
startTime = Timer
Set destSheet = ThisWorkbook.Worksheets("Ou_Odds")
With destSheet
.UsedRange.ClearContents
.Range("A1:K1").Value = Array("Country", "League", "Date & Time", "Home", "Away", "Score", "Halfs", "Bookmaker", "Total", "Over", "Under")
End With
r = 2
With ThisWorkbook.Worksheets("MatchDetails")
Set URLs = .Range("D2", .Cells(Rows.Count, "D").End(xlUp))
End With
Set httpReq = New WinHttp.WinHttpRequest
For Each URL In URLs
'Request the match result page by removing "#ou" from the URL
matchURL = Replace(URL.Value, "#ou", "")
Debug.Print matchURL
With httpReq
.Open "GET", matchURL, False
.setRequestHeader "Host", "www.betexplorer.com"
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
.setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
.setRequestHeader "Accept-Language", "en-US,en;q=0.5"
.setRequestHeader "Upgrade-Insecure-Requests", "1"
.setRequestHeader "Referer", "http://www.betexplorer.com/soccer/italy/serie-a-2015-2016/results/"
.send
Debug.Print .Status, .statusText
'Put response in a HTMLDocument for parsing
Set HTMLdoc = New HTMLDocument
HTMLdoc.body.innerHTML = .responseText
End With
DoEvents
matchData(1) = HTMLdoc.getElementsByClassName("list-breadcrumb__item")(2).innerText
matchData(2) = HTMLdoc.getElementsByClassName("list-breadcrumb__item")(3).innerText
'Date and time of match is in data-dt attribute of "match-date" P element:
'< P id=match-date class=list-details__item__date data-dt="15,5,2016,18,00">< /P>
Set para = HTMLdoc.getElementById("match-date")
parts = Split(para.getAttribute("data-dt"), ",")
matchData(3) = DateSerial(parts(2), parts(1), parts(0)) + TimeSerial(parts(3), parts(4), 0)
matchData(4) = HTMLdoc.getElementsByTagName("h2")(0).innerText
matchData(5) = HTMLdoc.getElementsByTagName("h2")(2).innerText
matchData(6) = HTMLdoc.getElementById("js-score").innerText
matchData(7) = HTMLdoc.getElementById("js-partial").innerText
'Construct match odds URL from match result URL
parts = Split(URL.Value, "/")
matchOddsURL = "http://www.betexplorer.com/gres/ajax/matchodds.php?p=1&b=ou&e=" & parts(7)
Debug.Print matchOddsURL
'Request the match odds data. The response is a JSON string containing HTML which itself contains the odds data
With httpReq
.Open "GET", matchOddsURL, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
.setRequestHeader "Accept", "application/json, text/javascript, */*; q=0.01"
.setRequestHeader "Accept-Language", "en-US,en;q=0.5"
.setRequestHeader "X-Requested-With", "XMLHttpRequest"
.setRequestHeader "Referer", URL.Value
.send
Debug.Print .Status, .statusText
'Extract HTML from JSON response and put in HTMLDocument
HTML = Mid(.responseText, Len("{'odds':'") + 1) 'remove {"odds":" at the start
HTML = Left(HTML, Len(HTML) - 2) 'remove "} at the end
HTML = Replace(HTML, "\n", "") 'remove newlines
HTML = Replace(HTML, "", "") 'remove escape characters (assumes that every "" is the escape character preceding the escaped character)
Set HTMLdoc = New HTMLDocument
HTMLdoc.body.innerHTML = HTML
End With
DoEvents
Set tRows = HTMLdoc.getElementsByTagName("TR")
For Each tRow In tRows
If tRow.getElementsByTagName("TABLE").Length = 0 Then 'no inner tables?
If tRow.innerText Like "*bet365*" Then
destSheet.Cells(r, "A").Resize(1, 7) = matchData
destSheet.Cells(r, "H").Value = tRow.Cells(0).innerText
destSheet.Cells(r, "I").Value = tRow.Cells(3).innerText
destSheet.Cells(r, "J").Value = tRow.Cells(4).getAttribute("data-odd")
destSheet.Cells(r, "K").Value = tRow.Cells(5).getAttribute("data-odd")
r = r + 1
End If
End If
Next
Next
Debug.Print "Elapsed time = " & Timer - startTime & " seconds"
End Sub