Public Sub Extract_Data4()
Dim httpReq As WinHttp.WinHttpRequest
Dim HTMLdoc As HTMLDocument
Dim para As HTMLParaElement
Dim tRows As IHTMLElementCollection
Dim tRow As HTMLTableRow
Dim URLs As Range, URL As Range
Dim destSheet As Worksheet
Dim parts As Variant
Dim matchURL As String, matchOddsURL As String
Dim r As Long, i As Long
Dim matchData(1 To 7) As Variant
Dim HTML As String
Dim bookmakers As Variant, numRows() As Long, maxRows As Long
Dim startTime As Single
startTime = Timer
bookmakers = Array("bet365", "188bet", "pinnacle")
ReDim numRows(UBound(bookmakers))
Set destSheet = ThisWorkbook.Worksheets("XMLhttp_Odds")
With destSheet
.UsedRange.ClearContents
.Range("A1:G1").Value = Array("Country", "League", "Date & Time", "Home", "Away", "Score", "Halfs")
For i = 0 To UBound(bookmakers)
.Range("H1").Offset(, i * 4).Resize(1, 4) = Array("Bookmaker", "Total", bookmakers(i) & " Over", bookmakers(i) & " Under")
Next
End With
r = 2
With ThisWorkbook.Worksheets("MatchDetails")
Set URLs = .Range("D2", .Cells(Rows.Count, "D").End(xlUp))
End With
Set httpReq = New WinHttp.WinHttpRequest
For Each URL In URLs
'Request the match result page by removing "#ou" from the URL
matchURL = Replace(URL.Value, "#ou", "")
With httpReq
.Open "GET", matchURL, False
.setRequestHeader "Host", "www.betexplorer.com"
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko" 'Windows 10, IE11
.setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
.setRequestHeader "Accept-Language", "en-US,en;q=0.5"
.setRequestHeader "Upgrade-Insecure-Requests", "1"
.setRequestHeader "Referer", "http://www.betexplorer.com/soccer/italy/serie-a-2015-2016/results/"
.send
Debug.Print .Status, .statusText
'Put response in a HTMLDocument for parsing
Set HTMLdoc = New HTMLDocument
HTMLdoc.body.innerHTML = .responseText
End With
DoEvents
matchData(1) = HTMLdoc.getElementsByClassName("list-breadcrumb__item")(2).innerText
matchData(2) = HTMLdoc.getElementsByClassName("list-breadcrumb__item")(3).innerText
'Date and time of match is in data-dt attribute of "match-date" P element:
'< P id=match-date class=list-details__item__date data-dt="15,5,2016,18,00">< /P>
Set para = HTMLdoc.getElementById("match-date")
parts = Split(para.getAttribute("data-dt"), ",")
matchData(3) = DateSerial(parts(2), parts(1), parts(0)) + TimeSerial(parts(3), parts(4), 0)
matchData(4) = HTMLdoc.getElementsByTagName("h2")(0).innerText
matchData(5) = HTMLdoc.getElementsByTagName("h2")(2).innerText
matchData(6) = HTMLdoc.getElementById("js-score").innerText
matchData(7) = HTMLdoc.getElementById("js-partial").innerText
'Construct match odds URL from match result URL
parts = Split(URL.Value, "/")
matchOddsURL = "http://www.betexplorer.com/gres/ajax/matchodds.php?p=1&b=ou&e=" & parts(7)
'Request the match odds data. The response is a JSON string containing HTML which itself contains the odds data
With httpReq
.Open "GET", matchOddsURL, False
'.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0" 'Windows 10, Firefox
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko" 'Windows 10, IE11
.setRequestHeader "Accept", "application/json, text/javascript, */*; q=0.01"
.setRequestHeader "Accept-Language", "en-US,en;q=0.5"
.setRequestHeader "X-Requested-With", "XMLHttpRequest"
.setRequestHeader "Referer", URL.Value
.send
Debug.Print .Status, .statusText
'Extract HTML from JSON response and put in HTMLDocument
HTML = Mid(.responseText, Len("{'odds':'") + 1) 'remove {"odds":" at the start
HTML = Left(HTML, Len(HTML) - 2) 'remove "} at the end
HTML = Replace(HTML, "\n", "") 'remove newlines
HTML = Replace(HTML, "\", "") 'remove escape characters (assumes that every "\" is the escape character preceding the escaped character)
Set HTMLdoc = New HTMLDocument
HTMLdoc.body.innerHTML = HTML
End With
DoEvents
'Initialise number of rows found for each bookmaker
For i = 0 To UBound(numRows)
numRows(i) = 0
Next
'Find data for each bookmaker and extract to Excel
Set tRows = HTMLdoc.getElementsByTagName("TR")
For Each tRow In tRows
If tRow.getElementsByTagName("TABLE").Length = 0 Then 'no inner tables?
For i = 0 To UBound(bookmakers)
If InStr(1, tRow.innerText, bookmakers(i), vbTextCompare) > 0 Then
destSheet.Cells(r + numRows(i), "H").Offset(, i * 4).Value = tRow.Cells(0).innerText
destSheet.Cells(r + numRows(i), "I").Offset(, i * 4).Value = tRow.Cells(3).innerText
destSheet.Cells(r + numRows(i), "J").Offset(, i * 4).Value = tRow.Cells(4).getAttribute("data-odd")
destSheet.Cells(r + numRows(i), "K").Offset(, i * 4).Value = tRow.Cells(5).getAttribute("data-odd")
numRows(i) = numRows(i) + 1
End If
Next
End If
Next
'Find maximum number of rows output for all bookmakers
maxRows = 0
For i = 0 To UBound(numRows)
If numRows(i) > maxRows Then maxRows = numRows(i)
Next
'Set the next start row
If maxRows > 0 Then
destSheet.Cells(r, "A").Resize(maxRows, 7) = matchData
r = r + maxRows
Else
destSheet.Cells(r, "A").Resize(1, 7) = matchData
r = r + 1
End If
Next
Debug.Print "Elapsed time = " & Timer - startTime & " seconds"
Debug.Print "Elapsed time = " & Format(TimeSerial(0, 0, Timer - startTime), "hh:mm:ss")
End Sub