Speed of multiple URL web scraping

johnnyL

Board Regular
Joined
Nov 7, 2011
Messages
94
Need some help with this code.

1) The current code leaves a bunch of IE windows open.
2) The current code leads to a Run-time error '-2147437259 (80004005)':
3) It takes forever to run, Hopefully someone can assist me in converting it to use MSXML2.XMLHTTP60 for example, I heard that works faster.


Code:
'
'-----------------------------------------------------
'   Run-time error '-2147437259 (80004005)':    ' This Error Occurs, eventually, in the 'Yahoo_One_Year_Estimates_Scrape_Error' section \/ \/ \/
'                                                   Also many internet explorer windows are left open that should have been closed
'
'   Automation Error
'   Unspecified Error
'-----------------------------------------------------
'
'
'   Global Variables That will be used
'
    Public Doc                                      As HTMLDocument
'
    Public StockMainPageURL                         As String       ' This will be the main portion of the URL that we send to Internet Explorer
    Public TotalURL                                 As String       ' This will be the complete URL that we send to Internet Explorer
'
    Public CellCounter                              As Integer      ' This will be used to adjust left to right on web site cells
    Public RowCounter                               As Integer      ' This adjusts the offset from the top of the spreadsheet to the start of the columns
    Public StockCount                               As Integer      ' This counts the actual stocks being analyzed currently
    Public TotalStocksToLoad                        As Integer      ' This counts the stocks that should be analyzed right now
'
    Public PageLoadAttempt                          As Long         ' This counts the number of times we have tried to load a page
'
'-------------------------------------------------------------------------------------------------------------------------------
'
Private Sub RefreshEntireDocument_Click()
'
'   This will Clear certain cell values in the spreadsheet when the $B$1 'Refresh' cell is clicked
'
    Range("$B$5:$K$254").Select                                 ' Select the range of $B$5 thru $J$254
    Selection.ClearContents                                     ' Delete the contents of this range
'
'
' -------------------------------------------------------------------------------------------------------------------------
'
'   Scrape stocks to consider looking into further from 1st URL page
'
    RowCounter = 5                                              ' Start loading stock values recieved into the 5th row of Excel
    MaxYahooDelay = 0                                           ' Initialize MaxYahooDelay = 0
'
    CellCounter = 0                                             ' Left to right cell counter
    PageLoadAttempt = 0                                         ' Initialize PageLoadAttempt = 0
    TotalStocksToLoad = 100                                     ' we will Scrape this amount of stocks from the 1st loaded page of stocks
'
    Call Scrape_BarChart_Stock_Page_1                           ' Scrape the amount of TotalStocksToLoad into excel
'
' -------------------------------------------------------------------------------------------------------------------------
'
'   Scrape stocks to consider looking into further from 2nd URL page
'
    CellCounter = 0                                             ' Left to right cell counter
    PageLoadAttempt = 0                                         ' Initialize PageLoadAttempt = 0
    TotalStocksToLoad = 100                                     ' we will Scrape this amount of stocks from the 2nd loaded page of stocks
'
    Call Scrape_BarChart_Stock_Page_2                           ' Scrape the amount of TotalStocksToLoad into excel
'
' -------------------------------------------------------------------------------------------------------------------------
'
'   Scrape stocks to consider looking into further from 3rd URL page
'
    CellCounter = 0                                             ' Left to right cell counter
    PageLoadAttempt = 0                                         ' Initialize PageLoadAttempt = 0
    TotalStocksToLoad = 50                                      ' we will Scrape this amount of stocks from the 3rd loaded page of stocks
'
    Call Scrape_BarChart_Stock_Page_3                           ' Scrape the amount of TotalStocksToLoad into excel
'
' -------------------------------------------------------------------------------------------------------------------------
' -------------------------------------------------------------------------------------------------------------------------
'
'   Scrape values from Yahoo to Update the one year estimates from previous pages of stocks scraped
'
    RowCounter = 5                                              ' Start loading stock values recieved into the 5th row of Excel
    PageLoadAttempt = 0                                         ' Initialize PageLoadAttempt = 0
    TotalYahooDelay = 0                                         ' Initialize TotalYahooDelay = 0
    TotalYahooPageAttempts = 0                                  ' Initialize TotalYahooPageAttempts = 0
    TotalStocksToLoad = 250                                     ' we will Scrape this amount of stocks from the 3rd loaded page of stocks

    Call Scrape_Yahoo_One_Year_Estimates                        ' Scrape the amount of TotalStocksToLoad into excel
'
' -------------------------------------------------------------------------------------------------------------------------
'
'   Display some final results in the status bar
    Application.StatusBar = "Spreadsheet Refreshing Complete :)" ' & "    Avg Yahoo Delay = " & AvgYahooDelay & "     Avg Yahoo Page Attempts = " & AvgYahooPageAttempts
'
End Sub
'-------------------------------------------------------------------------------------------------------------------------------
'-------------------------------------------------------------------------------------------------------------------------------
'
Private Sub Scrape_Yahoo_One_Year_Estimates()                       ' *** Good up to here ***
'
'
    For StockCount = 1 To TotalStocksToLoad                         ' Grab One Year stock price estimate
'
'
ReloadScrape_Yahoo_One_Year_Estimates:
'
'       Load all of the Update one year estimates
        DelaySeconds = 0                                            '   Initialize DelaySeconds to zero
        PageLoadAttempt = PageLoadAttempt + 1                       '   Add 1 to our PageLoadAttempt counter
''''        TotalYahooPageAttempts = TotalYahooPageAttempts + 1         '   This will be the total yahoo Page Attempts
'
        StockMainPageURL = "finance.yahoo.com/quote/"               '   This will be the main portion of the URL that we send to Internet Explorer
        CurrentStockSymbol = Trim(Range("B" & RowCounter).Value)    '   This is the stock symbol that we will be addressing
'
'       Setup and Load the Internet Explorer Page ...
''''        Dim IE As New SHDocVw.InternetExplorer  ' This works
        Dim IE As New InternetExplorer
''      Dim IE As MSXML2.XMLHTTP60
''      Set IE = New MSXML2.XMLHTTP60
'
        TotalURL = "https://" & StockMainPageURL & CurrentStockSymbol   ' This will be the complete URL that we send to Internet Explorer
'
        If CurrentStockSymbol = 0 Or CurrentStockSymbol = "" Or IsEmpty(CurrentStockSymbol) = True Then ' If no stock symbol found @ $B?  then ...
            PageLoadAttempt = 0                                                                         '   Reset PageLoadAttempt = 0
            StockCount = TotalStocksToLoad                                                              '   Indicate no more stocks to load
'
            IE.Quit                                                                                     '   Close Internet Explorer Window
            Set IE = Nothing                                                                            '   Clear Internet Explorer Memory
'
            Exit Sub                                                                                    '   Exit this sub
        Else
'
            On Error GoTo Yahoo_One_Year_Estimates_Scrape_Error                                         '   If Error occurs then goto Yahoo_One_Year_Estimates_Scrape_Error
'
            Set IE = New InternetExplorer                                                               '   Open Internet Explorer Browser
'
'           Browser address that we will be scraping values from
            IE.navigate TotalURL                                                                        '   Load the Internet Explorer URL
'
'           Make the Browser window, that we will be scraping values from, visible
            IE.Visible = True                                           '   Make Internet Explorer Windows Visible
'
'           Allow mouse clicks and such while browser window is loading ... Loop until browser window is fuilly loaded, ie. READYSTATE_COMPLETE
            Do While IE.readyState <> 4 And DelaySeconds <= 19                                          '   Loop while IE is still loading and <= 19 seconds delayed
''              Application.Wait DateAdd("s", 1, Now)
                Application.Wait (Now + TimeValue("00:00:01"))                                          '   Delay for 1 second
                DoEvents                                                                                '   Enable Mouse Clicks
'
'               Update status bar to inform the user of what is occurring
                Application.StatusBar = "Loading website … " & TotalURL & "    Stock # " & (RowCounter - 4) ''''& _
''''                                "   Delay Seconds =  " & DelaySeconds & "    Page Load Attempts = " & PageLoadAttempt & _
''''                                "   Avg Yahoo Delay = " & AvgYahooDelay & "     AvgYahooPageAttempts = " & AvgYahooPageAttempts
'
                DelaySeconds = DelaySeconds + 1                         '   Add 1 to our DelaySeconds Counter
'
''''                If DelaySeconds > MaxYahooDelay Then MaxYahooDelay = DelaySeconds   '   Save the MaxYahooDelay
''                  TotalYahooDelay = TotalYahooDelay + 1
'
            Loop                                                        ' Loop back
'
'           Allow mouse clicks and such while browser window is loading ... Loop until browser window is fuilly loaded, ie. READYSTATE_COMPLETE
            Do While IE.Busy And DelaySeconds <= 19 ' Or IE.readyState <> 4 And DelaySeconds <= 19  ' Loop while IE is still loading and <= 19 seconds delayed
''              Application.Wait DateAdd("s", 1, Now)
                Application.Wait (Now + TimeValue("00:00:01"))          '   Delay for 1 second
                DoEvents                                                '   Enable Mouse Clicks
'
'               Update status bar to inform the user of what is occurring
                Application.StatusBar = "Loading website … " & TotalURL & "    Stock # " & (RowCounter - 4) ''''& _
''''                                "   Delay Seconds =  " & DelaySeconds & "    Page Load Attempts = " & PageLoadAttempt & _
''''                                "   Avg Yahoo Delay = " & AvgYahooDelay & "     AvgYahooPageAttempts = " & AvgYahooPageAttempts
'
                DelaySeconds = DelaySeconds + 1                         '   Add 1 to our DelaySeconds Counter
'
''''                If DelaySeconds > MaxYahooDelay Then MaxYahooDelay = DelaySeconds   '   Save the MaxYahooDelay
            Loop                                                        ' Loop back
'
'
            If DelaySeconds > 19 Then                                   ' If we have delayed for > 19 seconds to allow the page to load then ...
                IE.Quit                                                 '   Close Internet Explorer Window
'
                If PageLoadAttempt <= 4 Then GoTo ReloadScrape_Yahoo_One_Year_Estimates '   If we have'nt tried 4 reloads of this page then reload page again
            End If                                                      ' End If
'
            If PageLoadAttempt > 4 Then                                 ' If we have tried 4 reloads of the URL page then Display a message box & Exit program
                MsgBox "We've reloaded the same web page  " & PageLoadAttempt & " times without success so we're going to pause the program" & _
                " so you can investigate.", , "Multiple errors detected"
'
                PageLoadAttempt = 0                                     '   Reset PageLoadAttempt = 0
'
                Stop                                                    '   Stop this Excel program!
            End If
'
            Set Doc = IE.document
'
        End If
'
'
''''        TotalYahooDelay = TotalYahooDelay + DelaySeconds
''''        AvgYahooDelay = TotalYahooDelay / (RowCounter - 4)
''''        AvgYahooPageAttempts = TotalYahooPageAttempts / (RowCounter - 4)
'
'       Update status bar to inform the user of what is occurring
        Application.StatusBar = "Gathering Data from website … " & TotalURL & "    Stock # " & (RowCounter - 4) ''''& _
''''                                "   Delay Seconds =  " & DelaySeconds & "    Page Load Attempts = " & PageLoadAttempt & _
''''                                "   Avg Yahoo Delay = " & AvgYahooDelay & "    AvgYahooPageAttempts = " & AvgYahooPageAttempts
'
        Range("J" & RowCounter).Value = Doc.getElementsByTagName("td")(11).innerText        '   Scrape the Yahoo 52 Week Price Range
        Range("K" & RowCounter).Value = Doc.getElementsByTagName("td")(31).innerText        '   Scrape the Yahoo One Year Price Estimate
'
        On Error GoTo 0                                                                     '   Clear Errors & Set Excel Error handling to Default
'
        RowCounter = RowCounter + 1                                                         '   Advance to next row in Excel sheet
'
        IE.Quit                                                                             '   Close Internet Explorer Window
        Set IE = Nothing                                                                    '   Clear Internet Explorer Memory
'
        PageLoadAttempt = 0                                                                 '   Reset PageLoadAttempt = 0
'
    Next                                                                                    '   Load next stock until all are loaded
'
    Exit Sub                                                                                ' Exit this Sub
'
Yahoo_One_Year_Estimates_Scrape_Error:
'
'   Tried this solution from google \/ \/ to solve errors, No luck :(                       ' Shut down all Internet Explorer windows
''    Dim wsh As Object
''    Dim windowStyle As Integer: windowStyle = 1
''    Dim waitOnReturn As Boolean: waitOnReturn = True
'
''    Set wsh = VBA.CreateObject("Wscript.Shell")
''    wsh.Run "taskkill /F /IM iexplore.exe", windowStyle, waitOnReturn
'
'
'
''    IE.Quit                                                                             '   Close Internet Explorer Window
    Set IE = Nothing                                                                    '   Clear Internet Explorer Memory
'
'   This works some what
    Set IE = New InternetExplorer                                                           ' Open Internet Explorer Browser
'
'
    Resume Next                                                                             ' Go back to the next line after the previous error occurred
'
End Sub
'________________________________________________________________________________________________________________________________________________________
 

Dan_W

Active Member
Joined
Jul 11, 2018
Messages
258
Office Version
  1. 365
Platform
  1. Windows
Sorry - I wasn't very clear:- Yes, it is possible to get all pieces of information through MSXML (and yes, I have managed to get it) except for the information that you're pulling through the right-border-separator class. Is this piece of information something that you can get from elsewhere, if in fact you do need it? It is not an issue with MSXML - that particular information is generated by the browser (which MSXML is not).
 

Some videos you may like

Excel Facts

Ambidextrous Undo
Undo last command with Ctrl+Z or Alt+Backspace. If you use the Undo icon in the QAT, open the drop-down arrow to undo up to 100 steps.

johnnyL

Board Regular
Joined
Nov 7, 2011
Messages
94
It would be nice to have that one bit of data, but I don't need it for every stock per se. I could always manually look up the value if the rest of the desired values show that it is worthy of looking up. Out of the 250 stocks that are researched, I would say maybe 20 - 30 would necessitate manually looking up that value that MSXML can't obtain.
 

johnnyL

Board Regular
Joined
Nov 7, 2011
Messages
94
Well, it's not really the Yahoo part of the process that's the worry, is it? From memory, didn't we get that down to 4 mins?Also, from memory, it was only ever scraping three pages, so that would only require 3 bots. I thnk the more concerning area is the 250 pages that need to accessed via Internet Explorer. On that, quick question - how critically important to you is it to get that information in the right-border-separator class? I ask because it appears that that is the only piece of information generated by the Javascript. It may be possible to access the rest quicker if that part can be jettisoned.
Latest update ...
 

Attachments

  • 4.4 Capture.JPG
    4.4 Capture.JPG
    47.5 KB · Views: 6

johnnyL

Board Regular
Joined
Nov 7, 2011
Messages
94
Here is a link to the workbook, comprising one worksheet with a command button, and one standard module (modScrape). I set out that code in full below, on the off-chance it may be of use to someone else:

VBA Code:
'Remember to add reference to Microsoft HTML Object Library

Sub ScrapeSite()
    Dim Doc                          As HTMLDocument
    Dim StockMainPageURL             As String       ' This will be the main portion of the URL that we send to Internet Explorer
    Dim TotalURL                     As String       ' This will be the complete URL that we send to Internet Explorer
    Dim RowCounter                   As Integer      ' This adjusts the offset from the top of the spreadsheet to the start of the columns
    Dim StockCount                   As Integer      ' This counts the actual stocks being analyzed currently
    Dim strHTML                      As String
   
    Set Doc = New HTMLDocument
    StockMainPageURL = "https://finance.yahoo.com/quote/"               '   This will be the main portion of the URL
    RowCounter = 5
    TotalStocksToLoad = 250
    For StockCount = 1 To TotalStocksToLoad                                          ' Grab One Year stock price estimate
        currentstocksymbol = Trim(Range("B" & RowCounter).value)      '   This is the stock symbol that we will be addressing
        If currentstocksymbol = vbNullString Then Exit For
        TotalURL = StockMainPageURL & currentstocksymbol                 ' This will be the complete URL
        strHTML = GetHTML(TotalURL)
       
        Doc.body.innerhtml = strHTML
       
        Range("J" & RowCounter).value = Doc.getElementsByTagName("td")(11).innerText
        Range("K" & RowCounter).value = Doc.getElementsByTagName("td")(31).innerText
       
        RowCounter = RowCounter + 1
    Next
   
    Set Doc = Nothing
   
End Sub

Function GetHTML(strURL As String) As String
    Dim objHTTP As Object, strTemp As String
    Set objHTTP = CreateObject("MSXML2.ServerXMLHTTP")
    objHTTP.Open "GET", strURL, False
    objHTTP.send
    If objHTTP.Status = 200 Then
        strTemp = objHTTP.responseText
    Else
        'There has been an error
        strTemp = ""
    End If
    GetHTML = strTemp
End Function

That /\ /\ /\ was the previous code posted by Dan_W as an example to scrape the yahoo website via MSXML2 that worked for me. :)

Since it has been stated that the BarChart site can be scraped with same method, I tried the following code ...

VBA Code:
'Remember to add reference to Microsoft HTML Object Library

Sub ScrapeSite()
    Dim Doc                          As HTMLDocument
    Dim StockMainPageURL             As String       ' This will be the main portion of the URL that we send to Internet Explorer
    Dim TotalURL                     As String       ' This will be the complete URL that we send to Internet Explorer
    Dim RowCounter                   As Integer      ' This adjusts the offset from the top of the spreadsheet to the start of the columns
    Dim StockCount                   As Integer      ' This counts the actual stocks being analyzed currently
    Dim strHTML                      As String
    
    Set Doc = New HTMLDocument
'
''    StockMainPageURL = "https://finance.yahoo.com/quote/"               '   This will be the main portion of the URL
'
' Replace with ...
    StockMainPageURL = "https://www.barchart.com/stocks/quotes/"               '   This will be the main portion of the URL
'
    RowCounter = 5
    TotalStocksToLoad = 250
    For StockCount = 1 To TotalStocksToLoad                                          ' Grab One Year stock price estimate
        currentstocksymbol = Trim(Range("B" & RowCounter).Value)      '   This is the stock symbol that we will be addressing
        If currentstocksymbol = vbNullString Then Exit For
''        TotalURL = StockMainPageURL & currentstocksymbol                 ' This will be the complete URL
'
' Replace with ...
        TotalURL = StockMainPageURL & currentstocksymbol & "/analyst-ratings"                 ' This will be the complete URL"
'
        strHTML = GetHTML(TotalURL)
'
        Doc.body.innerHTML = strHTML
'
''        Range("J" & RowCounter).Value = Doc.getElementsByTagName("td")(11).innerText
''        Range("K" & RowCounter).Value = Doc.getElementsByTagName("td")(31).innerText
'
' Replace with ...
''        Range("O" & RowCounter).Value = num(Doc.getElementsByClassName("right-border-separator")(1).innerText) ' Avg Analyst 1 yr price ... MSXML Can't get this
        Range("P" & RowCounter).Value = Doc.getElementsByClassName("block__colored-header")(3).innerText        ' Analyst stock strength
        Range("Q" & RowCounter).Value = Doc.getElementsByClassName("block__average_value")(3).innerText         ' Analyst rating 1 - 5
        Range("R" & RowCounter).Value = Doc.getElementsByClassName("bold")(3).innerText                         ' # of analysts
'
        RowCounter = RowCounter + 1
    Next
    
    Set Doc = Nothing
    
End Sub

Function GetHTML(strURL As String) As String
    Dim objHTTP As Object, strTemp As String
    Set objHTTP = CreateObject("MSXML2.ServerXMLHTTP")
    objHTTP.Open "GET", strURL, False
    objHTTP.send
    If objHTTP.Status = 200 Then
        strTemp = objHTTP.responseText
    Else
        'There has been an error
        strTemp = ""
    End If
    GetHTML = strTemp
End Function

I still get the same error that I mentioned recently ... "An error occurred in the secure channel support"

Any bones that can be thrown to me to let me know what I have done wrong?
 

Dan_W

Active Member
Joined
Jul 11, 2018
Messages
258
Office Version
  1. 365
Platform
  1. Windows

ADVERTISEMENT

That /\ /\ /\ was the previous code posted by Dan_W as an example to scrape the yahoo website via MSXML2 that worked for me. :)

Since it has been stated that the BarChart site can be scraped with same method, I tried the following code ...

VBA Code:
'Remember to add reference to Microsoft HTML Object Library

Sub ScrapeSite()
    Dim Doc                          As HTMLDocument
    Dim StockMainPageURL             As String       ' This will be the main portion of the URL that we send to Internet Explorer
    Dim TotalURL                     As String       ' This will be the complete URL that we send to Internet Explorer
    Dim RowCounter                   As Integer      ' This adjusts the offset from the top of the spreadsheet to the start of the columns
    Dim StockCount                   As Integer      ' This counts the actual stocks being analyzed currently
    Dim strHTML                      As String
   
    Set Doc = New HTMLDocument
'
''    StockMainPageURL = "https://finance.yahoo.com/quote/"               '   This will be the main portion of the URL
'
' Replace with ...
    StockMainPageURL = "https://www.barchart.com/stocks/quotes/"               '   This will be the main portion of the URL
'
    RowCounter = 5
    TotalStocksToLoad = 250
    For StockCount = 1 To TotalStocksToLoad                                          ' Grab One Year stock price estimate
        currentstocksymbol = Trim(Range("B" & RowCounter).Value)      '   This is the stock symbol that we will be addressing
        If currentstocksymbol = vbNullString Then Exit For
''        TotalURL = StockMainPageURL & currentstocksymbol                 ' This will be the complete URL
'
' Replace with ...
        TotalURL = StockMainPageURL & currentstocksymbol & "/analyst-ratings"                 ' This will be the complete URL"
'
        strHTML = GetHTML(TotalURL)
'
        Doc.body.innerHTML = strHTML
'
''        Range("J" & RowCounter).Value = Doc.getElementsByTagName("td")(11).innerText
''        Range("K" & RowCounter).Value = Doc.getElementsByTagName("td")(31).innerText
'
' Replace with ...
''        Range("O" & RowCounter).Value = num(Doc.getElementsByClassName("right-border-separator")(1).innerText) ' Avg Analyst 1 yr price ... MSXML Can't get this
        Range("P" & RowCounter).Value = Doc.getElementsByClassName("block__colored-header")(3).innerText        ' Analyst stock strength
        Range("Q" & RowCounter).Value = Doc.getElementsByClassName("block__average_value")(3).innerText         ' Analyst rating 1 - 5
        Range("R" & RowCounter).Value = Doc.getElementsByClassName("bold")(3).innerText                         ' # of analysts
'
        RowCounter = RowCounter + 1
    Next
   
    Set Doc = Nothing
   
End Sub

Function GetHTML(strURL As String) As String
    Dim objHTTP As Object, strTemp As String
    Set objHTTP = CreateObject("MSXML2.ServerXMLHTTP")
    objHTTP.Open "GET", strURL, False
    objHTTP.send
    If objHTTP.Status = 200 Then
        strTemp = objHTTP.responseText
    Else
        'There has been an error
        strTemp = ""
    End If
    GetHTML = strTemp
End Function

I still get the same error that I mentioned recently ... "An error occurred in the secure channel support"

Any bones that can be thrown to me to let me know what I have done wrong?
Ok, so I rewrote the Phase 3 code as below. It first loads all the StockSymbols into an array, and then starts looping through them.
Rather than using Internet Explorer, it relies on the GetHTML function (which uses MSXML2), and scrapes 3 of the 4 pieces of data sought in your original code. The data is loaded into a two-dimensional array, and after scraping all 250 pages, the array is pasted direct to the Worksheet.

I have tried it 5 to 6 times, and it worked each time within 5 mins. Give it a try and see how it goes.

VBA Code:
Sub Phase3()
    
    Dim DOC                      As HTMLDocument
    Dim ar(1 To 250, 1 To 3) As String
    Dim URL             As String
    Dim Stocksymbols    As Variant
    
    RowCounter = 4
    TotalStocksToLoad = 30
    BaseURL = "https://www.barchart.com/stocks/quotes/"
    Stocksymbols = WorksheetFunction.Transpose(Range("B5:B" & TotalStocksToLoad + RowCounter).Value)
    For I = 11 To UBound(Stocksymbols)
        DoEvents
        currentstocksymbol = Stocksymbols(I)
        Application.StatusBar = "Processing " & I & " - " & currentstocksymbol
        URL = BaseURL & currentstocksymbol & "/analyst-ratings"
        
        strCode = GetHTML(URL)
        Set DOC = New HTMLDocument
        DOC.body.innerHTML = strCode
        On Error Resume Next
        
        ar(I, 1) = DOC.getElementsByClassName("block__colored-header")(3).innerText
        ar(I, 2) = DOC.getElementsByClassName("block__average_value")(3).innerText
        ar(I, 3) = DOC.getElementsByClassName("bold")(3).innerText
    Next
    
    Range("P5").Resize(TotalStocksToLoad, 3).Value = ar
End Sub
 

Dan_W

Active Member
Joined
Jul 11, 2018
Messages
258
Office Version
  1. 365
Platform
  1. Windows

ADVERTISEMENT

Can you try running that Phase3 subroutine in a completely fresh workbook, including the GetHTML function and if the error occurs again, can you please tell me where it occurs.
 

johnnyL

Board Regular
Joined
Nov 7, 2011
Messages
94
Can you try running that Phase3 subroutine in a completely fresh workbook, including the GetHTML function and if the error occurs again, can you please tell me where it occurs.
Hey Dan_W!

Yes, I had/have the code in a separate fresh workbook. After much googling, it ended up being one line of code that I had to alter in the GetHTML Function to get it running.

VBA Code:
Set objHTTP = CreateObject("MSXML2.ServerXMLHTTP")

I had to alter that line to ...
VBA Code:
Set objHTTP = New MSXML2.XMLHTTP

Perhaps some one can explain why that one change I made allows it to run for me?

Now I will go back to playing with more code. :)
 

johnnyL

Board Regular
Joined
Nov 7, 2011
Messages
94
Ok, a day later of playing with code.

I tried some WinHTTP code that I found/altered, that works for some HTTPS sites. I initially thought that HTTPS sites would not work with WinHTTP. It turns out that HTTPS is not a barrier, per se. WinHTTP does work with some HTTPS sites. Then I decided to look into why it works for some HTTPS sites and not others. Well, TLS is usually the answer. WinHTTP supports up to TLS1.1, from my googling, any level above that apparently is not supported. I saw some discussion about messing with the registry, or adding settings to the excel file, but I didn't find any majority belief that those approaches were 100%.

Where am I going with all this? Well the BarChart site requires TLS1.3, apparently, so WinHTTP is not going to be an option. :(

Dan_Ws latest code here, that uses the XML approach, worked for 3 of the 4 values that I wanted to scrape from my phase3 BarChart site. So I decided to see if I could convert that code to scrape my desired Phase1 BarChart desired values. Well my conversion runs error free, however it does not yield any values, :( , the older IE method still yields correct values.

I am assuming that this is because the phase1 URL is javascript, as was previously mentioned, for causing problems with scraping the data via MSXML?

Any thoughts?
 

johnnyL

Board Regular
Joined
Nov 7, 2011
Messages
94
The following is the Dan_W code that scrapes 3 of the 4 desired values from the Phase3 :) ...

VBA Code:
'
'   *** Have to have references to Microsoft HTML Object Library & Microsoft XML, V3.0 or 6.0 ***
'
Sub Phase3()
    
    Dim Doc                         As HTMLDocument
    Dim ar(1 To 250, 1 To 3)        As String
    Dim URL                         As String
    Dim Stocksymbols                As Variant
'
    Set Doc = New HTMLDocument
'
    RowCounter = 5
    TotalStocksToLoad = 30
'
    BaseURL = "https://www.barchart.com/stocks/quotes/"
'
    Stocksymbols = WorksheetFunction.Transpose(Range("B5:B" & TotalStocksToLoad + RowCounter).Value)
'
    For I = 1 To UBound(Stocksymbols)
'
        DoEvents
'
        currentstocksymbol = Stocksymbols(I)
'
        Application.StatusBar = "Processing " & I & " - " & currentstocksymbol
'
        URL = BaseURL & currentstocksymbol & "/analyst-ratings"
'
        Doc.body.innerHTML = GetHTML(URL)
'
        On Error Resume Next
'
        ar(I, 1) = Doc.getElementsByClassName("block__colored-header")(3).innerText
        ar(I, 2) = Doc.getElementsByClassName("block__average_value")(3).innerText
        ar(I, 3) = Doc.getElementsByClassName("bold")(3).innerText
    Next
    
    Range("P5").Resize(TotalStocksToLoad, 3).Value = ar
End Sub


Function GetHTML(strURL As String) As String
'
    Dim objHTTP As Object
    Dim strTemp As String
'
''    Set objHTTP = CreateObject("MSXML2.ServerXMLHTTP")
' Changed /\ /\ to \/ \/ and it works for me now :)
    Set objHTTP = New MSXML2.XMLHTTP
'
    objHTTP.Open "GET", strURL, False
    objHTTP.send
'
    If objHTTP.Status = 200 Then
        strTemp = objHTTP.responseText
    Else
        'There has been an error
        strTemp = ""
    End If
'
    GetHTML = strTemp
End Function


The following is the code that I came up with to try and make that approach work for my Phase1 ...
VBA Code:
'
'   *** Have to have references to Microsoft HTML Object Library & Microsoft XML, V3.0 or 6.0 ***
'
Sub Phase1()
    
    Dim Doc                         As HTMLDocument
    Dim URL                         As String
    Dim Stocksymbols                As Variant
'
    Set Doc = New HTMLDocument
'
    CellCounter = 0                                                         ' Left to right cell counter on the web page that is being scraped
    RowCounter = 5
    TotalStocksToLoad = 30
'
    URL = "https://www.barchart.com/stocks/performance/percent-change/declines?timeFrame=3m&viewName=main"
'
    Application.StatusBar = "Processing "
'
    DoEvents
'
    Doc.body.innerHTML = GetHTML(URL)
'
    For I = 1 To TotalStocksToLoad
'
        DoEvents
'
        On Error Resume Next
'
'       These values work with the IE method
        Range("A" & RowCounter).Value = (RowCounter - 4)                                                '   Stock number Counter
        Range("B" & RowCounter).Value = Doc.getElementsByTagName("td")(CellCounter).innerText           '   Stock Symbol
        Range("C" & RowCounter).Value = Trim(Doc.getElementsByTagName("td")(CellCounter + 1).innerText) '   Stock Name
        Range("D" & RowCounter).Value = Doc.getElementsByTagName("td")(CellCounter + 2).innerText       '   3 Month % Change
        Range("E" & RowCounter).Value = Doc.getElementsByTagName("td")(CellCounter + 3).innerText       '   Last Price
        Range("F" & RowCounter).Value = Doc.getElementsByTagName("td")(CellCounter + 4).innerText       '   Change
        Range("G" & RowCounter).Value = Doc.getElementsByTagName("td")(CellCounter + 5).innerText       '   % Change
        Range("H" & RowCounter).Value = Doc.getElementsByTagName("td")(CellCounter + 7).innerText       '   3 Month High
        Range("I" & RowCounter).Value = Doc.getElementsByTagName("td")(CellCounter + 8).innerText       '   3 Month Low
'
        RowCounter = RowCounter + 1                                                                     '   Advance to next row in Excel sheet
'
        CellCounter = CellCounter + 12                                                                  '   Advance to next row on URL page
    Next
'
    Application.StatusBar = "Script Done "
End Sub


Function GetHTML(strURL As String) As String
'
    Dim objHTTP As Object
    Dim strTemp As String
'
''    Set objHTTP = CreateObject("MSXML2.ServerXMLHTTP")
' Changed /\ /\ to \/ \/ and it works for me now :)
    Set objHTTP = New MSXML2.XMLHTTP
'
    objHTTP.Open "GET", strURL, False
    objHTTP.send
'
    If objHTTP.Status = 200 Then
        strTemp = objHTTP.responseText
    Else
        'There has been an error
        strTemp = ""
    End If
'
    GetHTML = strTemp
End Function

I eliminated the array stuff just to see if I could get some results, unfortunately all I got was a counter to display. :(
 

Watch MrExcel Video

Forum statistics

Threads
1,118,774
Messages
5,574,157
Members
412,574
Latest member
shadowfighter666
Top