Vba web scrapping

Doughtiefireman

New Member
Joined
Dec 2, 2020
Messages
6
Office Version
  1. 2019
Platform
  1. Windows
  2. Mobile
  3. Web
I'm trying to loop it to copy and paste the table and click next till there no more next the next button is a hyperlink with a "A" tagname

VBA Code:
Sub GetData()

'define variables

Dim i As SHDocVw.InternetExplorer

Set i = New InternetExplorer

i.Visible = True

Dim IE As Object, obj As Object

Dim r As Long, c As Long, t As Long

Dim elemCollection As Object

Dim eRow As Long

Dim HTMLDoc As MSHTML.HTMLDocument

Dim HTMLInput As MSHTML.IHTMLElement

Dim HTMLAs As MSHTML.IHTMLElementCollection

Dim HTMLA As MSHTML.IHTMLElement











url_name = Sheet2.Range("e4")

If url_name = "" Then Exit Sub

i.Visible = True

i.navigate (url_name)



Do While i.readyState <> READYSTATE_COMPLETE

Loop



'we ensure that the web page is downloaded completely



ThisWorkbook.Sheets("Sheet1").Range("a2:ai1000").ClearContents

Set elemCollection = i.document.getElementsByTagName("TABLE")

For t = 0 To (elemCollection.Length - 1)

For r = 0 To (elemCollection(t).Rows.Length - 1)

eRow = Sheet1.Cells(Rows.Count, 1).End(xlUp).Offset(1, 0).Row

    For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)

  

ThisWorkbook.Worksheets(1).Cells(eRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText

Next c

    Next r

        Next t

      

        Range("a1:ai1000").Columns.AutoFit

        'Clean up Memory

        Set IE = Nothing

        Set HTMLDoc = i.document

      

        Set HTMLAs = HTMLDoc.getElementsByTagName("a")

      

        For Each HTMLA In HTMLAs

        'Debug.Print HTMLA.getAttribute("classname"), HTMLA.getAttribute("href")

      

        If HTMLA.getAttribute("classname") = "button2 next" And HTMLA.getAttribute("href") = "[URL='https://stathead.com/football/pgl_finder.cgi?request=1&game_num_max=99&week_num_max=99&order_by=all_td&match=game&season_start=1&year_max=2020&qb_gwd=0&qb_comeback=0&season_end=-1&game_type=R&age_max=99&year_min=2020&offset=100']Football | Player Game Finder | Stathead.com[/URL]" Then

        HTMLA.Click

        Exit For

      

End If

Next



End Sub
 
Last edited by a moderator:

Some videos you may like

Excel Facts

Using Function Arguments with nested formulas
If writing INDEX in Func. Arguments, type MATCH(. Use the mouse to click inside MATCH in the formula bar. Dialog switches to MATCH.

Anthony47

Well-known Member
Joined
Mar 29, 2006
Messages
2,135
It is possible to register to the site and getting limited time free access.

So I went to this code:
Code:
#If VBA7 Then       '!!! ON  TOP  OF  THE  VBA  MODULE   !!!!
    Declare PtrSafe Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As LongPtr)
#Else
    Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
#End If
Option Explicit


Sub GetData()
'BEWARE: set a Reference to Microsoft Internet Controls, using Menu /Tools /Reference
Dim iE As SHDocVw.InternetExplorer
Set iE = New InternetExplorer
iE.Visible = True
Dim r As Long, c As Long, t As Long
Dim elemCollection As Object
Dim eRow As Long
Dim nColl As Object, J As Long
Dim Url_Name As String
'
Debug.Print ">>>>>> Start GetData"
Url_Name = Sheets("Sheet2").Range("E4")
iE.Visible = True
ThisWorkbook.Sheets("Sheet1").Range("a2:ai1000").ClearContents
'Navigate till end of pages:
Do
    If Url_Name = "" Then Exit Do
    iE.navigate (Url_Name)
    Sleep 100
    Do While iE.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop
    'Check if Login is needed:
    Set nColl = iE.document.getElementsByTagName("body")
    If InStr(1, nColl(0).innerText, "Already a subscriber? Log in for full results.", vbTextCompare) > 0 Then
        'First Login
        Call DoLogin(iE)
    Else
        'Already logged in
        Debug.Print "... Already logged"
    End If
    'Import Tables
    Set elemCollection = iE.document.getElementsByTagName("TABLE")
    For t = 0 To (elemCollection.Length - 1)
        For r = 0 To (elemCollection(t).Rows.Length - 1)
            eRow = Sheets("Sheet1").Cells(Rows.Count, 1).End(xlUp).Offset(1, 0).Row
            For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
                Sheets("Sheet1").Cells(eRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
            Next c
        Next r
        DoEvents
    Next t
    Debug.Print "Imported: " & eRow
    Set nColl = Nothing: Url_Name = ""
    'Look for "Next Page":
        On Error Resume Next
        For J = 1 To 10
            Set nColl = iE.document.getElementsByClassName("prevnext")(0).getElementsByTagName("A")
            Sleep 200
            If Not nColl Is Nothing Then Exit For
        Next J
        Debug.Print "Next page J=" & J
        On Error GoTo 0
    'Check if Next exists:
    If nColl Is Nothing Then Exit Do
    For J = 0 To nColl.Length - 1
        If InStr(1, nColl(J).innerText & "__", "Next", vbTextCompare) > 0 Then
            Url_Name = nColl(J).href                    'Found
            Exit For
        End If
    Next J
Loop
'Format Sheet1
Sheets("Sheet1").Range("a1:ai1000").Columns.AutoFit
Debug.Print "COMPLETED, imported rows: " & eRow
MsgBox ("Import completed")
'Clean up Memory and exit
iE.Quit
Set iE = Nothing
End Sub

Sub DoLogin(ByRef IESess As Object)
Dim AObj As Object, BObj As Object, J As Long
'
Debug.Print "--- Start Login process"
IESess.document.getElementById("nav").getElementsByClassName("login")(1).Click
    Sleep 100
'Wait for login InputBoxes:
    On Error Resume Next
    For J = 1 To 30
        Set AObj = IESess.document.getElementById("username")
        Sleep 200
        If Not AObj Is Nothing Then Exit For
    Next J
    On Error GoTo 0
    Debug.Print "Submit J=" & J
'Input UserName and passw:
IESess.document.getElementById("username").Value = Sheets("Sheet2").Range("E2").Value
IESess.document.getElementById("password").Value = Sheets("Sheet2").Range("E3").Value
Sleep 100
'Then submit form:
    IESess.document.getElementsByTagName("form")(0).submit
    Sleep 200
'Wait till probably completed:
    For J = 1 To 10
        Sleep 500
        If IESess.document.getElementsByTagName("TD").Length > 100 Then Exit For
    Next J
Debug.Print "Completed", "TD=" & IESess.document.getElementsByTagName("TD").Length, "J=" & J
End Sub
It has to be copied to an empty standard vba Module
It also requires that a reference be set in the vba to the "Microsoft Internet Controls" library, using Menu /Tools /Reference
The macro will check if the user is already logged and, if necessary, will go through the login process using the credentials stored in Sheet2

Your workbook must contain a Sheet1, that initially will be CLEARED WITHOUT ANY NOTICE and then will compiled with the result of the search; and a Sheet2 that will be used for the access credentials, to be loaded in E2, E3 and E4:
-E2 is the account name
-E3 is the password
-E4 is the starting url; the macro has been tested on url= Football | Player Game Finder | Stathead.com

This Url searches for these datas:
"In a single game, in 2020, in the Regular Season, played QB, sorted by descending DraftKings points"


But other urls seems compatible, for example Football | Player Game Finder | Stathead.com (this will import much more lines, almost 19000, and takes its time)

Description of the search says:
"In a single game, in 2020, in the Regular Season, sorted by descending DraftKings points."


Or: Football | Player Game Finder | Stathead.com
Description of the search says:
"In a single game, in 2020, in the Regular Season, played QB, sorted by descending DraftKings points."

I don't know how the data are organized in the site, so I am not able to suggest the rules

Bye
 

Doughtiefireman

New Member
Joined
Dec 2, 2020
Messages
6
Office Version
  1. 2019
Platform
  1. Windows
  2. Mobile
  3. Web
It is possible to register to the site and getting limited time free access.

So I went to this code:
Code:
#If VBA7 Then       '!!! ON  TOP  OF  THE  VBA  MODULE   !!!!
    Declare PtrSafe Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As LongPtr)
#Else
    Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
#End If
Option Explicit


Sub GetData()
'BEWARE: set a Reference to Microsoft Internet Controls, using Menu /Tools /Reference
Dim iE As SHDocVw.InternetExplorer
Set iE = New InternetExplorer
iE.Visible = True
Dim r As Long, c As Long, t As Long
Dim elemCollection As Object
Dim eRow As Long
Dim nColl As Object, J As Long
Dim Url_Name As String
'
Debug.Print ">>>>>> Start GetData"
Url_Name = Sheets("Sheet2").Range("E4")
iE.Visible = True
ThisWorkbook.Sheets("Sheet1").Range("a2:ai1000").ClearContents
'Navigate till end of pages:
Do
    If Url_Name = "" Then Exit Do
    iE.navigate (Url_Name)
    Sleep 100
    Do While iE.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop
    'Check if Login is needed:
    Set nColl = iE.document.getElementsByTagName("body")
    If InStr(1, nColl(0).innerText, "Already a subscriber? Log in for full results.", vbTextCompare) > 0 Then
        'First Login
        Call DoLogin(iE)
    Else
        'Already logged in
        Debug.Print "... Already logged"
    End If
    'Import Tables
    Set elemCollection = iE.document.getElementsByTagName("TABLE")
    For t = 0 To (elemCollection.Length - 1)
        For r = 0 To (elemCollection(t).Rows.Length - 1)
            eRow = Sheets("Sheet1").Cells(Rows.Count, 1).End(xlUp).Offset(1, 0).Row
            For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
                Sheets("Sheet1").Cells(eRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
            Next c
        Next r
        DoEvents
    Next t
    Debug.Print "Imported: " & eRow
    Set nColl = Nothing: Url_Name = ""
    'Look for "Next Page":
        On Error Resume Next
        For J = 1 To 10
            Set nColl = iE.document.getElementsByClassName("prevnext")(0).getElementsByTagName("A")
            Sleep 200
            If Not nColl Is Nothing Then Exit For
        Next J
        Debug.Print "Next page J=" & J
        On Error GoTo 0
    'Check if Next exists:
    If nColl Is Nothing Then Exit Do
    For J = 0 To nColl.Length - 1
        If InStr(1, nColl(J).innerText & "__", "Next", vbTextCompare) > 0 Then
            Url_Name = nColl(J).href                    'Found
            Exit For
        End If
    Next J
Loop
'Format Sheet1
Sheets("Sheet1").Range("a1:ai1000").Columns.AutoFit
Debug.Print "COMPLETED, imported rows: " & eRow
MsgBox ("Import completed")
'Clean up Memory and exit
iE.Quit
Set iE = Nothing
End Sub

Sub DoLogin(ByRef IESess As Object)
Dim AObj As Object, BObj As Object, J As Long
'
Debug.Print "--- Start Login process"
IESess.document.getElementById("nav").getElementsByClassName("login")(1).Click
    Sleep 100
'Wait for login InputBoxes:
    On Error Resume Next
    For J = 1 To 30
        Set AObj = IESess.document.getElementById("username")
        Sleep 200
        If Not AObj Is Nothing Then Exit For
    Next J
    On Error GoTo 0
    Debug.Print "Submit J=" & J
'Input UserName and passw:
IESess.document.getElementById("username").Value = Sheets("Sheet2").Range("E2").Value
IESess.document.getElementById("password").Value = Sheets("Sheet2").Range("E3").Value
Sleep 100
'Then submit form:
    IESess.document.getElementsByTagName("form")(0).submit
    Sleep 200
'Wait till probably completed:
    For J = 1 To 10
        Sleep 500
        If IESess.document.getElementsByTagName("TD").Length > 100 Then Exit For
    Next J
Debug.Print "Completed", "TD=" & IESess.document.getElementsByTagName("TD").Length, "J=" & J
End Sub
It has to be copied to an empty standard vba Module
It also requires that a reference be set in the vba to the "Microsoft Internet Controls" library, using Menu /Tools /Reference
The macro will check if the user is already logged and, if necessary, will go through the login process using the credentials stored in Sheet2

Your workbook must contain a Sheet1, that initially will be CLEARED WITHOUT ANY NOTICE and then will compiled with the result of the search; and a Sheet2 that will be used for the access credentials, to be loaded in E2, E3 and E4:
-E2 is the account name
-E3 is the password
-E4 is the starting url; the macro has been tested on url= Football | Player Game Finder | Stathead.com

This Url searches for these datas:
"In a single game, in 2020, in the Regular Season, played QB, sorted by descending DraftKings points"


But other urls seems compatible, for example Football | Player Game Finder | Stathead.com (this will import much more lines, almost 19000, and takes its time)

Description of the search says:
"In a single game, in 2020, in the Regular Season, sorted by descending DraftKings points."


Or: Football | Player Game Finder | Stathead.com
Description of the search says:
"In a single game, in 2020, in the Regular Season, played QB, sorted by descending DraftKings points."

I don't know how the data are organized in the site, so I am not able to suggest the rules

Bye
Thanks you and the way there organized is off of what type of data you want to get out of it and for my purpose was fantasy pts. But it works very well thank you again for all your help. Great job done
 

Watch MrExcel Video

Forum statistics

Threads
1,126,957
Messages
5,621,823
Members
415,859
Latest member
Vain

We've detected that you are using an adblocker.

We have a great community of people providing Excel help here, but the hosting costs are enormous. You can help keep this site running by allowing ads on MrExcel.com.
Allow Ads at MrExcel

Which adblocker are you using?

Disable AdBlock

Follow these easy steps to disable AdBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the icon in the browser’s toolbar.
2)Click on the "Pause on this site" option.
Go back

Disable AdBlock Plus

Follow these easy steps to disable AdBlock Plus

1)Click on the icon in the browser’s toolbar.
2)Click on the toggle to disable it for "mrexcel.com".
Go back

Disable uBlock Origin

Follow these easy steps to disable uBlock Origin

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back

Disable uBlock

Follow these easy steps to disable uBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back
Top