Scraping macro stops working properly after some time

yahoo80

New Member
Joined
Jan 25, 2011
Messages
42
Guys

Can somebody advise me why my code doesn't work properly?
It does the job for the first few manufacturers and then it looks like it overloads...
So I guess the question is how should I improve my code so that it work properly.
Excel 2013 Dell xps 14z i7

Below first few rows for the worksheet 'Pages'



Code:
Public lastrow As LongPublic i As Long

Sub WebTableToSheet()
  'Tested using IE7,  Excel 2000 SP1, and Windows XP
  Dim objIE As Object
  Dim varTables, varTable
  Dim WS As Worksheet
  Dim StartTime As Double
  Dim z As Long


On Error GoTo Err
Set objIE = CreateObject("InternetExplorer.Application")
Application.ScreenUpdating = False
lastrow = Worksheets("Pages").Range("A" & Worksheets("Pages").Rows.Count).End(xlUp).Row


With objIE
    'loop through manufacturers RS sites
    For z = 2 To lastrow
        'add new sheet with manufacturer's name
        Set WS = Sheets.Add(After:=Sheets(Worksheets.Count))
        WS.Name = Trim(Worksheets("Pages").Range("B" & z).Value)
        
        .AddressBar = False
        .StatusBar = False
        .MenuBar = False
        .Toolbar = 0
        .Visible = True
start1:
        .Navigate Worksheets("Pages").Range("A" & z).Value
        While objIE.Busy
        Wend
        'back to start1 to reload page if loading is not coplete for over a minute
        Do Until objIE.Document.ReadyState <> "complete"""
            While objIE.Document.ReadyState <> "complete"""
                    If CInt(Timer - StartTime) > 60 Then
                        GoTo start1
                    End If
            Wend
        Loop
        Set Vars = objIE.Document.All.tags("div")
        'look for total products per manufacturer value
        For Each varTable In Vars
            test1 = InStr(varTable.innertext, "products")
            test2 = InStr(varTable.innertext, "Viewing")
            If test2 > test1 Then test1 = InStr(test1 + 1, varTable.innertext, "products")
            If test1 > 0 And test2 > 0 Then
                test3 = InStr(Mid(varTable.innertext, test2, test1 - test2), "of")
                lastproduct = Trim(Mid(varTable.innertext, test2 + test3 + 2, test1 - (test2 + test3 + 2)))
                If lastproduct > 0 Then
                    Exit For
                End If
            End If
        Next
        If lastproduct = 0 Then
            MsgBox ("Cant find total products for " & Trim(Worksheets("Pages").Range("B" & z).Value))
            Exit Sub
        End If
        'go through all products pages for current manufacturers
        For i = 0 To lastproduct Step 20 'move every 20 as there are 20 products per page
            StartTime = Timer
start2:
            .Navigate Worksheets("Pages").Range("A" & z).Value & "&page-offset=" & i
            While objIE.Busy
            Wend
            'back to start2 to reload page if loading is not coplete for over a minute
            Do Until objIE.Document.ReadyState <> "complete"""
                While objIE.Document.ReadyState <> "complete"""
                        If CInt(Timer - StartTime) > 60 Then
                            GoTo start2
                        End If
                Wend
            Loop
            
            checkTable objIE.Document, z
        Next i
    'QA total products per manufacturers against total products downloaded per manufacturer
    Worksheets("Pages").Range("C" & z).Value = lastproduct
    Worksheets("Pages").Range("D" & z).Value = Worksheets(Trim(Worksheets("Pages").Range("B" & z).Value)).Range("A" & Worksheets(Trim(Worksheets("Pages").Range("B" & z).Value)).Rows.Count).End(xlUp).Row - 1
    Next z
End With


  
Cleanup:
  Set varCell = Nothing: Set varCells = Nothing
  Set varRow = Nothing: Set varRows = Nothing
  Set varTable = Nothing: Set varTables = Nothing
  objIE.Quit
  Set objIE = Nothing
Application.ScreenUpdating = True
Exit Sub
Err:
MsgBox (eer.Number & "    " & Err.Description)


Application.ScreenUpdating = True
End Sub

Code:
Function checkTable(ByVal IE As Object, ByVal numeras As Long)
Dim varTables, varTable
Dim varRows, varRow
Dim varCells, varCell
Dim lngRow As Long, lngColumn As Long
Dim z As Long
    
    'lastrow of current sheet
    lastrow = Worksheets(Trim(Worksheets("Pages").Range("B" & numeras).Value)).Range("A" & Worksheets(Trim(Worksheets("Pages").Range("B" & numeras).Value)).Rows.Count).End(xlUp).Row + 1
    Set varTables = IE.All.tags("TABLE")
    For Each varTable In varTables
    'Use the innerText and header values to see if this is the table we want.
    If varTable.innertext Like "*" & "Description" & "*" And varTable.innertext Like "*" & "Category" & "*" Then
      Set varRows = varTable.Rows
      lngRow = lastrow 'This will be the first output row
      For Each varRow In varRows
        Set varCells = varRow.Cells
        lngColumn = 1 'This will be the output column
        For Each varCell In varCells
            'do nothing when  it's a header
            If varCell.innertext = Empty Or InStr(varCell.innertext, "Description") Or InStr(varCell.innertext, "Category") Or InStr(varCell.innertext, "Price") Or InStr(varCell.innertext, "Brand / Part No") Then
                
            Else
                z = 1
                Worksheets(Trim(Worksheets("Pages").Range("B" & numeras).Value)).Cells(lngRow, lngColumn) = varCell.innertext
                lngColumn = lngColumn + 1
            End If
        Next varCell
        'move to a new row if the data was added
        If z = 1 Then
            Worksheets(Trim(Worksheets("Pages").Range("B" & numeras).Value)).Cells(lngRow, lngColumn) = lastrow - 1
            lastrow = lastrow + 1
            lngRow = lngRow + 1
        End If
        z = 0
      Next varRow
    End If
  Next varTable
End Function
 

Excel Facts

Do you hate GETPIVOTDATA?
Prevent GETPIVOTDATA. Select inside a PivotTable. In the Analyze tab of the ribbon, open the dropown next to Options and turn it off
What is this?
"Here's my code, work out what its meant to do cos I cant be bothered to tell you"????

A description would be helpful
 
Upvote 0
So I guess the question is how should I improve my code so that it work properly.
Put Option Explicit at the top of the module and fix all the compilation errors first. Then you'll have a better chance of debugging any run-time errors.
 
Upvote 0
So the macro scrape products' data from rs components site (loops through the links to the manufacturers sites(displaying all their products) provided on sheet 'Pages').
First it looks for total products per manufacturer and then loop through all products scraping the data using Function checkTable and reloading the page changing displayed products (step 20) until it gets to the last products page.

Function checkTable looks for the correct table and copy the data to excel worksheet (one per manufacturer)

Code:
Option Explicit
Public lastrow As Long
Public i As Long
Sub WebTableToSheet()  
  Dim objIE As Object
  Dim varTables, varTable, Vars
  Dim WS As Worksheet
  Dim StartTime As Double
  Dim z As Long
  Dim test1 As Long, test2 As Long, test3 As Long, lastproduct As Long



Set objIE = CreateObject("InternetExplorer.Application")
Application.ScreenUpdating = False
lastrow = Worksheets("Pages").Range("A" & Worksheets("Pages").Rows.Count).End(xlUp).Row


With objIE
    'loop through manufacturers RS sites
    For z = 2 To lastrow
        'add new sheet with manufacturer's name
        Set WS = Sheets.Add(After:=Sheets(Worksheets.Count))
        WS.Name = Trim(Worksheets("Pages").Range("B" & z).Value)
        
        .AddressBar = False
        .StatusBar = False
        .MenuBar = False
        .Toolbar = 0
        .Visible = True
start1:
        .Navigate Worksheets("Pages").Range("A" & z).Value
        While objIE.Busy
        Wend
        'back to start1 to reload page if loading is not coplete for over a minute
        Do Until objIE.Document.ReadyState <> "complete"""
            While objIE.Document.ReadyState <> "complete"""
                    If CInt(Timer - StartTime) > 60 Then
                        GoTo start1
                    End If
            Wend
        Loop
        Set Vars = objIE.Document.All.tags("div")
        'look for total products per manufacturer value
        For Each varTable In Vars
            test1 = InStr(varTable.innertext, "products")
            test2 = InStr(varTable.innertext, "Viewing")
            If test2 > test1 Then test1 = InStr(test1 + 1, varTable.innertext, "products")
            If test1 > 0 And test2 > 0 Then
                test3 = InStr(Mid(varTable.innertext, test2, test1 - test2), "of")
                lastproduct = Trim(Mid(varTable.innertext, test2 + test3 + 2, test1 - (test2 + test3 + 2)))
                If lastproduct > 0 Then
                    Exit For
                End If
            End If
        Next
        If lastproduct = 0 Then
            MsgBox ("Cant find total products for " & Trim(Worksheets("Pages").Range("B" & z).Value))
            Exit Sub
        End If
        'go through all products pages for current manufacturers
        For i = 0 To lastproduct Step 20 'move every 20 as there are 20 products per page
            StartTime = Timer
start2:
            .Navigate Worksheets("Pages").Range("A" & z).Value & "&page-offset=" & i
            While objIE.Busy
            Wend
            'back to start2 to reload page if loading is not coplete for over a minute
            Do Until objIE.Document.ReadyState <> "complete"""
                While objIE.Document.ReadyState <> "complete"""
                        If CInt(Timer - StartTime) > 60 Then
                            GoTo start2
                        End If
                Wend
            Loop
            
            checkTable objIE.Document, z
        Next i
    'QA total products per manufacturers against total products downloaded per manufacturer
    Worksheets("Pages").Range("C" & z).Value = lastproduct
    Worksheets("Pages").Range("D" & z).Value = Worksheets(Trim(Worksheets("Pages").Range("B" & z).Value)).Range("A" & Worksheets(Trim(Worksheets("Pages").Range("B" & z).Value)).Rows.Count).End(xlUp).Row - 1
    Next z
End With


  
Cleanup:
  Set varTable = Nothing: Set varTables = Nothing
  objIE.Quit
  Set objIE = Nothing


Application.ScreenUpdating = True
End Sub

Code:
Function checkTable(ByVal IE As Object, ByVal numeras As Long)Dim varTables, varTable
Dim varRows, varRow, Vars
Dim varCells, varCell
Dim lngRow As Long, lngColumn As Long
Dim z As Long
    
    'lastrow of current sheet
    lastrow = Worksheets(Trim(Worksheets("Pages").Range("B" & numeras).Value)).Range("A" & Worksheets(Trim(Worksheets("Pages").Range("B" & numeras).Value)).Rows.Count).End(xlUp).Row + 1
    Set varTables = IE.All.tags("TABLE")
    For Each varTable In varTables
    'Use the innerText and header values to see if this is the table we want.
    If varTable.innertext Like "*" & "Description" & "*" And varTable.innertext Like "*" & "Category" & "*" Then
      Set varRows = varTable.Rows
      lngRow = lastrow 'This will be the first output row
      For Each varRow In varRows
        Set varCells = varRow.Cells
        lngColumn = 1 'This will be the output column
        For Each varCell In varCells
            'do nothing when  it's a header
            If varCell.innertext = Empty Or InStr(varCell.innertext, "Description") Or InStr(varCell.innertext, "Category") Or InStr(varCell.innertext, "Price") Or InStr(varCell.innertext, "Brand / Part No") Then
                
            Else
                z = 1
                Worksheets(Trim(Worksheets("Pages").Range("B" & numeras).Value)).Cells(lngRow, lngColumn) = varCell.innertext
                lngColumn = lngColumn + 1
            End If
        Next varCell
        'move to a new row if the data was added
        If z = 1 Then
            Worksheets(Trim(Worksheets("Pages").Range("B" & numeras).Value)).Cells(lngRow, lngColumn) = lastrow - 1
            lastrow = lastrow + 1
            lngRow = lngRow + 1
        End If
        z = 0
      Next varRow
    End If
  Next varTable
End Function
 
Upvote 0

Forum statistics

Threads
1,215,094
Messages
6,123,071
Members
449,092
Latest member
ipruravindra

We've detected that you are using an adblocker.

We have a great community of people providing Excel help here, but the hosting costs are enormous. You can help keep this site running by allowing ads on MrExcel.com.
Allow Ads at MrExcel

Which adblocker are you using?

Disable AdBlock

Follow these easy steps to disable AdBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the icon in the browser’s toolbar.
2)Click on the "Pause on this site" option.
Go back

Disable AdBlock Plus

Follow these easy steps to disable AdBlock Plus

1)Click on the icon in the browser’s toolbar.
2)Click on the toggle to disable it for "mrexcel.com".
Go back

Disable uBlock Origin

Follow these easy steps to disable uBlock Origin

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back

Disable uBlock

Follow these easy steps to disable uBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back
Back
Top