Updating web scraping code


Well-known Member
Apr 22, 2007
Office Version
Re: Need help on updating this code

It's OK, I have got it sorted; I just need to add your line on deleting duplicates.

Option Explicit
'Requires a reference: VBE > Tools > References > Microsoft HTML Object Library
Public Sub test()
    'Visits every URL listed in column A (row 2 downwards) of the "Url List" sheet with
    'Internet Explorer, reads an email address and a website address from each page and
    'appends one row per URL (email, website) below the last used row of column A on the
    '"Scraper" sheet. "Not found" is written when a value cannot be located.
    'Assumes headers are already present on the "Scraper" sheet.
    Dim ie As Object, ws As Worksheet, wsUrls As Worksheet, urls()
    Dim results(), r As Long, lastUrlRow As Long, nextRow As Long
    Dim email As String, website As String, iconCssSelector As String
    Dim parents As Object, sharedParentCssSelector As String, childOfSiblingCssSelector As String

    Set ws = ThisWorkbook.Worksheets("Scraper")
    Set wsUrls = ThisWorkbook.Worksheets("Url List")

    With wsUrls
        lastUrlRow = .Cells(.Rows.Count, "A").End(xlUp).Row
        'Nothing below the header row: there are no URLs to process
        If lastUrlRow < 2 Then Exit Sub

        If lastUrlRow = 2 Then
            'A single cell's .Value is a scalar, not an array, so build the one-item list by hand
            ReDim urls(1 To 1)
            urls(1) = .Range("A2").Value
        Else
            'Transpose turns the n x 1 range values into a 1-D, 1-based array
            urls = Application.Transpose(.Range("A2:A" & lastUrlRow).Value)
        End If
    End With
    ReDim results(1 To UBound(urls), 1 To 2)

    'iconCssSelector for website icon in this instance
    iconCssSelector = "[src='https://static.xx.fbcdn.net/rsrc.php/v3/yV/r/EaDvTjOwxIV.png']"
    sharedParentCssSelector = "._5aj7" 'target parent of both icon and the website link
    childOfSiblingCssSelector = "._50f4" '< to target website address after finding right parent

    Set ie = CreateObject("InternetExplorer.Application")
    With ie
        .Visible = True

        For r = LBound(urls) To UBound(urls)
            .Navigate2 urls(r)

            'Wait until the page has finished loading
            While .Busy Or .readyState < 4: DoEvents: Wend

            If ElementIsPresent(ie.document, "[href^=mailto]") Then
                email = ie.document.querySelector("[href^=mailto]").innerText
            Else
                email = "Not found"
            End If

            If ElementIsPresent(ie.document, iconCssSelector) _
               And ElementIsPresent(ie.document, sharedParentCssSelector) Then
                'css selector used to allow for greater flexibility in element matching
                Set parents = ie.document.querySelectorAll(sharedParentCssSelector)
                website = GetText(ie.document, parents, iconCssSelector, childOfSiblingCssSelector)
            Else
                website = "Not found"
            End If

            results(r, 1) = email
            results(r, 2) = website
        Next r

        'Close the browser so no IE process is left running after the scrape
        .Quit
    End With

    'Write all results in one go, starting on the first empty row of column A
    nextRow = GetLastRow(ws, 1) + 1
    ws.Cells(nextRow, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

Public Function ElementIsPresent(ByVal document As HTMLDocument, ByVal cssSelector As String) As Boolean
    'True when at least one node in the supplied document matches cssSelector
    Dim matches As Object
    Set matches = document.querySelectorAll(cssSelector)
    ElementIsPresent = (matches.length > 0)
End Function

Public Function GetText(ByVal document As HTMLDocument, ByVal parents As Object, ByVal iconCssSelector As String, ByVal childOfSiblingCssSelector As String) As String
    'Returns the innerText of the first element matching childOfSiblingCssSelector found inside
    'a parent node that also contains the icon matched by iconCssSelector; returns "Not found"
    'when no parent qualifies.
    '
    'In this instance and with the Microsoft IE DOM you cannot select the parent of an element
    'with a pseudo class of :has(>child), nor use :contains. Instead pass the expected parent
    'selector matches (parents), which house both the icon element for the website and the
    'website address itself, and loop all matches checking for the website icon; if found,
    'use childOfSiblingCssSelector to extract the text.
    '
    'document is kept for interface compatibility; the search runs on a scratch HTMLDocument.
    Dim i As Long, html As HTMLDocument, target As Object
    Set html = New HTMLDocument

    For i = 0 To parents.Length - 1
        'Load this parent's markup into the scratch document so selectors apply to it alone
        html.body.innerHTML = parents.Item(i).innerHTML
        If ElementIsPresent(html, iconCssSelector) Then
            Set target = html.querySelector(childOfSiblingCssSelector)
            'Guard against the icon being present without the expected child element
            If Not target Is Nothing Then
                GetText = target.innerText
                Exit Function
            End If
        End If
    Next i

    GetText = "Not found"
End Function

Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    'Row number reached by End(xlUp) from the bottom cell of the given column on ws
    Dim bottomCell As Range
    Set bottomCell = ws.Cells(ws.Rows.Count, columnNumber)
    GetLastRow = bottomCell.End(xlUp).Row
End Function
Last edited:

Some videos you may like

Excel Facts

Workdays for a market open Mon, Wed, Friday?
Yes! Use "0101011" for the weekend argument in NETWORKDAYS.INTL or WORKDAY.INTL. The 7 digits start on Monday. 1 means it is a weekend.

Watch MrExcel Video

Forum statistics

Latest member

This Week's Hot Topics

  • Finding issue in If elseif else with For each Loop
    Finding issue in If elseif else with For each Loop I have tried the code below but I'm getting the Y column filled with W005. Could you please...
  • MsgBox Error
    Hi Guys, I have the below error show up when i try and run my macro in File1 but works fine if i copy and paste the same code into file2. [ATTACH...
    My Cell Format is [B]""0.00" Cr". [/B]But in the cell, it is showing 123.00 for editing. (123 is entry figure). (Data imported from other...
  • Show numbers nearly the same
    Is this possible? I have a number that can change every time, e.g. 0.00001234. Then I have a lot of numbers: 0.0000001, 0.0000002, 0.00000004...
  • Please i need your help to create formula
    I need a formula in cell B8 to do this >>if b1=1 then multiply ( cell b8) by 10% ,if b1=2 multiply by 20%,if=3 multiply by 30%. Thank you in...
  • Got error while adding column and filter
    Got error while adding column and filter In column Z has some like "Success" and "Error". I want to add column in AA if the Z cell value is...