Sharid
Well-known Member
- Joined
- Apr 22, 2007
- Messages
- 1,064
- Office Version
- 2016
- Platform
- Windows
Hi
I need a bit of help to update this code. The code it self works fine there is only ONE issue with it. The result are shown at the end when it has finished, so if I have to process 10 urls then it shown the data when it is finished, but most of the time it is 100s of url and I would like it to show the result as they are found and NOT at the end. Apart from that the code works fine
Thanks for having a look
I need a bit of help to update this code. The code it self works fine there is only ONE issue with it. The result are shown at the end when it has finished, so if I have to process 10 urls then it shown the data when it is finished, but most of the time it is 100s of url and I would like it to show the result as they are found and NOT at the end. Apart from that the code works fine
VBA Code:
Private Sub fbStart_Click()
Dim ie As Object, ws As Worksheet, wsUrls As Worksheet, urls()
Set ie = CreateObject("InternetExplorer.Application")
Set ws = ThisWorkbook.Worksheets("Scraper")
Set wsUrls = ThisWorkbook.Worksheets("Url List")
With wsUrls
urls = Application.Transpose(.Range("A2:A" & .Cells(.Rows.Count, "A").End(xlUp).Row).Value)
End With
Dim results(), r As Long
ReDim results(1 To UBound(urls), 1 To 2)
With ie
.Visible = True
For r = LBound(urls) To UBound(urls)
.Navigate2 urls(r)
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
Dim email As String, website As String, iconCssSelector As String
'iconCssSelector for website icon in this instance
iconCssSelector = "[src='https://static.xx.fbcdn.net/rsrc.php/v3/yV/r/EaDvTjOwxIV.png']"
If ElementIsPresent(ie.document, "[href^=mailto]") Then
email = ie.document.querySelector("[href^=mailto]").innerText
Else
email = "Not found"
End If
Dim parents As Object, sharedParentCssSelector As String, childOfSiblingCssSelector As String
sharedParentCssSelector = "._5aj7" 'target parent of both icon and the website link
childOfSiblingCssSelector = "._50f4" '< to target website address after finding right parent
' Put no1 in sheet2 column B
Sheets("Url List").Range("B" & Rows.Count).End(xlUp).Offset(1, 0).Value = 1
'Count No1 in sheet2 Column B
With Worksheets("Url List")
Lastrow = .Cells(.Rows.Count, "B").End(xlUp).Row
Sheets("Url List").Range("B1").Value = Lastrow
End With
Call Autoclick_Click
If ElementIsPresent(ie.document, iconCssSelector) _
And ElementIsPresent(ie.document, sharedParentCssSelector) Then
Set parents = ie.document.querySelectorAll(sharedParentCssSelector) 'css selector used to allow for greater flexibility in element matching
website = GetText(ie.document, parents, iconCssSelector, childOfSiblingCssSelector)
Else
website = "Not found"
End If
End With
'Assumes headers already present
Dim nextRow As Long
results(r, 1) = email
results(r, 2) = website
Next
'Hide FaceBook Scraper Form
.Quit
ScraperForm.Hide
End With
nextRow = GetLastRow(ws, 1) + 1
ws.Cells(nextRow, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
' Delete duplicates
Sheet1.Columns("A:B").RemoveDuplicates Columns:=Array(1, 2)
End Sub
Public Function ElementIsPresent(ByVal document As HTMLDocument, ByVal cssSelector As String) As Boolean
ElementIsPresent = document.querySelectorAll(cssSelector).Length > 0
End Function
VBA Code:
Public Function GetText(ByVal document As HTMLDocument, ByVal parents As Object, ByVal iconCssSelector As String, ByVal childOfSiblingCssSelector As String) As String
'in this instance and with microsoft IE DOM you cannot select for parent of an element with pseudo class _
of :has(>child); nor use :contains... instead pass expected parent selector, that houses _
both the icon element for website and the website address itself, and loop all matches checking for website icon _
if found use childOfSiblingCssSelector to extract
Dim i As Long, html As HTMLDocument
Set html = New HTMLDocument
For i = 0 To parents.Length - 1
html.body.innerHTML = parents.Item(i).innerHTML
If ElementIsPresent(html, iconCssSelector) Then
GetText = html.querySelector(childOfSiblingCssSelector).innerText
Exit Function
End If
Next
GetText = "Not found"
End Function
VBA Code:
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
With ws
GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
End With
End Function
Thanks for having a look