Please help..scrap data from website

imran ashraf

New Member
Joined
Nov 15, 2016
Messages
39
PLEASE HELP I NEED TO SCRAP DATA FROM Arrest.org IN BELOW FORMAT .
Unique IDArrest StateArrest CountyNameAgeGenderHome AddressHome CityHome StatePostal CodeOccupationArrested onChargesPhoto FilePhoto LinkLink

<tbody>
</tbody>

I HAVE ALL READY A SCRIPT THAT BUT IMPORT IN DIFFERENT TABS BUT I NEED IN ABOVE FORMAT. MANY THANKS

Sub scrap2()


Dim sKill As String
sKill = "taskkill /F /IM iexplore.exe"
Shell sKill, vbHide
FnWait 2
Application.StatusBar = ""
FnWait 1


Dim IE As InternetExplorer
Set IE = New InternetExplorer
IE.Visible = Sheets(1).Range("B8")

For r = 1 To Sheets(1).Range("b6")
Application.StatusBar = r
Dim objPage2 As HTMLDocument
URL = Sheets(2).Range("A" & r)
On Error Resume Next
IE.Navigate URL
WaitIE IE
FnWait (1)
Set objPage2 = IE.document
Set info1 = objPage2.getElementsByClassName("section-content")
Sheets(3).Range("A" & r) = Trim(Clean_mul(info1(0).innerText))
Set info1 = Nothing

Set info2 = objPage2.getElementsByClassName("section-content personal-information")
Sheets(3).Range("B" & r) = Trim(Clean_mul(info2(0).innerText))
Set info2 = Nothing

Set info3 = objPage2.getElementsByClassName("section-content charges")
Sheets(3).Range("C" & r) = Trim(Clean_mul(info3(0).innerText))
Set info3 = Nothing

Set imgs = objPage2.getElementsByTagName("img")
Sheets(3).Range("D" & r) = Trim(Clean_mul(imgs(0).src))
Set imgs = Nothing
'section-content charges
'http://georgia.arrests.org/Arrests/Reginald_Davis_29028530/
'/mugs/Houston/2016/2016_6088.jpg
'http://georgia.arrests.org/mugs/Houston/2016/2016_6088.jpg

' urll = "http://" & Sheets(1).Range("B4") & ".arrests.org/" & imgurl


objPage2 = Nothing


Next r
End Sub
Sub macro_scrap()
Dim sKill As String
sKill = "taskkill /F /IM iexplore.exe"
Shell sKill, vbHide
FnWait 2
Application.StatusBar = ""
FnWait 1



Set objPage = New HTMLDocument
ct = 1
ct2 = 1
p = 1
view_lis = 56

Dim IE As InternetExplorer
Set IE = New InternetExplorer
IE.Visible = Sheets(1).Range("B7")
For p = 1 To 18
ct2 = 1
Dim objPage2 As HTMLDocument
URL = "http://" & Sheets(1).Range("B4") & ".arrests.org/?page=" & p & "&results=" & view_lis
On Error Resume Next
IE.Navigate URL
WaitIE IE
FnWait (5)
Set objPage2 = IE.document
Set dados = objPage2.getElementsByTagName("a")
For Each oElement In dados
If InStr(oElement.href, "/?d=1") Then
Sheets(2).Range("a" & ct) = Replace(oElement.href, "about:", URL)
Sheets(2).Range("b" & ct) = p
ct = ct + 1
ct2 = ct2 + 1
If ct2 >= 112 Then
ct2 = 1
Exit For
End If
End If
Next oElement
objPage2 = Nothing
Application.StatusBar = p
Next p
'URL & arsit '/Arrests/Landon_Garrett_29025095/?d=1
End

'Mailing_address
Sheets(2).Range("D" & r) = objPage.getElementById("MainContent_lblAddrLine1").innerText & " " & objPage.getElementById("MainContent_lblAddrLine3").innerText
'Owner_Name
Sheets(2).Range("E" & r) = objPage.getElementById("MainContent_gvOwners").getElementsByTagName("tr")(1).innerText
'Property_Address
Sheets(2).Range("F" & r) = objPage.getElementById("MainContent_lblLocation").innerText
'City
Sheets(2).Range("G" & r) = Left(Sheets(2).Range("B" & r).Text, 2) & "-" & objPage.getElementById("MainContent_lblSubdiv").innerText
'Zip_Code


'Property_Type
Sheets(2).Range("I" & r) = objPage.getElementById("MainContent_lblUsecode").innerText
'Acres
Sheets(2).Range("j" & r) = objPage.getElementById("MainContent_lblAcres").innerText
'Footage
Sheets(2).Range("k" & r) = objPage.getElementById("MainContent_lblSqFt").innerText
'Bedrooms
Sheets(2).Range("L" & r) = objPage.getElementById("MainContent_lblUnits").innerText
'Bathrooms


'Prev_Sale Year
Sheets(2).Range("N" & r) = objPage.getElementById("MainContent_lblSaleDate").innerText
'Prev_Sale
Sheets(2).Range("o" & r) = objPage.getElementById("MainContent_gvSalesInfo").getElementsByTagName("td")(1).innerText
'Tax_Appraisal
Set arr_tr_el = objPage.getElementsByTagName("tr")
ct = 0
For Each tr_el In arr_tr_el
If InStr(tr_el.innerText, "Taxable Value") Then
ct = ct + 1
If ct = 3 Then
Sheets(2).Range("P" & r) = Trim(tr_el.getElementsByTagName("td")(1).innerText)
End If
End If
Next tr_el


Set arr_tr_el = objPage.getElementsByTagName("tr")
ct = 0
For Each tr_el In arr_tr_el
If InStr(tr_el.innerText, "Total tax") Then
ct = ct + 1
If ct = 3 Then
Sheets(2).Range("R" & r) = Trim(tr_el.getElementsByTagName("td")(1).innerText)
End If
End If
Next tr_el
'Tax_link
Sheets(2).Range("s" & r) = "http://pbctax.manatron.com/tabs/propertyTax/accountdetail.aspx?p=" & Sheets(2).Range("B" & r) & "&"
'Dim IE As New InternetExplorer
'IE.Visible = True
'IE.Navigate2 "http://pbctax.manatron.com/tabs/propertyTax/accountdetail.aspx?p=" & Sheets(2).Range("B" & r)
'FnWait (3)
'WaitIE IE
'IE.document.getElementById("fldInput").Value = Sheets(2).Range("E" & r)
'IE.document.getElementById("btnsearch").Click
'WaitIE IE
'FnWait (5)
'Set arr_td_el = IE.document.getElementsByTagName("td")
'For Each td_el In arr_td_el
'MsgBox td_el.innerText
'If InStr(td_el.getAttribute("title"), "View Account Detail") Then
'MsgBox td_el.getAttribute("title")
'Sheets(2).Range("s" & r) = td_el.getAttribute("href")
'Exit For
'End If
'Next td_el
'getElementsByTagName("tr")(1) '.getElementsByTagName("td")(5).innerText .getElementById("grm-search")
'End
'IE.Quit
'Notes



FnWait Sheets(1).Range("B1")

End Sub


Function FnWait(intTime)
Application.Wait DateAdd("s", intTime, Now)
End Function


Public Function WaitIE(ByRef objIEBrowser As InternetExplorer)
Do While objIEBrowser.Busy Or objIEBrowser.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
End Function


Function Clean_mul(ByVal strIn As String) As String
strIn = Trim(strIn)
' // Replace all double space pairings with single spaces
Do While InStr(strIn, vbNewLine & vbNewLine)
strIn = Replace(strIn, vbNewLine & vbNewLine, vbNewLine)
Loop
Clean_mul = strIn
End Function
 

Some videos you may like

Excel Facts

Save Often
If you start asking yourself if now is a good time to save your Excel workbook, the answer is Yes

Watch MrExcel Video

Forum statistics

Threads
1,122,861
Messages
5,598,493
Members
414,243
Latest member
Shockpulsar

We've detected that you are using an adblocker.

We have a great community of people providing Excel help here, but the hosting costs are enormous. You can help keep this site running by allowing ads on MrExcel.com.
Allow Ads at MrExcel

Which adblocker are you using?

Disable AdBlock

Follow these easy steps to disable AdBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the icon in the browser’s toolbar.
2)Click on the "Pause on this site" option.
Go back

Disable AdBlock Plus

Follow these easy steps to disable AdBlock Plus

1)Click on the icon in the browser’s toolbar.
2)Click on the toggle to disable it for "mrexcel.com".
Go back

Disable uBlock Origin

Follow these easy steps to disable uBlock Origin

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back

Disable uBlock

Follow these easy steps to disable uBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back
Top