Write all URLs to cells instead

jamescooper

Well-known Member
Joined
Sep 8, 2014
Messages
834
Hello, the following code successfully pulls all the URL links from a webpage and puts them in a listbox; how do I modify it to put them in column A, for example?

Thanks.

Code:
' Loads the page whose address is stored in Sheet1!E4 in a hidden Internet
' Explorer instance and lists every <a> element found on it in
' Sheet1.ListBox1. Does nothing when E4 is empty.
Sub Getalllinks()

    Dim IE As Object
    Dim AllHyperlinks As Object
    Dim hyper_link As Object
    Dim url_name As String

    ' Read the address before starting the browser: leaving the procedure
    ' after CreateObject but before IE.Quit would leave a hidden
    ' Internet Explorer process running.
    url_name = Sheet1.Range("E4")
    If url_name = "" Then Exit Sub

    Set IE = CreateObject("InternetExplorer.Application")
    IE.Visible = False

    IE.navigate url_name

    ' Yield to the message loop until the document has finished loading
    ' (ReadyState 4 = READYSTATE_COMPLETE).
    Do
        DoEvents
    Loop Until IE.ReadyState = 4

    Set AllHyperlinks = IE.Document.getElementsByTagName("A")
    Sheet1.ListBox1.Clear

    ' The parentheses force the anchor's default property to be evaluated,
    ' so the list box receives text rather than an object reference.
    For Each hyper_link In AllHyperlinks
        Sheet1.ListBox1.AddItem (hyper_link)
    Next hyper_link

    IE.Quit

End Sub
 
Right you are.

Code:
' Visits every address listed from E4 downwards (stopping at the first blank
' cell), collects the distinct link addresses that contain the filter text,
' and writes them to column A starting at A1.
Sub Getalllinks()
    Dim IE As Object
    Dim AL As Object
    Dim AR As Variant
    Dim AllHyperlinks As Object
    Dim hyper_link As Object
    Dim url_name As String
    Dim singleValue As Variant
    Dim lastRow As Long
    Dim i As Long
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    ' Nothing to do when column E holds no address at or below row 4.
    lastRow = Range("E" & Rows.Count).End(xlUp).Row
    If lastRow < 4 Then Exit Sub

    ' Range.Value returns a scalar (not an array) for a single cell, so wrap
    ' it to keep the loop below uniform.
    AR = Range("E4:E" & lastRow).Value
    If Not IsArray(AR) Then
        singleValue = AR
        ReDim AR(1 To 1, 1 To 1)
        AR(1, 1) = singleValue
    End If

    Set AL = CreateObject("System.Collections.ArrayList")
    Set IE = CreateObject("InternetExplorer.Application")
    IE.Visible = False

    ' From here on any failure must still close the hidden browser.
    On Error GoTo Fail

    For i = LBound(AR, 1) To UBound(AR, 1)
        url_name = AR(i, 1)

        If url_name = "" Then Exit For
        IE.navigate url_name

        ' Also test Busy: right after navigate the ReadyState of the
        ' previously loaded page can still read 4 (complete).
        Do
            DoEvents
        Loop Until IE.ReadyState = 4 And Not IE.Busy

        Set AllHyperlinks = IE.Document.getElementsByTagName("A")
        Sheet1.ListBox1.Clear

        For Each hyper_link In AllHyperlinks
            ' Store the href string, not the anchor object, so Contains
            ' compares addresses and the result can be written to cells.
            If InStr(hyper_link.href, "http://www.awebsite.co.uk/123") > 0 Then
                If Not AL.Contains(hyper_link.href) Then AL.Add hyper_link.href
            End If
        Next hyper_link
    Next i

Done:
    ' Always close the browser, then surface any saved error to the caller.
    On Error Resume Next
    IE.Quit
    On Error GoTo 0
    If errNumber <> 0 Then Err.Raise errNumber, errSource, errDescription

    ' Resize(0, 1) is invalid, so only write when something was collected.
    If AL.Count > 0 Then
        Range("A1").Resize(AL.Count, 1).Value = Application.Transpose(AL.ToArray)
    End If
    Exit Sub

Fail:
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    Resume Done
End Sub
 
Upvote 0

Excel Facts

Lock one reference in a formula
Need 1 part of a formula to always point to the same range? use $ signs: $V$2:$Z$99 will always point to V2:Z99, even after copying
Ah I put the next there, but should have been next i

So that has solved that, but now I am getting an automation error.

I believe this is to do with the shutting and opening of IE?
 
Upvote 0
In E4:E

https://www.tesco.com/groceries/en-GB/shop/fresh-food?include-children=true

https://www.tesco.com/groceries/en-GB/shop/bakery?include-children=true

https://www.tesco.com/groceries/en-GB/shop/frozen-food?include-children=true

https://www.tesco.com/groceries/en-GB/shop/food-cupboard?include-children=true

https://www.tesco.com/groceries/en-GB/shop/drinks?include-children=true

https://www.tesco.com/groceries/en-GB/shop/baby?include-children=true
https://www.tesco.com/groceries/en-GB/shop/health-and-beauty?include-children=true

https://www.tesco.com/groceries/en-GB/shop/pets?include-children=true

https://www.tesco.com/groceries/en-GB/shop/household?include-children=true
https://www.tesco.com/groceries/en-GB/shop/home-and-ents?include-children=true

https://www.tesco.com/groceries/en-GB/shop/inspiration-and-events?include-children=true

and then I changed the search string to the part marked in red:

Code:
For Each hyper_link In AllHyperlinks
        If InStr(hyper_link, "[COLOR=#ff0000]tesco.com/[/COLOR]") Then
            If Not AL.contains(hyper_link) Then AL.Add hyper_link
 
Upvote 0
This code ran successfully on my end.

Code:
' Visits every address listed from E4 downwards (stopping at the first blank
' cell), collects the distinct link addresses (href) that contain
' "tesco.com/", and writes them to column A starting at A1.
Sub Getalllinks()
    Dim IE As Object
    Dim AL As Object
    Dim AR As Variant
    Dim AllHyperlinks As Object
    Dim hyper_link As Object
    Dim url_name As String
    Dim singleValue As Variant
    Dim lastRow As Long
    Dim i As Long
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    ' Nothing to do when column E holds no address at or below row 4.
    lastRow = Range("E" & Rows.Count).End(xlUp).Row
    If lastRow < 4 Then Exit Sub

    ' Range.Value returns a scalar (not an array) for a single cell, so wrap
    ' it to keep the loop below uniform.
    AR = Range("E4:E" & lastRow).Value
    If Not IsArray(AR) Then
        singleValue = AR
        ReDim AR(1 To 1, 1 To 1)
        AR(1, 1) = singleValue
    End If

    Set AL = CreateObject("System.Collections.ArrayList")
    Set IE = CreateObject("InternetExplorer.Application")
    IE.Visible = False

    ' From here on any failure must still close the hidden browser.
    On Error GoTo Fail

    For i = LBound(AR, 1) To UBound(AR, 1)
        url_name = AR(i, 1)

        If url_name = "" Then Exit For
        IE.navigate url_name

        ' Also test Busy: right after navigate the ReadyState of the
        ' previously loaded page can still read 4 (complete).
        Do
            DoEvents
        Loop Until IE.ReadyState = 4 And Not IE.Busy

        Set AllHyperlinks = IE.Document.getElementsByTagName("A")
        Sheet1.ListBox1.Clear

        For Each hyper_link In AllHyperlinks
            ' Use the href string explicitly (the change this version of the
            ' macro introduces) so Contains compares addresses, not objects.
            If InStr(hyper_link.href, "tesco.com/") > 0 Then
                If Not AL.Contains(hyper_link.href) Then AL.Add hyper_link.href
            End If
        Next hyper_link
    Next i

Done:
    ' Always close the browser, then surface any saved error to the caller.
    On Error Resume Next
    IE.Quit
    On Error GoTo 0
    If errNumber <> 0 Then Err.Raise errNumber, errSource, errDescription

    ' Resize(0, 1) is invalid, so only write when something was collected.
    If AL.Count > 0 Then
        Range("A1").Resize(AL.Count, 1).Value = Application.Transpose(AL.ToArray)
    End If
    Exit Sub

Fail:
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    Resume Done
End Sub
 
Upvote 0
It fails on this
Code:
Set AL = CreateObject("System.Collections.ArrayList")

Is it because of references?
 
Upvote 0
I'm guessing an old version of Excel or something. Try this one.

Code:
' Visits every address listed from E4 downwards (stopping at the first blank
' cell), collects every link address (href) that contains "tesco.com/" into a
' plain VBA array, and writes the array to column A starting at A1.
' Uses no ArrayList, so it does not depend on System.Collections being
' creatable on the machine. Duplicate addresses are kept.
Sub Getalllinks()
    Dim IE As Object
    Dim cnt As Long: cnt = 1          ' index of the next free slot in AL
    Dim AR As Variant
    Dim AL() As Variant
    Dim AllHyperlinks As Object
    Dim hyper_link As Object
    Dim url_name As String
    Dim singleValue As Variant
    Dim lastRow As Long
    Dim i As Long
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    ' Nothing to do when column E holds no address at or below row 4.
    lastRow = Range("E" & Rows.Count).End(xlUp).Row
    If lastRow < 4 Then Exit Sub

    ' Range.Value returns a scalar (not an array) for a single cell, so wrap
    ' it to keep the loop below uniform.
    AR = Range("E4:E" & lastRow).Value
    If Not IsArray(AR) Then
        singleValue = AR
        ReDim AR(1 To 1, 1 To 1)
        AR(1, 1) = singleValue
    End If

    Set IE = CreateObject("InternetExplorer.Application")
    IE.Visible = False

    ' From here on any failure must still close the hidden browser.
    On Error GoTo Fail

    For i = LBound(AR, 1) To UBound(AR, 1)
        url_name = AR(i, 1)

        If url_name = "" Then Exit For
        IE.navigate url_name

        ' Also test Busy: right after navigate the ReadyState of the
        ' previously loaded page can still read 4 (complete).
        Do
            DoEvents
        Loop Until IE.ReadyState = 4 And Not IE.Busy

        Set AllHyperlinks = IE.Document.getElementsByTagName("A")
        Sheet1.ListBox1.Clear

        For Each hyper_link In AllHyperlinks
            If InStr(hyper_link.href, "tesco.com/") > 0 Then
                ReDim Preserve AL(1 To cnt)
                AL(cnt) = hyper_link.href
                cnt = cnt + 1
            End If
        Next hyper_link
    Next i

Done:
    ' Always close the browser, then surface any saved error to the caller.
    On Error Resume Next
    IE.Quit
    On Error GoTo 0
    If errNumber <> 0 Then Err.Raise errNumber, errSource, errDescription

    ' AL is never dimensioned when no link matched; UBound would then raise,
    ' so only write when at least one address was stored.
    If cnt > 1 Then
        Range("A1").Resize(UBound(AL), 1).Value = Application.Transpose(AL)
    End If
    Exit Sub

Fail:
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    Resume Done
End Sub
 
Upvote 0
That is great. I am obtaining duplicate links, though; how would I ensure the code above does not return the same link twice?

Thanks.
 
Upvote 0

Forum statistics

Threads
1,214,429
Messages
6,119,424
Members
448,896
Latest member
MadMarty

We've detected that you are using an adblocker.

We have a great community of people providing Excel help here, but the hosting costs are enormous. You can help keep this site running by allowing ads on MrExcel.com.
Allow Ads at MrExcel

Which adblocker are you using?

Disable AdBlock

Follow these easy steps to disable AdBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the "Pause on this site" option.
Go back

Disable AdBlock Plus

Follow these easy steps to disable AdBlock Plus

1)Click on the icon in the browser’s toolbar.
2)Click on the toggle to disable it for "mrexcel.com".
Go back

Disable uBlock Origin

Follow these easy steps to disable uBlock Origin

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back

Disable uBlock

Follow these easy steps to disable uBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back
Back
Top