楼主 minren118 |
本人做淘宝电商,经常会在淘宝首页的搜索中抓取些信息,经过学习和参考社区里的前辈的指引,作成了一个小小的作品,可以在淘宝首页上抓取信息,不过属于半自动,图片方面只能用正则把图片地址抓取下来,再用迅雷工具批量下载,有兴趣的朋友们可以下载看看,最大的知识点是<span style="line-height: 1.5;">Microsoft.XMLHTTP 和正则,小批量数据抓取还算效果不错。也欢迎大侠们指点更好的思路,因代码之前写的是自用的,备注写得有些潦草,有疑问的地方可以跟帖提出,刚刚开始分享自己工作中的EXCEL应用,可能都比较嫩,不到位的地方请多多前辈们包涵。</span>
- Sub 搜索淘宝低价()
- Dim url, a As Long, b As Long, i As Integer, j As Integer, arr1(), matchs, tt As String
- Dim item As Integer, num
- Dim reg As Object, regnum
- url = InputBox("请输入抓取网址")
- num = InputBox("请输入抓取页数,一页40条宝贝", , 3)
- 'url = "http://s.taobao.com/search?spm=a230r.1.7.1.ywgunN&promote=0&sort=sale-desc&initiative_id=tbindexz_20130814&tab=all&q=%BE%C5%D1%F4+%D4%AD%D6%AD%BB%FA+E16&source=suggest&suggest=history_1&s="
- 'num = 3
-
- For i = 0 To num - 1
- a = Range("a" & Rows.Count).End(xlUp).Row '开始行
- Application.StatusBar = "现在第" & i & "个循环"
- With CreateObject("Microsoft.XMLHTTP")
- .Open "GET", Split(url, "s=")(0) & "s=" & i * 40 & "#J_relative", True
- .send
- Do Until .ReadyState = 4
- DoEvents
- Loop
- tt = .responseText
- End With
- Set reg = CreateObject("VBSCRIPT.REGEXP")
-
- With reg
- .Pattern = "<h3 class=.+?href=""(.+?)"".+?title=""(.+?)""" '查找标题、宝贝链接
- .Global = True
- .IgnoreCase = False
- If .Test(tt) Then
- Set matchs = .Execute(tt)
- regnum = matchs.Count
- ReDim arr1(1 To regnum, 1 To 9)
- For j = 1 To regnum
- arr1(j, 1) = Date
- arr1(j, 3) = matchs(j - 1).submatches(1)
- arr1(j, 4) = matchs(j - 1).submatches(0)
- Next
- End If
-
-
- End With
- With reg
- .Pattern = "lazyload=""(.+)""" '查找图片链接
- .Global = True
- .IgnoreCase = False
- If .Test(tt) Then
- Set matchs = .Execute(tt)
- For j = 1 To regnum
- arr1(j, 2) = matchs(j - 1).submatches(0)
- arr1(j, 9) = Split(matchs(j - 1).submatches(0), "/")(UBound(Split(matchs(j - 1).submatches(0), "/")))
- Next
- End If
- End With
-
- With reg
- .Pattern = "user_number_id=.+?>(.+?)<" '查找店铺名
- .Global = True
- .IgnoreCase = False
- If .Test(tt) Then
- Set matchs = .Execute(tt)
- For j = 1 To regnum
- arr1(j, 5) = matchs(j - 1).submatches(0)
- Next
- End If
- End With
- With reg
- .Pattern = "col price"">¥(.+?)<" '价格
- .Global = True
- .IgnoreCase = False
- If .Test(tt) Then
- Set matchs = .Execute(tt)
- For j = 1 To regnum
- arr1(j, 6) = CLng(matchs(j - 1).submatches(0))
- Next
- End If
- End With
-
- With reg
- .Pattern = "最近30天 (\d+)人付款 (\d+)人收货" '成交人数
- .Global = True
- .IgnoreCase = False
- If .Test(tt) Then
- Set matchs = .Execute(tt)
- For j = 1 To regnum
- arr1(j, 7) = CInt(matchs(j - 1).submatches(0))
- arr1(j, 8) = CInt(matchs(j - 1).submatches(1))
- Next
- End If
- End With
- Range("a" & a + 1).Resize(regnum, 9) = arr1
- Next
- Application.StatusBar = False
- End Sub
批量下载淘宝搜索内容v4.zip
|