标题:Excel 宏系列之抓取ad copy 百度广告位

-------------------------------------------------------------------------------------------------------------------------------

时间:2012/1/15 19:15:22

-------------------------------------------------------------------------------------------------------------------------------

内容:

Sub zhuaqusem()
    Dim tmp() As String, p, tmp1() As String, brr() As String, o, r, crr() As String, err() As String, drr() As String, i As Integer, g As Integer, j As Integer, arr() As String, arr1() As String, xmlhttp As Object, m&, n&, z() As String
    ' tmp 放左边结果 tmp1 放右边结果,brr放左侧title crr左侧放描述 drr放右侧title err放右侧描述
'  keywords change from colomn to row
        Range("A2").Select
    Range(Selection, Selection.End(xlDown)).Select
    Selection.Copy
    Range("B1").Select
    Selection.PasteSpecial Paste:=xlPasteAll, Operation:=xlNone, SkipBlanks:= _
        False, Transpose:=True
       
       ' 查询有多少关键词需要搜素 放到j里面
      
    j = WorksheetFunction.CountA([a:a])
    p = Cells(1, 1)
  For g = 1 To j
 
   
    Set xmlhttp = CreateObject("Microsoft.XMLHTTP")
    With xmlhttp
  ' 查看搜索结果页面
   
        .Open "get", "http://www.baidu.com/s?wd=" & Cells(1, g + 1) & "&pn=0&rn=", False
        .send
       
        ' 左边的广告网站进行剥离 分有没有绿色背景2种情况 假设是模仿自然结果的 不带绿色背景 分割字符是<font size=-1 color="#008000"> -1不带引号
               
tmp = Split(.responsetext, "<font size=-1 color=""#008000"">")
           ' 如果结果为空
                   If UBound(tmp) = 0 Then
           ' 左边广告绿色背景 按另外一种方法分割 分组放到tmp里面
          
        tmp = Split(.responsetext, "<font size=""-1"" color=""#008000"" style=""margin-left:6px;"">")
             ' 因为带绿色背景 所以得到的网站列表是重复了1次 要减半
    ReDim Preserve tmp(Int(UBound(tmp) / 2))
  
        End If
            ' 右边广告进行剥离如果发现出现百度e.baidu.com
            z = Split(.responsetext, "<font color=""#008000"" size=""-1"">e.baidu.com</font></a>")
            If UBound(z) <> 0 Then
       '那么对前面的代码进行分组右侧广告 装入tmp1分列字符串<font size="-1" color="#008000"> tmp1 放右侧结果
        tmp1 = Split(z(0), "<font size=""-1"" color=""#008000"">")
  
            End If
       
    End With
   
    For i = 1 To UBound(tmp)
     
             m = m + 1
                  ' 把左测url结果放到arr里面
             ReDim Preserve arr(1 To m)
                    ReDim Preserve brr(1 To m)
                           ReDim Preserve crr(1 To m)
                ' remove the <in  the back
          arr(m) = Split(tmp(i), "<")(0)
           arr(m) = Split(arr(m), " ")(0) ' remove the space in  the back
           arr(m) = Split(arr(m), "/")(0) ' remove /
          
           ' 把标题结果放到brr里面 描述放crr
           brr(m) = Split(Split(tmp(i - 1), "<table id=""30")(1), "<br>")(0)
           brr(m) = RemoveHTML("<" & brr(m))
          
          
            crr(m) = Split(Split(tmp(i - 1), "<table id=""30")(1), "<br>")(1)
           crr(m) = RemoveHTML("<" & crr(m))
    
    Next
 
     ' 把左侧广告网址开始装入
          ' 如果左测没广告 就开始装右侧广告
  If UBound(tmp) = 0 Then
     m = 1

    Else:
    '装入左测广告
    r = 2
    For i = 2 To UBound(arr)
   'arr网站 brr放标题 crr放描述
    Cells(r, g + 1) = brr(i)
    Cells(r, g + 1).Font.ColorIndex = 5
   
      Cells(r, g + 1).WrapText = False
       
    Cells(r + 1, g + 1) = crr(i)
    Cells(r + 1, g + 1).WrapText = True
    Cells(r + 1, g + 1).Rows.RowHeight = 28
   
   
      Cells(r + 2, g + 1) = arr(i)
      Cells(r + 2, g + 1).Font.ColorIndex = 10
     
     
      ''  Cells(2, g + 1).Resize(UBound(arr), 1) = Application.Transpose(arr)
      '  Cells(2, g + 1).Resize(UBound(arr), 1) = Application.Transpose(brr)
       ' Cells(2, g + 1).Resize(UBound(arr), 1) = Application.Transpose(crr)
         '左侧广告变色蓝色
                 '  Range(Cells(2, g + 1), Cells((1 + UBound(arr)), g + 1)).Interior.ColorIndex = 10
     r = r + 4
      Next
   End If
      Erase tmp
     Erase arr

        For i = 1 To UBound(tmp1)
        On Error Resume Next '有问题的地方
       
  n = n + 1
        
             ReDim Preserve arr1(1 To n)
             ReDim Preserve drr(1 To n)
              ReDim Preserve err(1 To n)
         ' 把右侧网站结果放到arr1里面去
            arr1(n) = Split(tmp1(i), "<")(0)
            arr1(n) = Split(arr1(n), "/")(0)
            drr(n) = Split(Split(tmp1(i), "<div id=""bdfs")(1), "<br>")(0)
            drr(n) = RemoveHTML(drr(n))
            'drr 发标题 err 放描述
  err(n) = Split(Split(tmp1(i), "<div id=""bdfs")(1), "<br>")(1)
  err(n) = RemoveHTML(err(n))
           
    Next
   
        ' take website list in the below of righside sem  into colomn
           ' if could get any results give up
        If UBound(tmp1) <> 0 Then
        '右侧网站放入进去 从左侧广告的数的行数开始向下放
        o = 4 * m + 1
        For n = 1 To UBound(arr1)
              'drr 发标题 err 放描述
    Cells(o, g + 1) = drr(i)
   
       Cells(o, g + 1).Font.ColorIndex = 5
          Cells(o, g + 1).WrapText = False
         
         
    Cells(o + 1, g + 1) = err(i)
    Cells(o + 1, g + 1).WrapText = True
    Cells(o + 1, g + 1).Rows.RowHeight = 28
    'URl
     Cells(o + 2, g + 1) = arr1(i)
         Cells(o + 2, g + 1).Font.ColorIndex = 10
        
   o = o + 4
   
   
    Next
   
   
    End If
   
    
   
   

   
    Erase tmp1
    Erase arr1
    Set xmlhttp = Nothing
    m = 0
    n = 0
    Cells(1, 1) = g & " of " & j
   
Next
      For i = 2 To 20
   Cells(i, 1) = i - 1
   Next
Cells(1, 1) = p
End Sub

Sub zhuaqusem()
    Dim tmp() As String, p, tmp1() As String, brr() As String, o, r, crr() As String, err() As String, drr() As String, i As Integer, g As Integer, j As Integer, arr() As String, arr1() As String, xmlhttp As Object, m&, n&, z() As String
    ' tmp 放左边结果 tmp1 放右边结果,brr放左侧title crr左侧放描述 drr放右侧title err放右侧描述
'  keywords change from colomn to row
        Range("A2").Select
    Range(Selection, Selection.End(xlDown)).Select
    Selection.Copy
    Range("B1").Select
    Selection.PasteSpecial Paste:=xlPasteAll, Operation:=xlNone, SkipBlanks:= _
        False, Transpose:=True
       
       ' 查询有多少关键词需要搜素 放到j里面
      
    j = WorksheetFunction.CountA([a:a])
    p = Cells(1, 1)
  For g = 1 To j
 
   
    Set xmlhttp = CreateObject("Microsoft.XMLHTTP")
    With xmlhttp
  ' 查看搜索结果页面
   
        .Open "get", "http://www.baidu.com/s?wd=" & Cells(1, g + 1) & "&pn=0&rn=", False
        .send
       
        ' 左边的广告网站进行剥离 分有没有绿色背景2种情况 假设是模仿自然结果的 不带绿色背景 分割字符是<font size=-1 color="#008000"> -1不带引号
               
tmp = Split(.responsetext, "<font size=-1 color=""#008000"">")
           ' 如果结果为空
                   If UBound(tmp) = 0 Then
           ' 左边广告绿色背景 按另外一种方法分割 分组放到tmp里面
          
        tmp = Split(.responsetext, "<font size=""-1"" color=""#008000"" style=""margin-left:6px;"">")
             ' 因为带绿色背景 所以得到的网站列表是重复了1次 要减半
    ReDim Preserve tmp(Int(UBound(tmp) / 2))
  
        End If
            ' 右边广告进行剥离如果发现出现百度e.baidu.com
            z = Split(.responsetext, "<font color=""#008000"" size=""-1"">e.baidu.com</font></a>")
            If UBound(z) <> 0 Then
       '那么对前面的代码进行分组右侧广告 装入tmp1分列字符串<font size="-1" color="#008000"> tmp1 放右侧结果
        tmp1 = Split(z(0), "<font size=""-1"" color=""#008000"">")
  
            End If
       
    End With
   
    For i = 1 To UBound(tmp)
     
             m = m + 1
                  ' 把左测url结果放到arr里面
             ReDim Preserve arr(1 To m)
                    ReDim Preserve brr(1 To m)
                           ReDim Preserve crr(1 To m)
                ' remove the <in  the back
          arr(m) = Split(tmp(i), "<")(0)
           arr(m) = Split(arr(m), " ")(0) ' remove the space in  the back
           arr(m) = Split(arr(m), "/")(0) ' remove /
          
           ' 把标题结果放到brr里面 描述放crr
           brr(m) = Split(Split(tmp(i - 1), "<table id=""30")(1), "<br>")(0)
           brr(m) = RemoveHTML("<" & brr(m))
          
          
            crr(m) = Split(Split(tmp(i - 1), "<table id=""30")(1), "<br>")(1)
           crr(m) = RemoveHTML("<" & crr(m))
    
    Next
 
     ' 把左侧广告网址开始装入
          ' 如果左测没广告 就开始装右侧广告
  If UBound(tmp) = 0 Then
     m = 1

    Else:
    '装入左测广告
    r = 2
    For i = 2 To UBound(arr)
   'arr网站 brr放标题 crr放描述
    Cells(r, g + 1) = brr(i)
    Cells(r, g + 1).Font.ColorIndex = 5
   
      Cells(r, g + 1).WrapText = False
       
    Cells(r + 1, g + 1) = crr(i)
    Cells(r + 1, g + 1).WrapText = True
    Cells(r + 1, g + 1).Rows.RowHeight = 28
   
   
      Cells(r + 2, g + 1) = arr(i)
      Cells(r + 2, g + 1).Font.ColorIndex = 10
     
     
      ''  Cells(2, g + 1).Resize(UBound(arr), 1) = Application.Transpose(arr)
      '  Cells(2, g + 1).Resize(UBound(arr), 1) = Application.Transpose(brr)
       ' Cells(2, g + 1).Resize(UBound(arr), 1) = Application.Transpose(crr)
         '左侧广告变色蓝色
                 '  Range(Cells(2, g + 1), Cells((1 + UBound(arr)), g + 1)).Interior.ColorIndex = 10
     r = r + 4
      Next
   End If
      Erase tmp
     Erase arr

        For i = 1 To UBound(tmp1)
        On Error Resume Next '有问题的地方
       
  n = n + 1
        
             ReDim Preserve arr1(1 To n)
             ReDim Preserve drr(1 To n)
              ReDim Preserve err(1 To n)
         ' 把右侧网站结果放到arr1里面去
            arr1(n) = Split(tmp1(i), "<")(0)
            arr1(n) = Split(arr1(n), "/")(0)
            drr(n) = Split(Split(tmp1(i), "<div id=""bdfs")(1), "<br>")(0)
            drr(n) = RemoveHTML(drr(n))
            'drr 发标题 err 放描述
  err(n) = Split(Split(tmp1(i), "<div id=""bdfs")(1), "<br>")(1)
  err(n) = RemoveHTML(err(n))
           
    Next
   
        ' take website list in the below of righside sem  into colomn
           ' if could get any results give up
        If UBound(tmp1) <> 0 Then
        '右侧网站放入进去 从左侧广告的数的行数开始向下放
        o = 4 * m + 1
        For n = 1 To UBound(arr1)
              'drr 发标题 err 放描述
    Cells(o, g + 1) = drr(i)
   
       Cells(o, g + 1).Font.ColorIndex = 5
          Cells(o, g + 1).WrapText = False
         
         
    Cells(o + 1, g + 1) = err(i)
    Cells(o + 1, g + 1).WrapText = True
    Cells(o + 1, g + 1).Rows.RowHeight = 28
    'URl
     Cells(o + 2, g + 1) = arr1(i)
         Cells(o + 2, g + 1).Font.ColorIndex = 10
        
   o = o + 4
   
   
    Next
   
   
    End If
   
    
   
   

   
    Erase tmp1
    Erase arr1
    Set xmlhttp = Nothing
    m = 0
    n = 0
    Cells(1, 1) = g & " of " & j
   
Next
      For i = 2 To 20
   Cells(i, 1) = i - 1
   Next
Cells(1, 1) = p
End Sub