标题:python抓取51job公司名称招聘职位以及网址极速版

-------------------------------------------------------------------------------------------------------------------------------

时间:2012/11/29 18:06:04

-------------------------------------------------------------------------------------------------------------------------------

内容:

先使用命令安装pip工具

sudo easy_install pip


再使用pip安装bs4


sudo pip install bs4


# -*- coding: utf8 -*-

import sys

reload(sys) 

sys.setdefaultencoding('utf-8')


from bs4 import BeautifulSoup
import re,time,urllib2
html=urllib2.urlopen("http://www.51job.com/shanghai",timeout=5).read()
soup=BeautifulSoup(html)
div=soup.find("div",id="dataidea_1")
for links in div.find_all("a",title=True):
    print links.get("title")
  
    print links.get("href")
    html1=urllib2.urlopen(links.get("href"),timeout=5).read()
    soup1=BeautifulSoup(html1)
    div1=soup1.find("div",class_="redline")
    if div1!=None:
       
        for link1 in div1.find_all("a",href=True):
             print link1.get_text()
        if soup1.find("p","txt_font1")!=None:
            if  soup1.find("p","txt_font1").get_text().find("tp")>1:
                print soup1.find("p","txt_font1").get_text()

    print "\n"
    print "\n"

先使用命令安装pip工具

sudo easy_install pip


再使用pip安装bs4


sudo pip install bs4


# -*- coding: utf8 -*-

import sys

reload(sys) 

sys.setdefaultencoding('utf-8')


from bs4 import BeautifulSoup
import re,time,urllib2
html=urllib2.urlopen("http://www.51job.com/shanghai",timeout=5).read()
soup=BeautifulSoup(html)
div=soup.find("div",id="dataidea_1")
for links in div.find_all("a",title=True):
    print links.get("title")
  
    print links.get("href")
    html1=urllib2.urlopen(links.get("href"),timeout=5).read()
    soup1=BeautifulSoup(html1)
    div1=soup1.find("div",class_="redline")
    if div1!=None:
       
        for link1 in div1.find_all("a",href=True):
             print link1.get_text()
        if soup1.find("p","txt_font1")!=None:
            if  soup1.find("p","txt_font1").get_text().find("tp")>1:
                print soup1.find("p","txt_font1").get_text()

    print "\n"
    print "\n"