Python抓取51job公司名称、招聘职位以及网址(极速版)

2019年12月6日 | 标签:

 

先使用以下命令安装pip工具和bs4。

首先安装pip工具:

sudo easy_install pip

然后用pip安装bs4:
sudo pip install bs4

# -*- coding: utf8 -*-

import sys

reload(sys)

sys.setdefaultencoding(’utf-8’)

from bs4 import BeautifulSoup
import re,time,urllib2
html=urllib2.urlopen(”http://www.51job.com/shanghai”,timeout=5).read()
soup=BeautifulSoup(html)
div=soup.find(”div”,id=”dataidea_1″)
for links in div.find_all(”a”,title=True):
print links.get(”title”)

print links.get(”href”)
html1=urllib2.urlopen(links.get(”href”),timeout=5).read()
soup1=BeautifulSoup(html1)
div1=soup1.find(”div”,class_=”redline”)
if div1!=None:

for link1 in div1.find_all(”a”,href=True):
print link1.get_text()
if soup1.find(”p”,”txt_font1″)!=None:
if  soup1.find(”p”,”txt_font1″).get_text().find(”tp”)>1:
print soup1.find(”p”,”txt_font1″).get_text()

print “\n”
print “\n”

先使用以下命令安装pip工具和bs4。

首先安装pip工具:

sudo easy_install pip

然后用pip安装bs4:
sudo pip install bs4

# -*- coding: utf8 -*-

import sys

reload(sys)

sys.setdefaultencoding(’utf-8’)

from bs4 import BeautifulSoup
import re,time,urllib2
html=urllib2.urlopen(”http://www.51job.com/shanghai”,timeout=5).read()
soup=BeautifulSoup(html)
div=soup.find(”div”,id=”dataidea_1″)
for links in div.find_all(”a”,title=True):
print links.get(”title”)

print links.get(”href”)
html1=urllib2.urlopen(links.get(”href”),timeout=5).read()
soup1=BeautifulSoup(html1)
div1=soup1.find(”div”,class_=”redline”)
if div1!=None:

for link1 in div1.find_all(”a”,href=True):
print link1.get_text()
if soup1.find(”p”,”txt_font1″)!=None:
if  soup1.find(”p”,”txt_font1″).get_text().find(”tp”)>1:
print soup1.find(”p”,”txt_font1″).get_text()

print “\n”
print “\n”

目前还没有任何评论.