python.jpg
1.打印网页数据
#coding=utf-8
'''
将一个网页的所有文字全部打印
'''
import urllib
def getHtml(url):
    """Fetch *url* and return the response body.

    Targets Python 2, where ``urllib.urlopen`` is available. The body is
    returned exactly as ``read()`` produces it; no decoding is applied.

    :param url: address of the page to download.
    :return: the full content read from the response.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even when read() raises.
        page.close()
# NOTE(review): the right-hand side of this assignment was missing from the
# listing. The URL below is only a placeholder -- replace it with the page
# that should be fetched.
html = getHtml("http://example.com/")
print(html)
2.筛选网页数据
#coding=utf-8
'''
筛选页面中想要的数据:Python 提供了非常强大的正则表达式,对数据进行筛选
'''
import re
import urllib
def getHtml(url):
    """Download the page at *url* and hand back its content.

    Targets Python 2, where ``urllib.urlopen`` is available. Nothing is
    decoded: the caller receives whatever ``read()`` returns.

    :param url: address of the page to download.
    :return: the full content read from the response.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Always close the response, including on a failed read().
        page.close()
def getImg(html):
    """Return the ``src`` values in *html* that end in ``.jpg``.

    :param html: page markup to scan.
    :return: list of the matched attribute values, in document order;
        empty when nothing matches.
    """
    # ``[^"]+`` keeps a match inside one pair of quotes. The earlier ``.+?``
    # could run past the closing quote of a non-jpg ``src`` and swallow
    # everything up to the next ``.jpg"``.
    reg = r'src="([^"]+\.jpg)"'
    # The right-hand side was missing in the listing; the pattern has to be
    # compiled here for the lookup below.
    imgre = re.compile(reg)
    return imgre.findall(html)
# NOTE(review): the right-hand side of this assignment was missing from the
# listing. The URL below is only a placeholder -- replace it with the page
# whose image addresses should be listed.
html = getHtml("http://example.com/")
print(getImg(html))
3.下载网页图片
#coding=utf-8
'''
下载页面中的图片:先用正则表达式筛选出图片地址,再用 urllib.urlretrieve 将图片保存到本地
'''
import re
import urllib
def getHtml(url):
    """Retrieve the content served at *url*.

    Targets Python 2, where ``urllib.urlopen`` is available. The result
    is whatever ``read()`` yields, without any decoding.

    :param url: address of the page to download.
    :return: the full content read from the response.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Close the response whether or not read() succeeded.
        page.close()
def getImg(html):
    """Download every ``.jpg`` referenced by a ``src`` attribute in *html*.

    Each match is fetched with ``urllib.urlretrieve`` (Python 2) and saved
    in the current working directory as ``0.jpg``, ``1.jpg``, ... in the
    order the matches appear.

    :param html: page markup to scan.
    :return: list of the matched ``src`` values, in document order; empty
        when nothing matches (and nothing is downloaded).
    """
    # ``[^"]+`` keeps a match inside one pair of quotes. The earlier ``.+?``
    # could run past the closing quote of a non-jpg ``src`` and swallow
    # everything up to the next ``.jpg"``.
    reg = r'src="([^"]+\.jpg)"'
    # The right-hand side was missing in the listing; the pattern has to be
    # compiled here for the lookup below.
    imgre = re.compile(reg)
    imglist = imgre.findall(html)
    for x, imgurl in enumerate(imglist):
        urllib.urlretrieve(imgurl, '%s.jpg' % x)
    # Return the matches so a caller that prints the result sees something
    # useful rather than None.
    return imglist
# NOTE(review): the right-hand side of this assignment was missing from the
# listing. The URL below is only a placeholder -- replace it with the page
# whose images should be downloaded.
html = getHtml("http://example.com/")
print(getImg(html))