黄图是怎么被python给爬下来的
本人一向喜欢看美女图片,希望能够把它们下载下来,于是找到一个黄网,用 Python 把上面的图片爬了下来。
执行效果如图(文件夹中的图片就不给大家看了,少儿不宜):
代码如下:
#-*- coding:utf-8 -*-
import HTMLParser
import os
import socket
import sys
import time
import urllib
import urlparse
# Give up on any blocking socket operation (page or image fetch) after 15 s.
socket.setdefaulttimeout(15)

# Crawl queue: entry 0 is the start page; links found on it are appended later.
urlString = ['http://www.99yeye.com/']

# Absolute path of the folder that receives the downloaded images; it is
# created on first run.
save_path = os.path.abspath("./Download")
if not os.path.exists(save_path):
    os.mkdir(save_path)
def getImage(addr):
    """Download the image at ``addr`` into the ``save_path`` folder.

    The file is stored under the last path segment of ``addr``.  Nothing is
    fetched when ``addr`` is empty, when it has no file name (it ends with
    ``/``), or when a file of that name already exists in ``save_path``.

    Download and write failures are reported on stdout instead of being
    raised, so that one bad image does not stop the crawl.

    :param addr: absolute URL of the image.
    :returns: None
    """
    if not addr:
        return
    fName = addr.split('/')[-1]
    if not fName:
        # The URL ends with '/': there is no file name to save under.
        return
    img_file = os.path.join(save_path, fName)
    # Test the real target path, and do so before fetching, so an image that
    # is already on disk costs no network traffic.
    if os.path.exists(img_file):
        return
    try:
        u = urllib.urlopen(addr)
        try:
            data = u.read()
        finally:
            u.close()
        print("Saving %s" % fName)
        with open(img_file, 'wb') as f:
            f.write(data)
        print("download-end!!!!!!!!")
    except Exception as e:
        # Deliberately best-effort, but say what went wrong instead of
        # hiding it.
        print("Failed to download %s: %s" % (addr, e))
class app_url(HTMLParser.HTMLParser):
    """HTML parser that queues the target of every ``<a href>`` link.

    Each link is resolved against ``urlString[0]`` (the start page) and, if
    it is an http(s) URL that is not queued yet, appended to the
    module-level ``urlString`` list.
    """

    def handle_starttag(self, tag, attrs):
        """Append the resolved ``href`` of an ``<a>`` tag to ``urlString``.

        :param tag: lower-cased tag name reported by the parser.
        :param attrs: list of ``(name, value)`` pairs; ``value`` is None for
            an attribute written without a value.
        """
        if tag != "a":
            return
        for name, value in attrs:
            if name != "href" or not value:
                continue
            # urljoin leaves absolute URLs alone and resolves relative ones;
            # the fragment is dropped because "#top" is still the same page.
            url = urlparse.urldefrag(urlparse.urljoin(urlString[0], value))[0]
            # Ignore javascript:, mailto: and other non-fetchable links.
            if urlparse.urlparse(url).scheme not in ("http", "https"):
                continue
            # Compare the resolved URL, so relative links are de-duplicated too.
            if url not in urlString:
                urlString.append(url)
# HTML parser that downloads the images referenced by a page.
class parseImages(HTMLParser.HTMLParser):
    """HTML parser that hands the URL of every ``<img src>`` to ``getImage``.

    Relative ``src`` values are resolved against ``base_url``.  When
    ``base_url`` is left as None, the module-level variable ``i`` (the page
    URL set by the driver loop) is used as the base instead.
    """

    # URL of the page being parsed; None means "use the module-level ``i``".
    base_url = None

    def handle_starttag(self, tag, attrs):
        """Download the image named by the ``src`` of an ``<img>`` tag.

        :param tag: lower-cased tag name reported by the parser.
        :param attrs: list of ``(name, value)`` pairs; ``value`` is None for
            an attribute written without a value.
        """
        if tag != 'img':
            return
        for name, value in attrs:
            if name != 'src' or not value:
                continue
            base = self.base_url
            if base is None:
                # Legacy coupling: the driver loop keeps the current page URL
                # in the module-level variable ``i``.
                base = i
            url = urlparse.urljoin(base, value)
            # Skip data: URIs and anything else that cannot be fetched.
            if urlparse.urlparse(url).scheme in ('http', 'https'):
                getImage(url)
# Create the link-collecting parser.
lParser = app_url()
# Fetch the start page; a failure here is fatal because there is nothing
# else to crawl.
u = urllib.urlopen(urlString[0])
print("Opening URL\n====================")
# Feed the start page's HTML to the parser, which fills urlString.
try:
    lParser.feed(u.read())
finally:
    u.close()
lParser.close()
print("##############################################################################")
print(urlString)
# NOTE: the loop variable must stay named ``i`` at module level, because
# parseImages.handle_starttag may read ``i`` as the URL of the current page.
for i in urlString:
    print(i)
    try:
        u = urllib.urlopen(i)
    except IOError as e:
        # One unreachable page must not abort the whole crawl.
        print("Skipping %s: %s" % (i, e))
        continue
    try:
        print(u.info())
        # Use a fresh parser per page so no state leaks between documents.
        gg = parseImages()
        # Feed the page's HTML to the parser, which downloads its images.
        gg.feed(u.read())
        gg.close()
    except (IOError, HTMLParser.HTMLParseError) as e:
        print("Skipping %s: %s" % (i, e))
    finally:
        u.close()