首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 开发语言 > perl python >

python抓取页面图片,该怎么处理

2012-09-10 
python抓取页面图片:在http://www.cnblogs.com/flysun/archive/2009/06/16/1504278.html找了个程序,代码如下……

python抓取页面图片
在http://www.cnblogs.com/flysun/archive/2009/06/16/1504278.html找了个程序,代码如下:
# Windows-only script: drives Internet Explorer through COM (pywin32) and
# copies the images of a page out of the browser's URL cache.
import os
import time

import win32com.client
import win32file
import win32inet


class ImgDownloader:
    """Open a page in Internet Explorer and copy its images to a directory.

    The images are not downloaded again: for every ``<img>`` element of the
    loaded document the URL cache entry is looked up and the cached local
    file is copied into the target directory.
    """

    def __init__(self, url, dir):
        """Start Internet Explorer and navigate to *url*.

        Args:
            url: Address of the page whose images should be collected.
            dir: Destination directory for the copied files. The name
                shadows the builtin but is kept so existing callers that
                pass it by keyword keep working.
        """
        self.__dir = dir
        self.__ie = win32com.client.Dispatch('InternetExplorer.Application')
        self.__ie.Navigate(url)
        self.__wait__()

    def __wait__(self):
        """Poll every 0.1 s until the browser no longer reports ``Busy``."""
        while self.__ie.Busy:
            time.sleep(0.1)

    def start(self):
        """Copy the cached file of every ``<img>`` element on the page.

        Each file is stored as ``[<index>]<cached file name>`` inside the
        destination directory. The operation is best effort: an image whose
        lookup or copy fails is skipped.
        """
        self.__wait__()
        imgs = self.__ie.Document.getElementsByTagName('img')

        for i in range(imgs.length):
            try:
                # Index the collection: ``src`` is a property of each <img>
                # element, not of the collection itself.
                cache_info = win32inet.GetUrlCacheEntryInfo(imgs[i].src)
                if not cache_info:
                    continue
                path = cache_info['LocalFileName']
                filename = '[%d]%s' % (i, os.path.basename(path))
                # Third argument True: fail instead of overwriting a file
                # that already exists in the destination.
                win32file.CopyFile(path, os.path.join(self.__dir, filename), True)
            except Exception:
                # Skip this image, but unlike a bare ``except`` let
                # KeyboardInterrupt / SystemExit through.
                continue

    def close(self):
        """Quit the Internet Explorer instance started by this object."""
        self.__ie.Quit()


if __name__ == '__main__':
    d = ImgDownloader('http://image.baidu.com/i?ct=201326592&cl=2&lm=-1&tn=baiduimage&pv=&word=boy&z=0', 'c:\\temp\\')
    try:
        d.start()
    finally:
        # Always shut the browser down, even when start() raises.
        d.close()
在IDLE中运行时报错如下:
Traceback (most recent call last):
  File "E:\catch.py", line 33, in <module>
  d=ImgDownloader('http://www.hao123.com/','E:\\temp\\')
  File "E:\catch.py", line 5, in __init__
  self.__ie=win32com.client.Dispatch('InternetExplorer.Application')
  File "C:\Python32\lib\site-packages\win32com\client\__init__.py", line 95, in Dispatch
  dispatch, userName = dynamic._GetGoodDispatchAndUserName(dispatch,userName,clsctx)
  File "C:\Python32\lib\site-packages\win32com\client\dynamic.py", line 108, in _GetGoodDispatchAndUserName
  return (_GetGoodDispatch(IDispatch, clsctx), userName)
  File "C:\Python32\lib\site-packages\win32com\client\dynamic.py", line 85, in _GetGoodDispatch
  IDispatch = pythoncom.CoCreateInstance(IDispatch, None, clsctx, pythoncom.IID_IDispatch)
pywintypes.com_error: (-2147024893, '系统找不到指定的路径。', None, None)
请问这个问题该如何解决?

[解决办法]
代码本身没什么问题。关键是运行代码时最好先关闭已打开的浏览器,并且系统默认浏览器应为IE。下面的代码中,COM对象名改用了'InternetExplorer.Application.1',并把IE窗口设为可见(Visible = 1):

Python code
"""Copy the images of a web page out of the Internet Explorer URL cache.

Windows-only: the script drives Internet Explorer through COM and relies on
the pywin32 modules ``win32com``, ``win32inet`` and ``win32file``.
"""
import os
import time

import win32com.client
import win32file
import win32inet


class ImgDownloader:
    """Load a page in Internet Explorer and copy its cached images.

    For every ``<img>`` element of the loaded document the matching URL
    cache entry is looked up and its local file is copied into the target
    directory; nothing is fetched a second time.
    """

    def __init__(self, url, dir):
        """Launch Internet Explorer, navigate to *url* and wait for it.

        Args:
            url: Address of the page whose images should be collected.
            dir: Destination directory for the copied files. The name
                shadows the builtin but is kept so existing callers that
                pass it by keyword keep working.
        """
        self.__dir = dir
        self.__ie = win32com.client.Dispatch('InternetExplorer.Application.1')
        self.__ie.Navigate(url)
        self.__ie.Visible = 1
        self.__wait__()

    def __wait__(self):
        """Poll every 0.5 s until the browser no longer reports ``Busy``."""
        while self.__ie.Busy:
            time.sleep(0.5)

    def start(self):
        """Copy the cached file of every ``<img>`` element on the page.

        Each file is stored as ``[<index>]<cached file name>`` inside the
        destination directory. The operation is best effort: an image whose
        lookup or copy fails is skipped.
        """
        self.__wait__()
        imgs = self.__ie.Document.getElementsByTagName('img')

        for i in range(imgs.length):
            try:
                cache_info = win32inet.GetUrlCacheEntryInfo(imgs[i].src)
                if not cache_info:
                    continue
                path = cache_info['LocalFileName']
                # The index prefix keeps files apart when two images share
                # the same cached file name.
                filename = '[%d]%s' % (i, os.path.basename(path))
                # Third argument True: fail instead of overwriting a file
                # that already exists in the destination.
                win32file.CopyFile(path, os.path.join(self.__dir, filename), True)
            except Exception:
                # Skip this image, but unlike a bare ``except`` let
                # KeyboardInterrupt / SystemExit through.
                continue

    def close(self):
        """Quit the Internet Explorer instance started by this object."""
        self.__ie.Quit()


if __name__ == '__main__':
    d = ImgDownloader('http://www.hao123.com/', 'E:\\temp\\')
    try:
        d.start()
    finally:
        # Always shut the browser down, even when start() raises.
        d.close()

热点排行