Python笔记1-HTTP Download

2012-10-09

Python札记1-HTTP Download：# -*- coding: utf-8 -*- __author__ = 'gull' import os, urllib2, log_factory ……

Python札记1-HTTP Download

# -*- coding: utf-8 -*-
__author__ = 'gull'

import os
import urllib2
import log_factory
from urlparse import urlsplit


def get(url, filePath, fileName=None, buffer=16 * 1024):
    """Download ``url`` over HTTP and save the response body under ``filePath``.

    Args:
        url: Address to request.
        filePath: Directory in which the downloaded file is created.
        fileName: Name of the saved file. When empty, the name is taken from
            the ``filename`` parameter of the response's Content-Disposition
            header, falling back to the last path segment of the response URL.
        buffer: Number of bytes read from the response per chunk.

    Returns:
        A tuple ``(fileName, totalLength, fullfileName)``. ``totalLength`` is
        the raw Content-Length header value (``None`` when the header is
        absent) and ``fullfileName`` is the path that was written.

    Raises:
        urllib2.URLError: the request could not be completed.
        IOError: the target file could not be opened or written.
    """
    log = log_factory.getLogger()
    log.info("send http request to %s", url)

    def writefile(fsrc, fdst, totalLength):
        """Copy data from file-like object fsrc to file-like object fdst.

        Progress is logged after every chunk; ``fileName`` is read from the
        enclosing scope at call time, i.e. after it has been resolved.
        """
        # Content-Length is a header string, or None when the server omits it.
        # A zero/absent total is treated as "unknown" so the percentage below
        # can never divide by zero.
        total = float(totalLength) if totalLength else None
        bytesRead = 0.0
        while True:
            buf = fsrc.read(buffer)
            if not buf:
                break
            fdst.write(buf)
            bytesRead += len(buf)
            if total:
                log.info("%s: %.02f/%.02f kb (%d%%)",
                         fileName,
                         bytesRead / 1024.0,
                         total / 1024.0,
                         100 * bytesRead / total)
            else:
                log.info("%s: %.02f/? kb (?%%)",
                         fileName,
                         bytesRead / 1024.0)

    def getFileName(openUrl):
        """Pick a file name from Content-Disposition, else from the URL path."""
        disposition = openUrl.info().getheader('Content-Disposition')
        if disposition:
            for part in disposition.split(';'):
                # partition() splits on the first '=' only, so values that
                # themselves contain '=' are kept intact.
                key, _, value = part.strip().partition('=')
                if key.strip().lower() != 'filename':
                    continue
                # Strip surrounding quotes, then any directory components so
                # a server-supplied name cannot point outside filePath.
                name = os.path.basename(value.strip().strip("\"'"))
                if name:
                    return name
        # No usable filename in the headers: parse it out of the response URL.
        return os.path.basename(urlsplit(openUrl.url)[2])

    r = urllib2.urlopen(urllib2.Request(url), timeout=120)  # timeout is 120s
    try:
        fileName = fileName or getFileName(r)
        fullfileName = "%s%s%s" % (filePath, os.path.sep, fileName)
        totalLength = r.info().getheader("Content-Length")
        log.info("write response date to %s", fullfileName)
        with open(fullfileName, 'wb') as f:
            writefile(r, f, totalLength)
    finally:
        r.close()
    # Logged after the try block; the original placed this after a `return`
    # inside `try`, where it could never run.
    log.info("http request finished.")
    return fileName, totalLength, fullfileName

参数说明：

url: 即下载路径，如 http://apache.etoak.com/tomcat/tomcat-7/v7.0.20/bin/apache-tomcat-7.0.20.tar.gz

filePath: 下载文件保存的文件夹

fileName: 下载后保存的文件名，可选参数。若为空，则会取 response header 中的 filename 信息（如下图）

（图：Python笔记1-HTTP Download）

若仍为空，则取 url 路径的最后一段作为文件名（如：apache-tomcat-7.0.20.tar.gz）

buffer:下载缓冲区大小，默认16k

可继续加入以下特性:

支持代理；文件分块、多线程下载；异步下载、回调机制……

热点排行

perl python

Python笔记1-HTTP Download