UnicodeEncodeError的问题,求救啊,已经百度google过,还是有问题
运行环境:vps centos 6.0
使用putty远程ssh运行显示:
UnicodeEncodeError: 'ascii' codec can't encode characters in position 190-277: ordinal not in range(128)
使用linux远程ssh运行显示:
UnicodeEncodeError: 'latin-1' codec can't encode characters in position 190-277: ordinal not in range(256)
# coding: utf-8
"""Fetch a web page, decode it to unicode and print it (Python 2 script)."""
from contextlib import closing
import sys
import urllib2

import chardet

# NOTE: kept only so that code importing this module sees the same default
# encoding as before. It does NOT influence how ``print`` encodes unicode for
# the terminal, which is why it never cured the UnicodeEncodeError.
reload(sys)
sys.setdefaultencoding('utf-8')


def getWebContent2(url):
    """Download *url* and return its body decoded to a ``unicode`` string.

    The encoding is guessed with ``chardet``. When the guess is missing or is
    'ISO-8859-2', the body is decoded as UTF-8 instead. Undecodable bytes are
    replaced rather than raising ``UnicodeDecodeError``.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The page body as ``unicode``.

    Raises:
        urllib2.URLError: If the request fails or times out (20 s).
    """
    print(url)
    # Close the HTTP response even if read() fails.
    with closing(urllib2.urlopen(url, timeout=20)) as req:
        content = req.read()

    # Detect once; the original ran the (slow) detector three times.
    encoding = chardet.detect(content)['encoding']
    # str() so that a failed detection (None) does not raise TypeError.
    print("code:" + str(encoding))
    print(content)

    if encoding is None or encoding == 'ISO-8859-2':
        # The original already decoded 'ISO-8859-2' results as UTF-8; the same
        # fallback is used when no encoding could be detected at all.
        return unicode(content, 'utf-8', 'replace')
    return unicode(content, encoding, 'replace')


if __name__ == "__main__":
    print("Debug:test qs_getweb.py")
    url = "hao123.com"
    response = getWebContent2("http://www.%s" % url)
    # Printing a unicode object makes Python encode it with the terminal's
    # codec (ASCII or latin-1 over these SSH sessions), which raises
    # UnicodeEncodeError on Chinese text. Encode explicitly to UTF-8 bytes.
    print(response.encode('utf-8'))
# coding: utf-8
"""Fetch a web page and print its body as UTF-8 bytes (Python 2 script)."""
from contextlib import closing
import urllib2

import chardet


def getWebContent2(url):
    """Download *url* and return its body as a UTF-8 encoded byte string.

    If ``chardet`` reports the body as UTF-8 it is returned untouched.
    Otherwise the body is decoded as GBK (undecodable bytes are dropped) and
    re-encoded to UTF-8.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The page body as a UTF-8 encoded ``str``.

    Raises:
        urllib2.URLError: If the request fails or times out (40 s).
    """
    print(url)
    # Close the HTTP response even if read() fails.
    with closing(urllib2.urlopen(url, timeout=40)) as req:
        content = req.read()

    detected = chardet.detect(content)['encoding']
    # Case-insensitive match; ``detected`` is None when detection fails.
    if (detected or '').lower() == 'utf-8':
        return content

    # hao123 appears to be served as GBK, so every non-UTF-8 page is decoded
    # as GBK here.
    return content.decode('gbk', 'ignore').encode('utf-8')


if __name__ == "__main__":
    print("Debug:test qs_getweb.py")
    response = getWebContent2("http://www.hao123.com")
    # ``response`` is already a UTF-8 byte string, so printing it involves no
    # implicit terminal encoding and cannot raise UnicodeEncodeError.
    print(response)