为什么程序中获取百度搜索引擎的关键字是乱码?
我做了一个简单的测试,当在百度上输入关键字是中文时,程序中的到的关键字为什么是乱码?
1。aa.html
<script language='javascript' type='text/javascript' src='http://localhost:3514/UTF8toGB2312/GetMakeCode.aspx' charset='utf-8'></script>
2。GetMakeCode.aspx
protected void Page_Load(object sender, EventArgs e)
{
Response.Expires = 0;
Response.CacheControl = "no-cache";
if (!this.IsPostBack)
{
StringBuilder strTemp = new StringBuilder();
strTemp.Append("var referrer = escape('http://www.baidu.com/s?wd=%BC%B4%CA%B1%CD%A8');\r\n");
//http://www.baidu.com/s?wd=%BC%B4%CA%B1%CD%A8
//http://www.google.cn/search?hl=zh-CN&q=%E5%8D%B3%E6%97%B6%E9%80%9A&meta=&aq=f&oq=
//http://www.baidu.com/s?wd=www.wangyetong.com
strTemp.Append("alert(referrer);");
strTemp.Append("var scriptcode = \"<iframe frameborder='no' border='0' marginwidth='0' marginheight='0' scrolling='no' src='http://localhost:3514/UTF8toGB2312/SaveScript.aspx?referrer=\"+referrer+\"' style=\'display:none;'></iframe>\"\r\n");
strTemp.Append("document.write(scriptcode);\r\n");
Response.ContentType = "text/plain";
Response.Write(strTemp.ToString());
}
}
3。saveScript.aspx中
<script src="JS/UrlBuilder.js" type="text/javascript" language="javascript"></script>
<script language="javascript" type="text/javascript">
window.onload = function doSearch()
{
var refer = unescape('<%=Request.QueryString["referrer"] %>');
alert(refer);
var url = new UrlBuilder(refer);
var shortUrl = refer.toLowerCase();
var keywords='';
if(shortUrl =='')
{
keywords='0';
}
else
{
if(shortUrl.indexOf('baidu.com')>0||shortUrl.indexOf('baidu.cn')>0)
{
keywords=url.GetValue('wd','gb2312');
}
else if(shortUrl.indexOf('google.com')>0||shortUrl.indexOf('google.cn')>0)
{
keywords=url.GetValue('q','UTF8');
}
else
{
keywords='1';
}
}
alert(keywords);
window.open('http://localhost:3514/UTF8toGB2312/SaveGuestLogs.aspx?referrer='+escape(keywords)+'');
}
</script>
4。saveScriptLogs.aspx中
<script language="javascript" type="text/javascript">
alert("aaaaaaaaaaaaaa");
var referror = '<%=Request.QueryString["referrer"] %>';
// var referror = '<%=refer %>';
alert(referror);
</script>
5。URLBuilder.js
// JScript 文件
function UrlBuilder(url)
{
this.m_Href = null;
this.m_Host = null;
this.m_Hostname = null;
this.m_Port = null;
this.m_Protocol = null;
this.m_Path = null;
this.m_Search = null;
this.m_Hash = null;
this.m_Params = null;
this.m_Sucess = false;
if ( url ) this.Parse(url);
this.toString = function()
{
return '[class UrlBuilder]';
};
}
UrlBuilder.prototype.Parse = function(url)
{
var m = url.match(/(\w{3,5}:)\/\/([^\.]+(?:\.[^\.:/]+)+)(?::(\d{1,5}))?\/?/);
if ( m )
{
this.m_Protocol = m[1];
this.m_Hostname = m[2];
this.m_Port = m[3];
if ( this.m_Port )
{
this.m_Host = this.m_Hostname + ':' + this.m_Port;
}
else
{
this.m_Host = m[2];
}
var indexHash = url.indexOf('#');
if ( indexHash != -1 )
{
this.m_Hash = url.substr(indexHash);
}
else
{
this.m_Hash = '';
}
var indexParams = url.indexOf('?');
if ( indexParams != -1 )
{
if ( indexHash != -1 )
{
this.m_Search = url.substring(indexParams, indexHash);
}
else
{
this.m_Search = url.substr(indexParams);
}
this.m_Path = url.substr(indexParams);
}
else
{
this.m_Search = '';
}
this.m_Success = true;
this.m_Params = null;
this.m_Href = url;
}
};
UrlBuilder.prototype.GetValue = function(key, encoding)
{
if ( !this.m_Params )
{
if ( this.m_Search )
{
this.m_Params = {};
var search = this.m_Search.substring(1);
var keyValues = search.split('&');
for ( var i=0 ; i < keyValues.length ; ++i )
{
var keyValue = keyValues[i];
var index = keyValue.indexOf('=');
if ( index != -1 )
{
this.m_Params[keyValue.substring(0, index)] = keyValue.substr(index+1);
}
else
{
this.m_Params[keyValue] = '';
}
}
}
}
//alert(this.m_Params[key]);
encoding = encoding || '';
alert(encoding.toUpperCase());
switch(encoding.toUpperCase())
{
case 'UTF8' :
{
return decodeURI(this.m_Params[key]);
}
case 'UNICODE' :
{
return unescape(this.m_Params[key]);
}
case 'GB2312' : // need VBScript function Chr()
{
return decodeURIComponent(this.m_Params[key]);//URLEncoding(this.m_Params[key])
}
default :
{
return this.m_Params[key];
}
}
}
建一个解决方案,把这些代码放到解决方案里,就可以运行了,最后一个弹出的内容就是我要得到的关键字,如何让这个关键字不为乱码?
谢谢,急需解决?
[解决办法]
因为baidu的编码是gb2312,而你做的默认编码是utf-8,自然出现乱码了。
[解决办法]
就是,百度的是gb2312
[解决办法]
url解码
[解决办法]
虽然乱码 但是程序还是回识别的 只是显示乱码而已
[解决办法]
你在URL时进行编码看看我发的博里面有关与编码的方法!
[解决办法]