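// Convert the Unicode-escaped Chinese (numeric character references such as &#20013;) in HTML saved by Word into literal Chinese characters.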
const fs = require("fs");

// Decimal numeric character references with 5 to 7 digits (e.g. "&#20013;").
// Shorter references are deliberately not matched, as in the original script.
const NUMERIC_ENTITY = /&#([0-9]{5,7});/g;

/**
 * Replaces decimal numeric character references in `html` with the literal
 * characters they denote.
 *
 * References whose value is below 0x80 (possible only with leading zeros,
 * e.g. "&#00060;") are left untouched so markup-significant ASCII characters
 * stay escaped. References above U+10FFFF are not valid code points and are
 * left untouched as well.
 *
 * @param {string} html - Text that may contain references such as "&#20013;".
 * @returns {string} The text with every eligible reference decoded.
 */
function decodeNumericEntities(html) {
  return html.replace(NUMERIC_ENTITY, (entity, digits) => {
    const codePoint = Number.parseInt(digits, 10);
    if (codePoint < 0x80 || codePoint > 0x10ffff) {
      return entity;
    }
    // String.fromCodePoint also handles values above 0xFFFF, which the
    // four-hex-digit "%uXXXX" form accepted by unescape() cannot express.
    return String.fromCodePoint(codePoint);
  });
}

// Read page.htm, decode its numeric character references, write out.htm.
fs.readFile("page.htm", (readErr, data) => {
  if (readErr) {
    console.error("Failed to read page.htm:", readErr.message);
    return;
  }

  // NOTE(review): toString() decodes the bytes as UTF-8; Word may save HTML
  // in a different encoding - confirm the input encoding.
  const html = data.toString();

  // match() returns null when nothing matches, so fall back to an empty list.
  const entities = html.match(NUMERIC_ENTITY) || [];
  const decoded = decodeNumericEntities(html);

  fs.writeFile("out.htm", decoded, (writeErr) => {
    if (writeErr) {
      console.error("Failed to write out.htm:", writeErr.message);
      return;
    }
    console.log("写入成功");
  });

  // Summary: number of references found and the first one (undefined if none).
  console.log(entities.length, entities[0]);
});