C++ unicode编码转换
如何将一段 Unicode 转义编码的字符串转成对应的字符?例如:将 \u4f17\u4eba\u5a31\u4e50
转为对应的字符串“众人娱乐”。
求解,谢谢大家 Unicode 解码 编码
[解决办法]
用iconv转UTF-8为GB2312,参考如下代码:
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
#include <stdio.h>
/*
 * Byte offset of a glyph record for the two-byte code `hz`.
 *
 * Both bytes are rebased to 0xa1, the first byte selects a row of 94
 * entries, the second byte selects the entry inside the row, and every
 * entry is 32 bytes long.
 *
 * `hz` must point to unsigned bytes (with plain signed char the
 * subtraction would go negative). The argument is evaluated twice, so do
 * not pass an expression with side effects.
 */
#define hz_offset(hz) ((((hz)[0] - 0xa1) * 94 + ((hz)[1] - 0xa1)) * 32)
/*
 * Convert a byte string from one character set to another with iconv.
 *
 * from_charset  name of the source encoding
 * to_charset    name of the target encoding
 * inbuf, inlen  input bytes and their count
 * outbuf, outlen output buffer and its capacity in bytes; the buffer is
 *               zero-filled before converting, so the result is
 *               NUL-terminated only when it is shorter than outlen
 *
 * Returns 0 on success and -1 on any failure (bad argument, unsupported
 * charset pair, invalid or incomplete input, output buffer too small).
 */
int decoding(char* from_charset, char* to_charset, char* inbuf, int inlen, char* outbuf, int outlen)
{
    iconv_t cd;
    char *in_cursor = inbuf;
    char *out_cursor = outbuf;
    size_t in_left;
    size_t out_left;
    int rc = 0;

    if (from_charset == NULL || to_charset == NULL ||
        inbuf == NULL || outbuf == NULL || inlen < 0 || outlen < 0)
    {
        return -1;
    }

    /* iconv_open reports failure with (iconv_t)-1, not with a null handle. */
    cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)-1)
    {
        return -1;
    }

    memset(outbuf, 0, (size_t)outlen);

    /*
     * iconv updates the remaining-byte counters through size_t pointers;
     * handing it the address of an int would make it access bytes beyond
     * the int on platforms where size_t is wider.
     */
    in_left = (size_t)inlen;
    out_left = (size_t)outlen;
    if (iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left) == (size_t)-1)
    {
        rc = -1;
    }

    iconv_close(cd);
    return rc;
}
/*
 * Convert a single Chinese character from UTF-8 to GB2312.
 *
 * Exactly 3 bytes are read from hz_utf8, and hz_gb2312 must have room for
 * 3 bytes. The return value is whatever decoding() reports: 0 on success,
 * -1 on failure.
 */
int hz_decoding(char* hz_utf8, char* hz_gb2312)
{
    enum { UTF8_INPUT_BYTES = 3, GB2312_OUTPUT_BYTES = 3 };
    int status;

    status = decoding("utf-8", "gb2312",
                      hz_utf8, UTF8_INPUT_BYTES,
                      hz_gb2312, GB2312_OUTPUT_BYTES);
    return status;
}
/*
 * Print a 16x16 one-bit-per-pixel glyph to stdout.
 *
 * datas holds 32 bytes: 16 rows of 2 bytes each, most significant bit
 * first. A set bit is printed as "* ", a clear bit as a single space, and
 * every row ends with a newline.
 */
void hz_show(unsigned char* datas)
{
    const unsigned char *cursor = datas;
    unsigned int mask;
    int row;
    int half;

    for (row = 0; row < 16; row++) {
        for (half = 0; half < 2; half++) {
            unsigned char bits = *cursor++;

            /* Walk the byte from bit 7 down to bit 0. */
            for (mask = 0x80; mask != 0; mask >>= 1) {
                if ((bits & mask) != 0) {
                    printf("%c ", '*');
                } else {
                    printf(" ");
                }
            }
        }
        printf("\n");
    }
}
/*
 * Demo: convert one Chinese character from UTF-8 to GB2312, print its two
 * GB2312 bytes in hex, then load the matching 32-byte glyph from the font
 * file "simhei_gb2312_16.ds" and draw it on stdout.
 *
 * Returns 0 on success and -1 if the conversion, opening the font file,
 * seeking or reading fails.
 */
int main(int argc, char* argv[])
{
    /*
     * UTF-8 bytes of the character U+5B8B ("宋"), written numerically so
     * the program does not depend on the encoding the source file is
     * saved in.
     */
    unsigned char hz_utf8[3] = { 0xe5, 0xae, 0x8b };
    unsigned char hz_gb2312[3];
    unsigned char hz_datas[32];
    FILE* hzk;
    int status = -1;

    (void)argc;
    (void)argv;

    if (hz_decoding((char *)hz_utf8, (char *)hz_gb2312) != 0)
    {
        fprintf(stderr, "UTF-8 to GB2312 conversion failed\n");
        return -1;
    }
    printf("%x, %x\n", hz_gb2312[0], hz_gb2312[1]);

    if ((hzk = fopen("simhei_gb2312_16.ds", "rb")) == NULL)
    {
        perror("simhei_gb2312_16.ds");
        return -1;
    }

    /* Only draw the glyph when all 32 bytes were actually read. */
    if (fseek(hzk, hz_offset(hz_gb2312), SEEK_SET) == 0 &&
        fread(hz_datas, sizeof hz_datas, 1, hzk) == 1)
    {
        hz_show(hz_datas);
        status = 0;
    }
    else
    {
        fprintf(stderr, "cannot read glyph data from simhei_gb2312_16.ds\n");
    }

    fclose(hzk);
    return status;
}
//将文件1中所有'\uAABB'替换为'\xBB'+'\xAA','C'替换为'C\x00',且文件头加'\xFF\xFE',结果保存到文件2中。
#include <stdio.h>
/*
 * Escape converter: copies file 1 to file 2 as UTF-16LE.
 *
 *   - the output starts with the byte-order mark FF FE;
 *   - every "\uAABB" sequence (backslash, 'u', four hex digits) becomes
 *     the two bytes BB AA;
 *   - every other input byte C becomes the two bytes C 00.
 *
 * NOTE(review): input bytes >= 0x80 are widened one by one, so multi-byte
 * input text (e.g. UTF-8) is presumably not handled as intended -- confirm
 * the expected input encoding.
 */

/* Numeric value of a hexadecimal digit, or -1 if c is not one. */
static int hex_value(int c)
{
    if ('0' <= c && c <= '9') {
        return c - '0';
    }
    if ('A' <= c && c <= 'F') {
        return c - 'A' + 10;
    }
    if ('a' <= c && c <= 'f') {
        return c - 'a' + 10;
    }
    return -1;
}

/* Write one 16-bit code unit, low byte first. */
static void put_unit(unsigned int unit, FILE *out)
{
    fputc((int)(unit & 0xFFu), out);
    fputc((int)((unit >> 8) & 0xFFu), out);
}

/* Emit the characters of an abandoned partial escape as literal text. */
static void flush_pending(const char *pending, int count, FILE *out)
{
    int i;

    for (i = 0; i < count; i++) {
        put_unit((unsigned char)pending[i], out);
    }
}

/* Stream `in` to `out`, applying the conversion described at the top. */
static void convert_escapes(FILE *in, FILE *out)
{
    char pending[6];        /* characters of the escape collected so far */
    int count = 0;          /* 0: idle, 1: "\", 2: "\u", 3..5: hex digits */
    unsigned int unit = 0;  /* value of the hex digits collected so far */
    int c;

    put_unit(0xFEFFu, out); /* byte-order mark, written as FF FE */

    while ((c = fgetc(in)) != EOF) {
        if (count == 1 && c == 'u') {
            pending[count++] = (char)c;
            unit = 0;
            continue;
        }
        if (count >= 2) {
            int digit = hex_value(c);

            if (digit >= 0) {
                pending[count++] = (char)c;
                unit = (unit << 4) | (unsigned int)digit;
                if (count == 6) {
                    put_unit(unit, out);
                    count = 0;
                }
                continue;
            }
        }

        /*
         * c does not continue an escape: whatever was collected is plain
         * text. c itself is then handled from the idle state, so that a
         * backslash interrupting a partial escape can start a new one.
         */
        flush_pending(pending, count, out);
        count = 0;
        if (c == '\\') {
            pending[count++] = (char)c;
        } else {
            put_unit((unsigned int)c, out);
        }
    }

    /* Input ended in the middle of an escape: keep it as literal text. */
    flush_pending(pending, count, out);
}

/*
 * Usage: prog file1 file2
 * Returns 0 on success and 1 on a usage error or any I/O failure.
 */
int main(int argc, char **argv)
{
    FILE *f1;
    FILE *f2;
    int io_failed;

    if (argc < 3) {
        printf("%s 文件1 文件2\n"
               "将文件1中所有\"\\uAABB\"替换为\"\\xBB\\xAA\",\'C\'替换为\"C\\x00\","
               "且文件头加\"\\xFF\\xFE\",结果保存到文件2中。\n", argv[0]);
        return 1;
    }
    f1 = fopen(argv[1], "rb");
    if (NULL == f1) {
        printf("找不到文件[%s]!\n", argv[1]);
        return 1;
    }
    f2 = fopen(argv[2], "wb");
    if (NULL == f2) {
        fclose(f1);
        printf("创建文件[%s]出错!\n", argv[2]);
        return 1;
    }

    convert_escapes(f1, f2);

    /* fclose flushes buffered output, so its result matters for f2. */
    io_failed = ferror(f1) || ferror(f2);
    if (fclose(f2) != 0) {
        io_failed = 1;
    }
    fclose(f1);

    if (io_failed) {
        printf("读写文件出错!\n");
        return 1;
    }
    printf("%s %s %s OK.\n", argv[0], argv[1], argv[2]);
    return 0;
}