求超大txt文档读取的例子
有一个几百 MB 的 txt 文件,想按行分割成字符串数组,最好能同时去掉空行。求速度最快的实现方法和示例代码。
[解决办法]
仅供参考(虽然是用 C 语言写的):下面的程序按行读取文件,去掉空行,排序并去重后写入另一个文件。
// Sort the lines of file 1, drop blank and duplicate lines, and save the
// result to file 2.
//
// Usage: prog src.txt uniqued.txt [-i]
//   Any third argument switches sorting and de-duplication to
//   case-insensitive mode.
//
// Every line is stored in a fixed-width slot of MAXCHARS bytes inside one
// contiguous heap buffer, so the whole table can be handed to qsort() directly.
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXCHARS 128 // widest line handled, including the trailing '\n' and the terminating '\0'

int MAXLINES = 10000; // current capacity of the line table, in lines; grows on demand
int ignore_case = 0;  // non-zero: compare lines without regard to letter case

// Portable case-insensitive string comparison (stricmp/strcasecmp are not
// standard C). Returns <0, 0 or >0 like strcmp.
static int line_icmp(const char *a, const char *b)
{
    for (;; a++, b++) {
        // tolower() requires a value representable as unsigned char.
        int ca = tolower((unsigned char)*a);
        int cb = tolower((unsigned char)*b);
        if (ca != cb || ca == 0) {
            return ca - cb;
        }
    }
}

// qsort() comparator: case-insensitive ordering of two line slots.
int icompare(const void *arg1, const void *arg2)
{
    return line_icmp((const char *)arg1, (const char *)arg2);
}

// qsort() comparator: case-sensitive ordering of two line slots.
int compare(const void *arg1, const void *arg2)
{
    return strcmp((const char *)arg1, (const char *)arg2);
}

// Returns 0 on success, 1 on usage error or unreadable input file,
// 2 on allocation failure or when the output file cannot be created/written.
int main(int argc, char **argv)
{
    FILE *in;
    FILE *out;
    char *buf;
    char ln[MAXCHARS];
    size_t count = 0; // number of lines stored so far
    int lineno = 0;   // 1-based number of the input line being processed
    int write_failed;

    if (argc < 3) {
        printf("Unique line. Designed by zhao4zhong1@163.com. 2012-08-20\n");
        printf("Usage: %s src.txt uniqued.txt [-i]\n", argv[0]);
        return 1;
    }
    if (argc > 3) {
        ignore_case = 1; // a third command-line argument means: ignore case
    }

    in = fopen(argv[1], "r");
    if (NULL == in) {
        printf("Can not find file %s!\n", argv[1]);
        return 1;
    }

    // Size arithmetic is done in size_t so it cannot overflow int.
    buf = malloc((size_t)MAXLINES * MAXCHARS);
    if (NULL == buf) {
        fclose(in);
        printf("Can not malloc(%d LINES*%d CHARS)!\n", MAXLINES, MAXCHARS);
        return 2;
    }

    while (fgets(ln, MAXCHARS, in) != NULL) {
        size_t len = strlen(ln);

        lineno++;

        // Only a completely filled buffer without a newline can mean the line
        // continues; a short final line lacking '\n' is not "too long".
        if (len == MAXCHARS - 1 && ln[len - 1] != '\n') {
            int ch = fgetc(in);
            if (ch != '\n' && ch != EOF) {
                printf("%s Line %d too long(>%d),spilth ignored.\n", argv[1], lineno, MAXCHARS);
                // Discard the remainder of the over-long line.
                do {
                    ch = fgetc(in);
                } while (ch != '\n' && ch != EOF);
            }
        }

        // Strip the trailing newline, carriage return and spaces.
        while (len > 0 && ('\n' == ln[len - 1] || '\r' == ln[len - 1] || ' ' == ln[len - 1])) {
            ln[--len] = '\0';
        }
        if (0 == len) {
            continue; // blank line: nothing to store
        }

        memcpy(buf + count * MAXCHARS, ln, len + 1);
        count++;

        if (count >= (size_t)MAXLINES) {
            // Table is full: grow it before reading the next line.
            int new_lines = 0;
            char *grown = NULL;

            if (MAXLINES <= INT_MAX / 2) {
                new_lines = MAXLINES * 2;
                if (1280000 == new_lines) {
                    new_lines = 2500000; // round the growth sequence to a friendlier number
                }
                grown = realloc(buf, (size_t)new_lines * MAXCHARS);
            }
            if (NULL == grown) {
                // buf is still valid; keep what has been read and stop.
                printf("Can not malloc(%d LINES*%d CHARS)!\n", new_lines, MAXCHARS);
                printf("WARNING: Lines >%d ignored.\n", MAXLINES);
                break;
            }
            buf = grown;
            MAXLINES = new_lines;
        }
    }
    fclose(in);

    if (count > 1) {
        qsort(buf, count, MAXCHARS, ignore_case ? icompare : compare);
    }

    out = fopen(argv[2], "w");
    if (NULL == out) {
        free(buf);
        printf("Can not create file %s!\n", argv[2]);
        return 2;
    }

    // After sorting, duplicates are adjacent: emit a line only when it differs
    // from its predecessor. With zero stored lines nothing is written.
    {
        int (*differs)(const char *, const char *) = ignore_case ? line_icmp : strcmp;
        const char *prev = NULL;
        size_t k;

        for (k = 0; k < count; k++) {
            const char *cur = buf + k * MAXCHARS;
            if (NULL == prev || differs(prev, cur) != 0) {
                fprintf(out, "%s\n", cur);
            }
            prev = cur;
        }
    }

    // fclose() flushes buffered output, so its result matters for a written file.
    write_failed = ferror(out);
    if (fclose(out) != 0) {
        write_failed = 1;
    }
    free(buf);

    if (write_failed) {
        printf("Can not write file %s!\n", argv[2]);
        return 2;
    }
    return 0;
}