给一个在txt文件里删除重复单词然后排序的比较好的算法，多谢

2012-02-11

给一个在txt文件里删除重复单词然后排序的比较好的算法，谢谢.txt文件里的内容大概如下,每个词占一行：abcdf

给一个在txt文件里删除重复单词然后排序的比较好的算法，谢谢
.txt文件里的内容大概如下,每个词占一行：

abc
dfe
asdsdf
Abc
er4
abc
dfe
67y
......

程序执行完应是：
abc
asdsdf
dfe
er4
Abc
67y

[解决办法]
用标准库里的sort()和unique()不好吗？

[解决办法]
stl中的算法，
[解决办法]
//我用字典树给你写了一个算法
//单词可以含有大小写字母和数字，不能包含除此以外的字符
//时间复杂度为O(n)，n为所有单词的字符总数

#include <stdio.h>
#include <malloc.h>

/*
 * c_tire(n): move the cursor `s` to child slot `n` of the current trie node,
 * creating that child first when it does not exist yet.
 *
 * The macro relies on two names being in scope where it is used:
 *   s      - _tire cursor; on exit it points at the child node.
 *   string - char* whose current character is stored in a new node's `date`.
 *
 * A newly allocated node gets all 62 child links set to NULL and `cnt` set
 * to 0, so the end-of-word flag is never read uninitialised. The argument is
 * evaluated once. The body is wrapped in do { } while (0) so that
 * `c_tire(x);` is exactly one statement and is safe in unbraced if/else
 * chains. The result of malloc() is not checked here.
 */
#define c_tire(n) do { \
    const int slot_ = (n); \
    if (s->next[slot_] == NULL) { \
        _tire fresh_ = (_tire)malloc(sizeof(struct tire)); \
        fresh_->date = *string; \
        fresh_->cnt = 0; \
        for (int k_ = 0; k_ < 62; k_++) { \
            fresh_->next[k_] = NULL; \
        } \
        s->next[slot_] = fresh_; \
    } \
    s = s->next[slot_]; \
} while (0)

/* Input and output streams; opened in main(), `out` is also written by print(). */
FILE *in;
FILE *out;

/*
 * One trie node.
 *   next - child links, 62 slots (one per allowed character)
 *   date - the character this node stands for
 *   cnt  - set to 1 by init_tire() on the node where a word ends
 */
struct tire
{
    struct tire *next[62];
    char date;
    int cnt;
};

/* Pointer-to-node alias used by the rest of the program. */
typedef struct tire *_tire;

/*
 * Map a word character to its child slot in a trie node:
 *   'a'..'z' -> 0..25, 'A'..'Z' -> 26..51, '0'..'9' -> 52..61.
 * Returns -1 for any other character.
 */
static int tire_slot(char c)
{
    if (c >= 'a' && c <= 'z')
        return c - 'a';
    if (c >= 'A' && c <= 'Z')
        return c - 'A' + 26;
    if (c >= '0' && c <= '9')
        return c - '0' + 52;
    return -1;
}

/*
 * Insert one word into the trie rooted at `root`.
 *
 * root   - trie root; its next[] links must already be initialised.
 * string - the word, terminated by '\n', '\r' or '\0'.
 *
 * The word is validated before anything is inserted: an empty word, or a
 * word containing a character outside [a-zA-Z0-9], is ignored. Previously
 * such characters produced an out-of-range index into next[].
 * On success the node of the last character gets cnt = 1.
 */
void init_tire(_tire root, char *string)
{
    _tire s = root;
    char *scan;

    /* First pass: find the end of the word and reject invalid characters. */
    for (scan = string; *scan != '\n' && *scan != '\r' && *scan != '\0'; scan++)
    {
        if (tire_slot(*scan) < 0)
            return;
    }

    /* An empty line must not mark the root as a word. */
    if (scan == string)
        return;

    /* Second pass: walk/create the path. c_tire reads `s` and `*string`. */
    while (string != scan)
    {
        c_tire(tire_slot(*string));
        string++;
    }
    s->cnt = 1;
}

/*
 * Depth-first walk of the subtree rooted at `root`, emitting every stored
 * word to stdout and to the global stream `out`, one word per line.
 *
 * root - node to visit; its character becomes position `i` of the word.
 * s    - scratch buffer holding the characters of the path so far; it must
 *        have room for the longest word plus the terminating '\0'.
 * i    - depth of `root`, i.e. the index in `s` that `root` fills.
 *
 * Children are visited in slot order 0..61, so the words come out in the
 * order the slots define.
 */
void print(_tire root, char *s, int i)
{
    int j;

    s[i] = root->date;

    if (root->cnt == 1)
    {
        /* Terminate the path here; deeper calls overwrite s[i + 1] again. */
        s[i + 1] = '\0';
        puts(s);
        fputs(s, out);
        fputc('\n', out);
    }

    for (j = 0; j < 62; j++)
    {
        if (root->next[j] != NULL)
        {
            print(root->next[j], s, i + 1);
        }
    }
}

/*
 * Read words (one per line) from test.txt, insert them into a trie, then
 * print the distinct words in trie order to stdout and to out.txt.
 * Returns 0 on success, 1 when memory or either file is unavailable.
 */
int main()
{
    _tire root;
    int i;
    char s[265];

    root = (_tire)malloc(sizeof(struct tire));
    if (root == NULL)
    {
        printf("out of memory\n");
        return 1;
    }

    /* Fully initialise the root so no field is ever read uninitialised. */
    root->date = 0;
    root->cnt = 0;
    for (i = 0; i < 62; i++)
    {
        root->next[i] = NULL;
    }

    if ((in = fopen("test.txt", "r")) == NULL)
    {
        printf("cann't open file\n");
        free(root);
        return 1;
    }
    if ((out = fopen("out.txt", "w")) == NULL)
    {
        printf("cann't create file\n");
        fclose(in);
        free(root);
        return 1;
    }

    /*
     * Loop on the result of fgets() rather than feof(): feof() only becomes
     * true after a read has failed, so the old loop processed the last line
     * twice. The read limit stays at 256, below the buffer size.
     */
    while (fgets(s, 256, in) != NULL)
    {
        /* Echo with fputs: file content must never be used as a format string. */
        fputs(s, stdout);
        init_tire(root, s);
    }

    puts("\n\n依字典排序后：");
    for (i = 0; i < 62; i++)
    {
        if (root->next[i] != NULL)
        {
            print(root->next[i], s, 0);
        }
    }

    /* NOTE(review): __TIME__ is the compile-time timestamp, not a run time. */
    printf("\n程式执行时间：%s\n", __TIME__);

    /* The trie nodes are not freed; the process ends right after this. */
    fclose(in);
    fclose(out);
    return 0;
}
[解决办法]
先用sort()排序：sort(RandomAccessIterator first, RandomAccessIterator last) 将[first, last)范围内的内容排序，
然后使用unique()去掉相邻的重复元素。

#include <algorithm>
#include <vector>
#include <string>
#include <iterator>
#include <iostream> //有的时候是: #include <iostream.h>

using namespace std;

// Function template that writes `elem` to standard output followed by a
// single space. `Type` can be any type streamable to std::cout with
// operator<<.
template <class Type>
void print_elements(Type elem) {
    std::cout << elem << " ";
}

//函数指针，指向print_elements,传递给for_each()来输出
void (*pfi)(int) = print_elements;
void (*pfs)(string)=print_elements;

// Demonstrates std::unique and std::unique_copy on a vector<int> and a
// vector<string>, printing each intermediate state through pfi / pfs.
int main() {
    int ia[] = {0, 1, 0, 2, 0, 3, 0, 4, 0, 5};

    // Initialise the vector from the array.
    std::vector<int> vec(ia, ia + 10);
    std::vector<int>::iterator vec_iter;

    // unique() before sorting: it only collapses ADJACENT duplicates, and
    // this input has none, so the sequence is unchanged.
    // Prints: 0 1 0 2 0 3 0 4 0 5
    vec_iter = std::unique(vec.begin(), vec.end());
    std::for_each(vec.begin(), vec.end(), pfi);
    // Line break instead of backspaces, which made the next listing
    // overwrite this one.
    std::cout << "\n\n";

    // Sort so that equal values become adjacent: 0 0 0 0 0 1 2 3 4 5
    std::sort(vec.begin(), vec.end());

    // unique() again: the distinct values 0 1 2 3 4 5 are moved to the
    // front and the returned iterator points at the seventh position.
    // The four elements from there to end() hold unspecified values.
    vec_iter = std::unique(vec.begin(), vec.end());
    std::for_each(vec.begin(), vec.end(), pfi);
    std::cout << "\n\n";

    // Drop the leftover tail [vec_iter, vec.end()).
    // Prints: 0 1 2 3 4 5
    vec.erase(vec_iter, vec.end());
    std::for_each(vec.begin(), vec.end(), pfi);
    std::cout << "\n\n";

    // Same idea for strings, using unique_copy() into a second vector.
    std::string st[] = {"enough", "is", "enough", "good", "is", "aaa", "is"};
    std::vector<std::string> svec(st, st + 7);
    std::vector<std::string> vec_result(svec.size());
    std::vector<std::string>::iterator svec_iter;

    // Sort first; unique_copy(), like unique(), only skips adjacent
    // duplicates. It returns the end of the copied range in vec_result.
    std::sort(svec.begin(), svec.end());
    svec_iter = std::unique_copy(svec.begin(), svec.end(), vec_result.begin());

    // Prints: aaa enough good is
    std::for_each(vec_result.begin(), svec_iter, pfs);
    std::cout << "\n\n";
}

//内置数组没有erase()：unique()只会把不重复的元素移到前面并返回新的末尾指针，数组长度不变
//因此对数组要么记下unique()的返回值作为新的末尾，要么用unique_copy()把结果复制到另一个容器

[解决办法]
//a.cpp
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <fstream>
#include <iterator>
using namespace std;

// Reads whitespace-separated words from a.txt, sorts them, and writes each
// distinct word on its own line to b.txt.
// Returns 0 on success, 1 when either file cannot be opened.
int main()
{
    std::ifstream Source("a.txt");
    if (!Source)
    {
        std::cerr << "cannot open a.txt\n";
        return 1;
    }

    // Open the output only after the input is known to exist, so a missing
    // a.txt does not truncate an existing b.txt.
    std::ofstream Dest("b.txt");
    if (!Dest)
    {
        std::cerr << "cannot create b.txt\n";
        return 1;
    }

    std::vector<std::string> DataVec;
    std::copy(std::istream_iterator<std::string>(Source),
              std::istream_iterator<std::string>(),
              std::back_inserter(DataVec));

    // Sorting makes duplicates adjacent, which is what unique_copy() needs.
    std::sort(DataVec.begin(), DataVec.end());
    std::unique_copy(DataVec.begin(), DataVec.end(),
                     std::ostream_iterator<std::string>(Dest, "\n"));
    return 0;
}

G:\test> type a.txt
abc
dfe
asdsdf
Abc
er4
abc
dfe
67y

G:\test> a

G:\test> type b.txt
67y
Abc
abc
asdsdf
dfe
er4

热点排行

C++

给一个在txt文件里删除重复单词然后排序的比较好的算法，多谢