php排序1亿个QQ号码
吃饱喝足了,还发贴了。
拆开分成几千份进行排序再合并。
首先先创建一个1亿个QQ号的txt。
<?php// 创建一亿个QQ号的txt (大约需85~100秒)set_time_limit(0);$fn = 'qq.txt';$fp = fopen($fn, 'w');$st = microtime(true);$l = range(0,10000);shuffle($l);foreach ($l as $k=>$v){ $arr = range($v*10000+10000,10000*($v+1)+9999); shuffle($arr); fputs($fp,implode("\n", $arr)."\n"); unset($arr);}echo microtime(true)-$st;?>
<?php// 长度号码分类 (大约需360~400秒)set_time_limit(0);$st = microtime(true);if(!is_dir('qq_no')) mkdir('qq_no');$file = fopen('qq.txt', 'r'); $i=0;$end_s = '';while(!feof($file)){ $g = 1042*1024; fseek($file,$g*$i); $s = fread($file, $g); $end = strrpos($s, "\n"); $arr_s = $end_s.substr($s, 0, $end); $end_s = substr($s, $end); $arr = explode("\n", $arr_s); foreach ($arr as $k=>$v) { if($v!='') { $tag = "$v[0]$v[1]$v[2]"; $text_arr[strlen($v)][$tag][] = $v; } } foreach ($text_arr as $k=>$v) { $n_dir = 'qq_no/'.$k; if (!is_dir($n_dir)) mkdir($n_dir); foreach ($v as $tag=>$val) { $n_tf = fopen($n_dir.'/'.$tag.'.txt', 'a+'); fputs($n_tf,implode("\n",$val)."\n"); } } unset($text_arr); ++$i;}echo microtime(true)-$st;?>
<?php// 排序完成拉 (800~920秒)set_time_limit(0);$st = microtime(true);$qq_done = fopen('qq_done.txt', 'a+');$root = 'qq_no';$dir_array = scandir($root);foreach ($dir_array as $key=>$val){ if ($val != '.' && $val != '..') $dirs[$val] = scandir($root.'/'.$val);}foreach ($dirs as $key=>$val){ foreach ($val as $v) { if ($v != '.' && $v != '..') { $file = $root. '/' . $key . '/'. $v; $c = file_get_contents($file); $arr = explode("\n", $c); sort($arr); fputs($qq_done, implode("\n",$arr)); unlink($file); } } rmdir($root. '/' . $key);}rmdir($root);echo microtime(true)-$st;?>
#include <stdio.h>#define BITSPERWORD 32#define SHIFT 5#define MASK 0x1F#define N 100000000int a[1 + N/BITSPERWORD];void set(int i){ a[i>>SHIFT] |= (1<<(i & MASK)); //i&MASK相当于1&(32-1),即1%32}void clr(int i){ a[i>>SHIFT] &= ~(1<<(i & MASK));}int test(int i){ return a[i>>SHIFT] & (1<<(i & MASK));}int main(){ int i; //初始化 for(i = 0; i < N; i++) clr(i); //读取文件,置位 while(scanf("%d", &i) != EOF) set(i); for(i = 0; i < N; i++) if(test(i)) printf("%d\n", i); return 0;}
[解决办法]
既然有现成的数据文件,就没有必要去构造插入串了
set_time_limit(0);$sql =<<< SQLCREATE TABLE IF NOT EXISTS qq1 ( `qq` int(10) NOT NULL, KEY `qq` (`qq`)) ENGINE=MyISAM DEFAULT CHARSET=utf8;SQL;mysql_connect('localhost', 'root', '');mysql_select_db('test');mysql_query($sql);$filename = str_replace('\\', '/', realpath('qq.txt'));$sql =<<< SQLLOAD DATA INFILE '$filename' INTO TABLE qq1SQL;check_speed(1);mysql_query($sql) or print(mysql_error());;check_speed();
[解决办法]
ok, 又试了全是11位的QQ号, 实际上100020001个,
sort一下, 22分钟
fputs($fp,implode(PHP_EOL, $arr).PHP_EOL);