几万行TXT文本记录如何快速除重并乱序排列呢?
本帖最后由 camry82 于 2013-07-20 09:38:15 编辑 几万行TXT文本记录如何快速除重并乱序排列呢?
我现在用的方法:
Private Sub Command1_Click()
Dim strtmp
Dim i
Open App.Path & "" & Text1.Text For Binary As #1
strtmp = Split(Input(LOF(1), 1), vbCrLf)
For i = 0 To UBound(strtmp) - 1
List1.AddItem strtmp(i)
Next
Close #1
List1.ListIndex = 0
List2.Clear
Dim e As Integer
Dim t As Integer
For e = 0 To List1.ListCount - 1
Randomize
t = Int(Rnd * List1.ListCount)
List2.AddItem List1.List(t)
List1.RemoveItem (t)
If List1.ListCount = 0 Then Exit Sub
Next e
End Sub
Private Sub Command2_Click()
Dim i As Integer, j As Integer
Dim n As Integer
With List2
For i = 0 To .ListCount - 1
For j = .ListCount - 1 To i + 1 Step -1
If .List(j) = .List(i) Then
List2.RemoveItem j
End If
Next j
Next i
End With
List2.ListIndex = 0
For n = 0 To List2.ListCount - 1
If List2.List(n) = "" Then
' List2.RemoveItem n
End If
Next n
Open App.Path & "" & Text1.Text For Output As #1
For i = 0 To List2.ListCount
Print #1, List2.List(i)
Next
Close #1
Text2.Text = List2.ListCount
End Sub
用这方法很慢,会卡死,有什么方法可以快速处理吗?请给下代码,谢谢了呀。
[解决办法]
10W数据,也就2秒左右:
Option Explicit
' 需要引用:Microsoft Scripting Runtime
Private Sub Command1_Click()
Dim objDict As New Dictionary
Dim arrBuf() As String
Dim arrOut() As String
Dim arrIndex() As Long
Dim i&, p&, m&, strTemp$
Open App.Path & "" & Text1.Text For Binary As #1
arrBuf = Split(Input(LOF(1), 1), vbCrLf)
'筛选、除重
p = -1
m = UBound(arrBuf)
ReDim arrOut(m)
For i = 0 To m
strTemp = arrBuf(i)
If (Len(strTemp)) Then
If (Not objDict.Exists(strTemp)) Then
p = p + 1
objDict.Add strTemp, p
arrOut(p) = strTemp
End If
End If
Next
Close
objDict.RemoveAll
Set objDict = Nothing
If (p = -1) Then
MsgBox "没有内容。", vbExclamation
Exit Sub
End If
'乱序输出
ReDim arrIndex(p)
For i = 0 To p
arrIndex(i) = i
Next
Randomize
m = p
For i = 0 To p
p = Rnd() * m
List1.AddItem arrOut(arrIndex(p))
arrIndex(p) = arrIndex(m)
m = m - 1
Next
End Sub
End Sub
Private Sub Command2_Click()
Dim test As String
Dim i As Long
Dim savetime As Long
savetime = GetTickCount
Open App.Path & "" & Text1.Text For Output As #1
For i = 1 To 100000
test = String(8 - Len(Hex(i)), "0") & Hex(i)
Print #1, test
Next i
Close #1
MsgBox "耗时:" & GetTickCount - savetime & " 毫秒"
End Sub