大数据量下的查找最新的几条数据的通用方法
原文地址
http://blog.csdn.net/xuexiaodong2009/article/details/7938679
由于项目需要,需要获取一组数据的的最新一条数据,表结构如下:
[sql] view plaincopy
CREATE TABLE [dbo].[WUSU_SUOLITest_Table](
[ID] [bigint] IDENTITY(1,1) NOT NULL,
[ReceiveTime] [datetime] NULL,
[GroupID] [bigint] NOT NULL,
[DataValue] [float] NULL,
[SensorCode] [char](10) NOT NULL,
)
在这个表上只有两种操作,插入和查询,没有删除和更新。而且同一种设备,随着id列的变大,ReceiveTime也随着变大。
每一个不同的SensorCode代表了一个设备,目前有50个设备,每30秒上报一次数据,ReceiveTime代表上报数据的时间,现在需要获取每一个设备最新一次的数据,
开始我使用如下的查询语句:
[sql] view plaincopy
select * from WUSU_SUOLITest_Table where id in (select max(id) from WUSU_SUOLITest_Table group by SensorCode )
在数据量比较小时,是没有问题的,但数据量特别大时,这种方式,目前一天的数据就超过了14万,有很大的延时,即使在id上有聚集索引,SensorCode上使用了分区,依然没有多大作用。时间主要花费到了group by上。
实在想不多到什么好的而解决方法,就只能在此表上创建一个触发器,每次插入数据时就把最新的数据放在了一个临时表,又有临时表最多只有50条数据,速度当然就很好了。
[sql] view plaincopy
create TRIGGER [dbo].[UpdateWUSU_LastOriginalDataSUOLI]
ON [dbo].[WUSU_SUOLITest_Table]
AFTER INSERT
AS
BEGIN
declare @SensorCode char(10), @DataValue float ,@ReceiveTime datetime ,@GroupID bigint
select @SensorCode=SensorCode,@DataValue=DataValue,@ReceiveTime=ReceiveTime,@GroupID=GroupID from inserted
update WUSU_LastOriginalData set DataValue=@DataValue,ReceiveTime=@ReceiveTime,GroupID=@GroupID
where SensorCode=@SensorCode
END
当然这是为了获取各种设备最新的一条数据,如果要获取最新的两条数据,最多也就是100条记录,一次类推,只需要把上边的触发器修改一下就可以。
但还有没有更好的方式,在不修改表结构的情况下?目前还没有想到。
[解决办法]
TRY:
select * from WUSU_SUOLITest_Table A
where NOT EXISTS (
SEELCT 1 FROM WUSU_SUOLITest_Table B
WHERE B.SensorCode =A.SensorCode
AND B.ID > A.ID)
select * from WUSU_SUOLITest_Table A
where NOT EXISTS (
SELECT 1 FROM WUSU_SUOLITest_Table B
WHERE B.SensorCode =A.SensorCode
AND B.ID > A.ID)
select * from WUSU_SUOLITest_Table as t
where id = (select max(id) from WUSU_SUOLITest_Table where SensorCode=t.SensorCode )
--1、用于查询重复处理记录(如果列没有大小关系时2000用生成自增列和临时表处理,SQL2005用row_number函数处理)
--> --> (Roy)生成測試數據
if not object_id('Tempdb..#T') is null
drop table #T
Go
Create table #T([ID] int,[Name] nvarchar(1),[Memo] nvarchar(2))
Insert #T
select 1,N'A',N'A1' union all
select 2,N'A',N'A2' union all
select 3,N'A',N'A3' union all
select 4,N'B',N'B1' union all
select 5,N'B',N'B2'
Go
--I、Name相同ID最小的记录(推荐用1,2,3),方法3在SQl05时,效率高于1、2
方法1:
Select * from #T a where not exists(select 1 from #T where Name=a.Name and ID<a.ID)
方法2:
select a.* from #T a join (select min(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID
方法3:
select * from #T a where ID=(select min(ID) from #T where Name=a.Name)
方法4:
select a.* from #T a join #T b on a.Name=b.Name and a.ID>=b.ID group by a.ID,a.Name,a.Memo having count(1)=1
方法5:
select * from #T a group by ID,Name,Memo having ID=(select min(ID)from #T where Name=a.Name)
方法6:
select * from #T a where (select count(1) from #T where Name=a.Name and ID<a.ID)=0
方法7:
select * from #T a where ID=(select top 1 ID from #T where Name=a.name order by ID)
方法8:
select * from #T a where ID!>all(select ID from #T where Name=a.Name)
方法9(注:ID为唯一时可用):
select * from #T a where ID in(select min(ID) from #T group by Name)
--SQL2005:
方法10:
select ID,Name,Memo from (select *,min(ID)over(partition by Name) as MinID from #T a)T where ID=MinID
方法11:
select ID,Name,Memo from (select *,row_number()over(partition by Name order by ID) as MinID from #T a)T where MinID=1
生成结果:
/*
ID Name Memo
----------- ---- ----
1 A A1
4 B B1
(2 行受影响)
*/
--II、Name相同ID最大的记录,与min相反:
方法1:
Select * from #T a where not exists(select 1 from #T where Name=a.Name and ID>a.ID)
方法2:
select a.* from #T a join (select max(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID order by ID
方法3:
select * from #T a where ID=(select max(ID) from #T where Name=a.Name) order by ID
方法4:
select a.* from #T a join #T b on a.Name=b.Name and a.ID<=b.ID group by a.ID,a.Name,a.Memo having count(1)=1
方法5:
select * from #T a group by ID,Name,Memo having ID=(select max(ID)from #T where Name=a.Name)
方法6:
select * from #T a where (select count(1) from #T where Name=a.Name and ID>a.ID)=0
方法7:
select * from #T a where ID=(select top 1 ID from #T where Name=a.name order by ID desc)
方法8:
select * from #T a where ID!<all(select ID from #T where Name=a.Name)
方法9(注:ID为唯一时可用):
select * from #T a where ID in(select max(ID) from #T group by Name)
--SQL2005:
方法10:
select ID,Name,Memo from (select *,max(ID)over(partition by Name) as MinID from #T a)T where ID=MinID
方法11:
select ID,Name,Memo from (select *,row_number()over(partition by Name order by ID desc) as MinID from #T a)T where MinID=1
生成结果2:
/*
ID Name Memo
----------- ---- ----
3 A A3
5 B B2
(2 行受影响)
*/
--在ID上加聚集索引
create clustered index CIX_WUSU_SUOLITest_Table_ID on [dbo].[WUSU_SUOLITest_Table](ID)
--在Sensor上加非聚集索引
create index IX_WUSU_SUOLITest_Table_SensorCode on [dbo].[WUSU_SUOLITest_Table](SensorCode)
--楼主自己的SQL不用改
select * from WUSU_SUOLITest_Table where id in (select max(id) from WUSU_SUOLITest_Table group by SensorCode )
--构建测试数据 生成5000W条数据, 分成50组
if OBJECT_ID('tb') is not null drop table tb
create table tb(ID int identity,SensorCode smallint)
DECLARE @i AS INT, @rc AS INT,@max int, @RCOUNT int;
SET @rc = 1;
SET @max = 50000000;
set @i = ceiling(log10(@max)/LOG10(2))-1;
INSERT INTO dbo.tb SELECT 1+abs(checksum(newid()))%(50)
WHILE @rc <= @i
BEGIN
INSERT INTO dbo.tb SELECT 1+abs(checksum(newid()))%(50) FROM dbo.tb;
SET @rc = @rc +1 ;
END
select @max = @max - @@IDENTITY
INSERT INTO dbo.tb
SELECT top(@max) 1+abs(checksum(newid()))%(50) FROM dbo.tb
--创建索引
create index xxx on dbo.tb(SensorCode,ID)
GO
--查看表信息
EXEC sp_spaceused tb, true
/*
name rows reserved data index_size unused
--------- ----------- ------------------ ------------------ ------------------ ------------------
tb 50000000 1735960 KB 742120 KB 993352 KB 488 KB
*/
--打开时间统计
set statistics time on
--方案一测试
select SensorCode,MAX(ID) from tb group by SensorCode
/*
SQL Server 分析和编译时间:
CPU 时间 = 0 毫秒,占用时间 = 7 毫秒。
SensorCode
---------- -----------
1 49999999
2 49999956
3 49999991
4 49999982
5 49999976
。
。
。
47 49999844
48 49999989
49 49999996
50 49999995
(50 行受影响)
SQL Server 执行时间:
CPU 时间 = 9577 毫秒,占用时间 = 44901 毫秒。
*/
--清空缓存
DBCC DROPCLEANBUFFERS;
GO
DBCC FREEPROCCACHE;
GO
DBCC FLUSHPROCINDB(8);
GO
--方案二测试
declare @i int,@sql varchar(max)
set @i = 1
set @sql = '
select '+CAST(@i as varchar(2))+',MAX(ID) from tb where SensorCode = '+CAST(@i as varchar(2))
while @i < 50
begin
set @i = @i + 1
set @sql = @sql + '
union select '+CAST(@i as varchar(2))+',MAX(ID) from tb where SensorCode = '+CAST(@i as varchar(2))
end
--print @sql
exec(@sql)
/*
----------- -----------
1 49999999
2 49999956
3 49999991
。
。
。
46 49999998
47 49999844
48 49999989
49 49999996
50 49999995
(50 行受影响)
SQL Server 执行时间:
CPU 时间 = 16 毫秒,占用时间 = 638 毫秒。
SQL Server 执行时间:
CPU 时间 = 78 毫秒,占用时间 = 712 毫秒。
*/
--综上 5000万条数据,建立非聚集索引,方案一耗时 44秒,方案二耗时 0.7秒
create TRIGGER [dbo].[UpdateWUSU_LastOriginalDataSUOLI]
ON [dbo].[WUSU_SUOLITest_Table]
AFTER INSERT
AS
BEGIN
DECLARE @tmpTbl TABLE(SensorCode CHAR(10),DataValue float,ReceiveTime DATETIME,GroupID BIGINT)
INSERT @tmpTbl( SensorCode ,DataValue ,ReceiveTime ,GroupID )
SELECT SensorCode ,DataValue ,ReceiveTime ,GroupID FROM INSERTED
;WITH cte_tmp AS
(
SELECT SensorCode ,DataValue ,ReceiveTime ,GroupID
FROM @tmpTbl a
WHERE a.ReceiveTime=
(SELECT MAX(ReceiveTime)
FROM @tmpTbl
WHERE SensorCode=a.SensorCode
)
)
UPDATE a
SET a.DataValue=b.DataValue,
a.ReceiveTime=b.ReceiveTime,
a.GroupID=b.GroupID
FROM WUSU_LastOriginalData a
INNER JOIN cte_tmp b ON b.SensorCode=a.SensorCode
END
SELECT * from testtb t,
(SELECT (select max(id) from TestTb where SensorCode=s.SensorCode ) as maxid
from SensorCode_tb s) s where t.id=s.id