求一个高效率的算法
现有表A(ID varchar(8),DT datetime,P float),primary key(ID,DT) 表记录数有300万
现在求一个ID每一年(表中有记录的年份,如ID=50100100 表A中有记录的年份从1951-2008年)连续3天最大P值和(P为null不参与计算).
附A表一些示例记录
50100100 1951-1-1 2
50100100 1951-1-2 3.5
50100100 1951-1-3 null
.
.
.
50100100 1952-1-1 5
50100100 1952-1-2 7
50100100 1952-1-3 3.3
.
.
.
62012560 .............
高手看看有什么高效的算法!
输出应该是
年份 P和 开始日期
1951
1952
.
.
.
2008
[解决办法]
----生产年份临时表select 1951 + number as dd into #1 from master.dbo.spt_values where type = 'p' and number <2008 - 1950--------查询select dd,max(p) from ( select * from #1 a join 表A b on year(b.dt) = a.dd where exists (select 1 from 表A where dt = b.dt - 1 ) and exists (select 1 from 表A where dt = b.dt - 2 ) and exists (select 1 from 表A where dt = b.dt + 1 ) and exists (select 1 from 表A where dt = b.dt + 2 )) Tgroup by dd
[解决办法]
顶。。
[解决办法]
楼上高手都帮顶了..小菜也顶!
[解决办法]
create table tbA(id varchar(8),dt datetime,p float)insert into tbA select '50100100','1951-1-1',2 union allselect '50100100','1951-1-2',3.5 union allselect '50100100','1951-1-3',4.2 union allselect '50100100','1951-1-4',null union allselect '50100100','1951-1-6',7.2 union allselect '50100100','1951-1-7',null union allselect '50100100','1951-1-8',10.5 union allselect '50100100','1952-1-2',5 union allselect '50100100','1952-1-3',7 union allselect '50100100','1952-1-4',3.3 union allselect '50100100','1952-1-5',6.5 union allselect '50100100','1952-1-6',1.2goselect datepart(yy,a.dt) as [year],max(a.p+b.p+c.p) as sumP from tbA a inner join tbA b on a.id=b.id inner join tbA c on b.id=c.id where datediff(dd,a.dt,b.dt)=1 and datediff(dd,b.dt,c.dt)=1 and datepart(yy,a.dt)=datepart(yy,b.dt) and datepart(yy,b.dt)=datepart(yy,c.dt) and not a.p is null and not b.p is null and not c.p is null group by datepart(yy,a.dt)godrop table tbA/*year sumP----------- ----------------------1951 9.71952 16.8*/
------解决方案--------------------
ID是聚集类型索引
既然是一个ID, 就先按ID取出1951到现在2008的所有数据, 只不过365*(2008-1951)=20805条数据
而且默认排序肯定按日期了, 所以再用只读向前的游标统计是比较高效的
游标内的算法 可以采用类似 冒泡排序的方法, 但比冒泡简单, 因为只一次数据遍历就行了, 如
变量1记录从1累加三天的数据, 之后清零后 再累加三天
变量2记录从2累加三天的数据, 之后清零后 再累加三天
变量3记录从3累加三天的数据, 之后清零后 再累加三天
变量11是 变量1的最大值, 用于与变量1比较, 如果小于变量1, 就取变量1的值
变量22是 变量2的最大值, 用于与变量2比较, 如果小于变量2, 就取变量2的值
变量33是 变量3的最大值, 用于与变量3比较, 如果小于变量3, 就取变量3的值
遍历到当年年末 变量11, 变量22, 变量33比较再取最大值, 就知道结果了
当然 三个变更要动态维护只有三条记录的表变量, 用于记录是哪三条记录ID, DT
最后 ID, DT都知道了, 数据自然也就出来了
按此方法一个循环就出来了, 变量计算耗时可以忽略不计, 真正时耗在于检索, 恰恰相当于查询一个有2万条记录的表所需时间
[解决办法]
对了,还有ID和日期:
create table tbA(id varchar(8),dt datetime,p float)insert into tbA select '50100100','1951-1-1',2 union allselect '50100100','1951-1-2',3.5 union allselect '50100100','1951-1-3',4.2 union allselect '50100100','1951-1-4',null union allselect '50100100','1951-1-6',7.2 union allselect '50100100','1951-1-7',null union allselect '50100100','1951-1-8',10.5 union allselect '50100100','1951-1-15',10.5 union allselect '50100100','1951-1-16',10 union allselect '50100100','1951-1-17',8.5 union allselect '50100100','1952-1-2',5 union allselect '50100100','1952-1-3',7 union allselect '50100100','1952-1-4',3.3 union allselect '50100100','1952-1-5',6.5 union allselect '50100100','1952-1-6',1.2 union allselect '50100101','1952-1-3',6.5 union allselect '50100101','1952-1-4',3.8 union allselect '50100101','1952-1-5',16 union allselect '50100101','1952-1-6',7goselect a.id,a.dt,a.p+b.p+c.p as sumP into # from tbA a inner join tbA b on a.id=b.id inner join tbA c on b.id=c.id where datediff(dd,a.dt,b.dt)=1 and datediff(dd,b.dt,c.dt)=1 and datepart(yy,a.dt)=datepart(yy,b.dt) and datepart(yy,b.dt)=datepart(yy,c.dt) and not a.p is null and not b.p is null and not c.p is nullselect * from # a where not exists(select 1 from # where id=a.id and datepart(yy,dt)=datepart(yy,a.dt) and sump>a.sump)godrop table tbA,#/*create table tbA(id varchar(8),dt datetime,p float)insert into tbA select '50100100','1951-1-1',2 union allselect '50100100','1951-1-2',3.5 union allselect '50100100','1951-1-3',4.2 union allselect '50100100','1951-1-4',null union allselect '50100100','1951-1-6',7.2 union allselect '50100100','1951-1-7',null union allselect '50100100','1951-1-8',10.5 union allselect '50100100','1951-1-15',10.5 union allselect '50100100','1951-1-16',10 union allselect '50100100','1951-1-17',8.5 union allselect '50100100','1952-1-2',5 union allselect '50100100','1952-1-3',7 union allselect '50100100','1952-1-4',3.3 union allselect '50100100','1952-1-5',6.5 union allselect '50100100','1952-1-6',1.2 union allselect '50100101','1952-1-3',6.5 union allselect '50100101','1952-1-4',3.8 union allselect '50100101','1952-1-5',16 union allselect '50100101','1952-1-6',7goselect a.id,a.dt,a.p+b.p+c.p as sumP into # from tbA a inner join tbA b on a.id=b.id inner join tbA c on b.id=c.id where datediff(dd,a.dt,b.dt)=1 and datediff(dd,b.dt,c.dt)=1 and datepart(yy,a.dt)=datepart(yy,b.dt) and datepart(yy,b.dt)=datepart(yy,c.dt) and not a.p is null and not b.p is null and not c.p is nullselect * from # a where not exists(select 1 from # where id=a.id and datepart(yy,dt)=datepart(yy,a.dt) and sump>a.sump)godrop table tbA,#/*id dt sumP-------- ----------------------- ----------------------50100100 1951-01-15 00:00:00.000 2950100100 1952-01-03 00:00:00.000 16.850100101 1952-01-04 00:00:00.000 26.8*/
[解决办法]
有相同时,取哪个?
[解决办法]
----------------------------------- Author: liangCK 小梁--------------------------------- --> 生成测试数据: #TIF OBJECT_ID('tempdb.dbo.#T') IS NOT NULL DROP TABLE #TCREATE TABLE #T (ID VARCHAR(8),DT DATETIME,P DECIMAL(18,2),PRIMARY KEY(ID,DT))INSERT INTO #TSELECT '50100100','1951-1-1',2 UNION ALLSELECT '50100100','1951-1-2',3.5 UNION ALLSELECT '50100100','1951-1-3',null UNION ALLSELECT '50100100','1951-1-4',2 UNION ALLSELECT '50100100','1951-1-5',5 UNION ALLSELECT '50100100','1951-1-6',5 UNION ALLSELECT '50100100','1951-1-8',5 UNION ALLSELECT '50100100','1951-1-9',2 UNION ALLSELECT '50100100','1951-1-10',5 UNION ALLSELECT '50100100','1952-1-1',5 UNION ALLSELECT '50100100','1952-1-2',7 UNION ALLSELECT '50100100','1952-1-3',3.3--SQL查询如下:DECLARE @min_dt DATETIME,@max_dt DATETIME;SELECT @min_dt=MIN(DT),@max_dt=MAX(dt)FROM #T;CREATE TABLE #temp_dt(DT DATETIME);INSERT #temp_dt(DT) SELECT DATEADD(year,number,DATEADD(year,0,@min_dt)) FROM master.dbo.spt_values WHERE type='p' AND number < DATEDIFF(year,@min_dt,@max_dt)+1; SELECT YEAR(A.DT) AS 年份, B.DT AS [开始时间], B.total AS P和FROM #temp_dt AS A CROSS APPLY ( SELECT TOP(1) * FROM ( SELECT DT,total=(SELECT SUM(ISNULL(P,0)) FROM #T WHERE ID=D.ID AND DT BETWEEN D.DT AND DATEADD(day,2,D.DT)) FROM #T AS D WHERE DT >= A.DT AND DT < DATEADD(year,1,A.DT) ) AS T ORDER BY total DESC ) AS BDROP TABLE #T,#temp_dt;/*年份 开始时间 P和----------- ----------------------- ---------------------------------------1951 1951-01-04 00:00:00.000 12.001952 1952-01-01 00:00:00.000 15.30(2 行受影响)*/
[解决办法]
----------------------------------- Author: liangCK 小梁--------------------------------- --> 生成测试数据: #TIF OBJECT_ID('tempdb.dbo.#T') IS NOT NULL DROP TABLE #TCREATE TABLE #T (ID VARCHAR(8),DT DATETIME,P DECIMAL(18,2),PRIMARY KEY(ID,DT))INSERT INTO #TSELECT '50100100','1951-1-1',2 UNION ALLSELECT '50100100','1951-1-2',3.5 UNION ALLSELECT '50100100','1951-1-3',null UNION ALLSELECT '50100100','1951-1-4',2 UNION ALLSELECT '50100100','1951-1-5',5 UNION ALLSELECT '50100100','1951-1-6',5 UNION ALLSELECT '50100100','1951-1-8',5 UNION ALLSELECT '50100100','1951-1-9',2 UNION ALLSELECT '50100100','1951-1-10',5 UNION ALLSELECT '50100100','1952-1-1',5 UNION ALLSELECT '50100100','1952-1-2',7 UNION ALLSELECT '50100100','1952-1-3',3.3--SQL查询如下:DECLARE @min_dt DATETIME,@max_dt DATETIME;SELECT @min_dt=MIN(DT),@max_dt=MAX(dt)FROM #T;CREATE TABLE #temp_dt(DT DATETIME);INSERT #temp_dt(DT) SELECT DATEADD(year,number,DATEADD(year,0,@min_dt)) FROM master.dbo.spt_values WHERE type='p' AND number < DATEDIFF(year,@min_dt,@max_dt)+1; SELECT YEAR(A.DT) AS 年份, B.DT AS [开始时间], B.total AS P和FROM #temp_dt AS A CROSS APPLY ( SELECT TOP(1) WITH TIES * FROM ( SELECT DT,total=(SELECT SUM(ISNULL(P,0)) FROM #T WHERE ID=D.ID AND DT BETWEEN D.DT AND DATEADD(day,2,D.DT)) FROM #T AS D WHERE DT >= A.DT AND DT < DATEADD(year,1,A.DT) ) AS T ORDER BY total DESC ) AS BDROP TABLE #T,#temp_dt;/*年份 开始时间 P和----------- ----------------------- ---------------------------------------1951 1951-01-04 00:00:00.000 12.001951 1951-01-08 00:00:00.000 12.001952 1952-01-01 00:00:00.000 15.30(3 行受影响)*/
[解决办法]
没看懂明确和详细的意图。。。别怪偶理解能力差
[解决办法]
友情up!!
[解决办法]
关注这个问题~~~
[解决办法]
学习学习!
[解决办法]
I am engage in this aspect.
[解决办法]
[解决办法]
关注!
[解决办法]
关注中...
[解决办法]
不是很理解这条语句
[解决办法]
lknafsdlkwejhnflkjsdlf
[解决办法]
有点复杂!
[解决办法]
确实挺复杂的。。。
[解决办法]
这题关键就是求出连续三天的P值,请大家看看我这个函数怎么样!--创建一个函数create function sumP(@date varchar(10),@diffdate int)returns floatasbegin declare @sums float select @sums=sum(p) from testtable where filedate between @date and dateadd(day,@diffdate,@date) return @sumsend--调用方式select dbo.sump(filedate,2) from testtable
[解决办法]
也要这样调用一次才行
select dbo.sump(filedate,-2) from testtable
[解决办法]
我觉得用外连接可能会更快,也没那么复杂,首先取出该 ID 相关的数据,放到一个临时表里,前面已经有了,就不多说,假设该临时表为 tbA,那么可以
select * from tbA A left join tbA B On A.id = B.id And Year(A.dt) = Year(B.dt) And Day(B.Dt) = Day(A.Dt) + 1 left join tbA C On A.id = C.id And Year(A.dt) = Year(C.dt) And Day(C.dt) = Day(A.Dt) + 2
[解决办法]
37 楼用函数处理应该也挺快吧,有没有人具体做下测试。
[解决办法]
帮顶。
[解决办法]
delete test
insert into test select '50100100','2009-1-1',1
union all select '50100100','2009-1-2',3
--union all select '50100100','2009-1-3',2
union all select '50100100','2009-1-4',5
union all select '50100100','2009-1-5',4
--union all select '50100100','2009-1-6',1
union all select '50100100','2009-1-7',2
union all select '50100100','2009-1-8',5
--union all select '50100100','2009-12-30',1
union all select '50100100','2009-12-31',200
union all select '50100100','2010-1-1',100
--要求必须有三天连续记录
select y,p,(select top 1 dt from test where id='50100100' and (select count(id) from test as A5 where id='50100100' and dt between dateadd(day,1,test.dt) and dateadd(day,2,test.dt))=2 and (month(dt)<>12 or day(dt)>30) and (select sum(p) from test as A6 where id='50100100' and dt between test.dt and dateadd(day,2,test.dt) and p is not null)=A4.p) as dt from
(select y,MAX(p) as p from
(select year(dt) as y,(select sum(p) from test where id='50100100' and dt between A2.dt and dateadd(day,2,A2.dt) and p is not null) as p from
(select dt from test where id='50100100' and (select count(id) from test as A1 where id='50100100' and dt between dateadd(day,1,test.dt) and dateadd(day,2,test.dt))=2 and (month(dt)<>12 or day(dt)>30)) as A2
) as A3 group by y
)as A4
--不要求必须有三天连续记录
select y,p,(select top 1 dt from test where id='50100100' and (select sum(p) from test as A6 where id='50100100' and dt between test.dt and dateadd(day,2,test.dt) and p is not null and year(dt)=year(test.dt))=A4.p) as dt from
(select y,MAX(p) as p from
(select year(dt) as y,(select sum(p) from test where id='50100100' and dt between A2.dt and dateadd(day,2,A2.dt) and p is not null and year(dt)=year(A2.dt)) as p from test as A2) as A3 group by y
)as A4
[解决办法]
我试了下,要高效率的话,不要加太多的WHERE条件,一般来说都是用变量的做法,,但这个结构里面,我个人觉得还得用游标...
我说出我的做法,,肯定符合你想要的结果,看下我造的数据(表A)是不是你想要的数据..
ID DT P
501001005/1/19512.3
501001006/1/19512.5
501001007/1/1951NULL
501001001/2/19513.5
501001002/2/19512.5
501001003/2/19512.8
501001004/3/20082.6
501001005/3/20085.6
501001006/3/20086.2
501001014/6/1952NULL
501001015/6/19525.6
501001016/6/19523.5
DECLARE @ID VARCHAR(8), @DT DATETIME, @P FLOATDECLARE @TIMES INT,@PSUM FLOAT,@COUNT INT,@DT_FRIST VARCHAR(12)--DECLARE @DT_NEXT VARCHAR(12)SET @PSUM=0DECLARE @SQL VARCHAR(999)SET @SQL=''SET @COUNT=(SELECT COUNT(ID) FROM A)CREATE TABLE #T (ID VARCHAR(8),DT DATETIME,P FLOAT,PRIMARY KEY(ID,DT))DECLARE SUM_CUSOR CURSOR FOR SELECT ID,CONVERT(VARCHAR,DT,111) AS DT,CONVERT(VARCHAR,ISNULL(P,0)) AS P FROM A ORDER BY ID,DT FOR READ ONLYSET @TIMES=1OPEN SUM_CUSOR WHILE @COUNT <> 0BEGIN FETCH NEXT FROM SUM_CUSOR INTO @ID,@DT,@P SET @PSUM=@PSUM+@P IF(@TIMES=1) BEGIN SET @DT_FRIST=CONVERT(VARCHAR,@DT,111) END SET @TIMES=@TIMES+1 IF(@TIMES=4) BEGIN SET @SQL=@SQL+' INSERT INTO #T VALUES ('''+CONVERT(VARCHAR,@ID)+''','''+@DT_FRIST+''','+CONVERT(VARCHAR,@PSUM)+')' SET @TIMES=1 SET @PSUM=0 END SET @COUNT=@COUNT-1ENDCLOSE SUM_CUSORDEALLOCATE SUM_CUSOR--PRINT @SQLEXEC (@SQL)SELECT T1.ID,CONVERT(VARCHAR,T2.DT,111)AS DT,CONVERT(VARCHAR,T1.P) AS P FROM (SELECT ID,MAX(P)AS P FROM #T GROUP BY ID,YEAR(DT)) AS T1 LEFT OUTER JOIN #T AS T2 ON T2.ID=T1.ID AND T2.P=T1.PORDER BY T1.IDDROP TABLE #T