大量数据每日汇总方案。
类似于网站统计。
一张表记录 网站Id 访客Ip 访客浏览器 访问页面
每天上千万记录如何汇总每天每个网站的独立Ip和Pv
如果每次都用Count查询太慢了,莫非每天12点以后生成昨天的数据表?
sql
[解决办法]
你的方法应该也可以,处理好索引.
每天的数据都在这张表吗?如果是,可以按日期做分区.
不知道有没有别的办法,等高手了.
[解决办法]
group by。再设一属性,记录同IP访问次数,即只在次数属性上添加
[解决办法]
group by。再设一属性,记录同IP访问次数,即只在次数属性上添加,这是我的想法,不知道能不能实现。我是刚学点皮毛
[解决办法]
1、做表分区,因为你统计的是每天的信息,所以,缩小查询范围是一个很好的办法。
2、建立索引,索引怎么建就看的你需求了。
3、按你说的做,生成一张表,每天汇总统计信息。
[解决办法]
1. 建立合理索引,例如group by的字段,主键,where条件,外键。。。
2. 建立分区方案,
-- 三步曲
-- 1 Range partition table TransactionHistory 分区功能划分
CREATE PARTITION FUNCTION [TransactionRangePF1] (datetime)
AS RANGE RIGHT FOR VALUES ('10/01/2003', '11/01/2003', '12/01/2003',
'1/01/2004', '2/01/2004', '3/01/2004', '4/01/2004',
'5/01/2004', '6/01/2004', '7/01/2004', '8/01/2004');
GO
-- 2 分区架构划分
CREATE PARTITION SCHEME [TransactionsPS1]
AS PARTITION [TransactionRangePF1]
TO ([PRIMARY], [PRIMARY], [PRIMARY]
, [PRIMARY], [PRIMARY], [PRIMARY]
, [PRIMARY], [PRIMARY], [PRIMARY]
, [PRIMARY], [PRIMARY], [PRIMARY]
, [PRIMARY]);
GO
-- 3 把表创建在分区架构上
CREATE TABLE [Production].[TransactionHistory](
[TransactionID] [int] IDENTITY (1, 1) NOT NULL,
[ProductID] [int] NOT NULL,
[ReferenceOrderID] [int] NOT NULL,
[ReferenceOrderLineID] [int] NOT NULL CONSTRAINT [DF_TransactionHistory_ReferenceOrderLineID] DEFAULT (0),
[TransactionDate] [datetime] NOT NULL CONSTRAINT [DF_TransactionHistory_TransactionDate] DEFAULT (GETDATE()),
[TransactionType] [nchar](1) NOT NULL,
[Quantity] [int] NOT NULL,
[ActualCost] [money] NOT NULL,
[ModifiedDate] [datetime] NOT NULL CONSTRAINT [DF_TransactionHistory_ModifiedDate] DEFAULT (GETDATE()),
CONSTRAINT [CK_TransactionHistory_TransactionType] CHECK (UPPER([TransactionType]) IN ('W', 'S', 'P'))
) ON [TransactionsPS1] (TransactionDate);
GO
-- Range partition table TransactionHistoryArchive
CREATE PARTITION FUNCTION [TransactionArchivePF2] (datetime)
AS RANGE RIGHT FOR VALUES ('9/01/2003');
GO
CREATE PARTITION SCHEME [TransactionArchivePS2]
AS PARTITION [TransactionArchivePF2]
TO ([PRIMARY], [PRIMARY]);
GO
-- 把表创建在分区架构TransactionArchivePS2 上
CREATE TABLE [Production].[TransactionHistoryArchive](
[TransactionID] [int] NOT NULL,
[ProductID] [int] NOT NULL,
[ReferenceOrderID] [int] NOT NULL,
[ReferenceOrderLineID] [int] NOT NULL CONSTRAINT [DF_TransactionHistoryArchive_ReferenceOrderLineID] DEFAULT (0),
[TransactionDate] [datetime] NOT NULL CONSTRAINT [DF_TransactionHistoryArchive_TransactionDate] DEFAULT (GETDATE()),
[TransactionType] [nchar](1) NOT NULL,
[Quantity] [int] NOT NULL,
[ActualCost] [money] NOT NULL,
[ModifiedDate] [datetime] NOT NULL CONSTRAINT [DF_TransactionHistoryArchive_ModifiedDate] DEFAULT (GETDATE()),
CONSTRAINT [CK_TransactionHistoryArchive_TransactionType] CHECK (UPPER([TransactionType]) IN ('W', 'S', 'P'))
) ON [TransactionArchivePS2] (TransactionDate);
GO
--以下为插入数据测试
-- Bulk Insert the TransactionHistory and TransactionHistoryArchive data.
DECLARE
@retcode INT
,@data_path NVARCHAR(256)
SELECT @data_path = SUBSTRING([physical_name], 1, CHARINDEX(N'master.mdf', LOWER([physical_name])) - 1)
FROM [master].[sys].[master_files]
WHERE [database_id] = 1
AND [file_id] = 1;
SET @data_path = LEFT(@data_path, PATINDEX('%\MSSQL%', @data_path)) + '90\Tools\Samples\AdventureWorks OLTP\'
EXECUTE (N'BULK INSERT [Production].[TransactionHistory] FROM ''' + @data_path + N'TransactionHistory.csv''
WITH (
CODEPAGE=''ACP'',
DATAFILETYPE = ''char'',
FIELDTERMINATOR= ''\t'',
ROWTERMINATOR = ''\n'' ,
KEEPIDENTITY,
TABLOCK
)');
EXECUTE (N'BULK INSERT [Production].[TransactionHistoryArchive] FROM ''' + @data_path + N'TransactionHistoryArchive.csv''
WITH (
CODEPAGE=''ACP'',
DATAFILETYPE = ''char'',
FIELDTERMINATOR= ''\t'',
ROWTERMINATOR = ''\n'' ,
KEEPIDENTITY,
TABLOCK
)');
GO
--CREATE INDEX [IX_TransactionHistory_TransactionDate] ON [Production].[TransactionHistory]([TransactionDate]);
GO
--CREATE INDEX [IX_TransactionHistoryArchive_TransactionDate] ON [Production].[TransactionHistoryArchive]([TransactionDate]);
GO
ALTER TABLE [Production].[TransactionHistory] WITH CHECK ADD
CONSTRAINT [PK_TransactionHistory_TransactionID] PRIMARY KEY CLUSTERED
(
[TransactionDate],
[TransactionID]
) ON [TransactionsPS1] (TransactionDate);
GO
ALTER TABLE [Production].[TransactionHistoryArchive] WITH CHECK ADD
CONSTRAINT [PK_TransactionHistoryArchive_TransactionID] PRIMARY KEY CLUSTERED
(
[TransactionDate],
[TransactionID]
) ON [TransactionArchivePS2] (TransactionDate);
GO
CREATE INDEX [IX_TransactionHistory_ProductID] ON [Production].[TransactionHistory]([ProductID]) ON [TransactionsPS1] (TransactionDate);
CREATE INDEX [IX_TransactionHistory_ReferenceOrderID_ReferenceOrderLineID] ON [Production].[TransactionHistory]([ReferenceOrderID], [ReferenceOrderLineID]) ON [TransactionsPS1] (TransactionDate);
GO
CREATE INDEX [IX_TransactionHistoryArchive_ProductID] ON [Production].[TransactionHistoryArchive]([ProductID]) ON [TransactionArchivePS2] (TransactionDate);
CREATE INDEX [IX_TransactionHistoryArchive_ReferenceOrderID_ReferenceOrderLineID] ON [Production].[TransactionHistoryArchive]([ReferenceOrderID], [ReferenceOrderLineID]) ON [TransactionArchivePS2] (TransactionDate);
GO
ALTER TABLE [Production].[TransactionHistory] ADD
CONSTRAINT [FK_TransactionHistory_Product_ProductID] FOREIGN KEY
(
[ProductID]
) REFERENCES [Production].[Product](
[ProductID]
);
GO
--查询系统表,得出表在各个分区的数据分布
SELECT OBJECT_NAME([object_id]), * FROM [sys].[partitions]
WHERE [object_id] = OBJECT_ID('[DBO].[TA]')
ORDER BY [partition_number], [index_id];
GO
SELECT OBJECT_NAME([object_id]), * FROM [sys].[partitions]
WHERE [object_id] = OBJECT_ID('[dbo].[TH]')
ORDER BY [partition_number], [index_id];
GO