sql - 以 SQL Server 最快的方式插入/更新 5000 万条记录

标签 sql sql-server

我正在尝试使用从临时表到基表的存储过程将 5000 万插入和更新到表中。但该过程花费的时间比预期要长得多,大约 5-6 小时。

我尝试实现一些索引来进行性能调整,但它们都没有在第一次工作。

有关如何处理和执行大数据插入的任何建议。

我们的 SSIS 程序将来自 CSV 转储的数据转储到临时表中,然后我们的存储过程最后运行,下面的插入会导致时间延迟,

代码片段 - 暂存到基表插入

    INSERT INTO [dbo].[Entries] (
        [TransactionID]
        ,[TimekID]
        ,[ResourceID]
        ,[INVOICE_DATE]
        ,[INVOICE_NUMBER]
        ,[CLIENT_ID]
        ,[LAW_FIRM_MATTER_ID]
        ,[INVOICE_TOTAL]
        ,[BILLING_START_DATE]
        ,[BILLING_END_DATE]
        ,[INVOICE_DESCRIPTION]
        ,[ITEM_NUMBER]
        ,[EXP_FEE_INV_ADJ_TYPE]
        ,[ITEM_NUMBER_OF_UNITS]
        ,[ITEM_ADJUSTMENT_AMOUNT]
        ,[ITEM_TOTAL]
        ,[ITEM_DATE]
        ,[ITEM_TimeK_CODE]
        ,[ITEM_EXPENSE_CODE]
        ,[ITEM_ACTIVITY_CODE]
        ,[TIMEKEEPER_ID]
        ,[ITEM_DESCRIPTION]
        ,[LAW_FIRM_ID]
        ,[ITEM_UNIT_COST]
        ,[TIMEKEEPER_NAME]
        ,[TIMEKEEPER_CLASSIFICATION]
        ,[CLIENT_MATTER_ID]
        ,[TIMEKEEPER_EMAIL]
        ,[CreatedBy]
        ,[CreatedOn]
        ,[ModifiedBy]
        ,[ModifiedOn]
        ,[DeletedFlag]
        ,[sid]
        ,[ITEM_BASE_NUMBER_OF_UNITS]
        ,[ITEM_BASE_TOTAL]
        ,[UNIQUE_ID]
        ,[IsCancelled]
        ,[ENTRY_CLOSED_DATE]
        )
    SELECT ts.TransactionID
        ,(
            SELECT t1.ID
            FROM [dbo].[time] t1
            INNER JOIN [dbo].[TimekMaster] tm ON t1.ABATimekCode = tm.ID
                AND (
                    tm.Code = ts.ITEM_TimeK_CODE
                    OR tm.NAME = ts.ITEM_TimeK_CODE
                    )
            WHERE t1.sid = @SID
                AND t1.DeletedFlag = 0
                AND m.id = t1.matterid
            ) ID
        ,(
            SELECT r.[ID]
            FROM [dbo].[Resource] r
            WHERE r.[FirmUserID] = ts.[TIMEKEEPER_ID]
                AND r.[sid] = @SID
                AND r.[DeletedFlag] = 0
            ) ResourceID
        ,ts.INVOICE_DATE
        ,ts.INVOICE_NUMBER
        ,ts.CLIENT_ID
        ,ts.LAW_FIRM_MATTER_ID
        ,REPLACE(ts.INVOICE_TOTAL, ',', '')
        ,ts.BILLING_START_DATE
        ,ts.BILLING_END_DATE
        ,ts.INVOICE_DESCRIPTION
        ,ts.ITEM_NUMBER
        ,ts.EXP_FEE_INV_ADJ_TYPE
        ,REPLACE(ts.ITEM_NUMBER_OF_UNITS, ',', '')
        ,REPLACE(ts.ITEM_ADJUSTMENT_AMOUNT, ',', '')
        ,REPLACE(ts.ITEM_TOTAL, ',', '')
        ,ts.[ITEM_DATE] --ITEM_DATE, varchar(1000),>
        ,ts.ITEM_TimeK_CODE
        ,ts.ITEM_EXPENSE_CODE
        ,ts.ITEM_ACTIVITY_CODE
        ,ts.TIMEKEEPER_ID
        ,CAST(ts.ITEM_DESCRIPTION AS VARCHAR(4000))
        ,ts.LAW_FIRM_ID
        ,REPLACE(ts.ITEM_UNIT_COST, ',', '')
        ,ts.TIMEKEEPER_NAME
        ,ts.TIMEKEEPER_CLASSIFICATION
        ,ts.CLIENT_MATTER_ID
        ,ts.TIMEKEEPER_EMAIL
        ,0 --<CreatedBy, int,>
        ,GETDATE() --<CreatedOn, datetime2(7),>
        ,NULL --<ModifiedBy, int,>
        ,NULL --<ModifiedOn, datetime2(7),>
        ,0 --<DeletedFlag, bit,>
        ,@SID --<sid, int,>
        ,REPLACE(ts.[ITEM_NUMBER_OF_UNITS_BASE], ',', '') 
        ,REPLACE(ts.[ITEM_BASE_TOTAL], ',', '') 
        ,ts.[UNIQUE_ITEM_NUMBER] --<UNIQUE_DOCKET_ID, varchar(1000),>
        ,0 --ts.[ITEM_CANCEL] --<IsCancelled, bit,>
        ,ts.[ENTRY_CLOSED_DATE] --<ENTRY_CLOSED_DATE, varchar(100),>)
    FROM E_Staging ts
    INNER JOIN matter m ON m.firmmatternumber = ts.LAW_FIRM_MATTER_ID
        AND m.[sid] = @SID
        AND m.[DeletedFlag] = 0
    WHERE 1 = (
            SELECT COUNT(1)
            FROM [dbo].[Time] t1
            INNER JOIN [dbo].[TimekMaster] tm ON t1.ABATimekCode = tm.ID
                AND (
                    tm.Code = ts.ITEM_TimeK_CODE
                    OR tm.NAME = ts.ITEM_TimeK_CODE
                    )
            WHERE t1.sid = @SID
                AND t1.DeletedFlag = 0
                AND m.id = t1.matterid
            )
        AND ts.[Sid] = @SID
        AND ts.[TransactionID] = @TxnID
        AND NOT EXISTS (
            SELECT 1
            FROM [dbo].[Entries] te
            WHERE te.[sid] = @SID
                AND te.[DeletedFlag] = 0
                AND ts.[UNIQUE_ITEM_NUMBER] = te.[UNIQUE_ID]
            )

基础表

CREATE TABLE [dbo].[Entries] (
    [ID] INT IDENTITY(1, 1) NOT NULL
    ,[TransactionID] INT NOT NULL
    ,[TaskID] INT NULL
    ,[ResourceID] INT NULL
    ,[INVOICE_DATE] VARCHAR(1000) NULL
    ,[INVOICE_NUMBER] VARCHAR(1000) NULL
    ,[CLIENT_ID] VARCHAR(1000) NULL
    ,[LAW_FIRM_MATTER_ID] VARCHAR(1000) NULL
    ,[INVOICE_TOTAL] NUMERIC(14, 2) NULL
    ,[BILLING_START_DATE] VARCHAR(1000) NULL
    ,[BILLING_END_DATE] VARCHAR(1000) NULL
    ,[INVOICE_DESCRIPTION] VARCHAR(1000) NULL
    ,[LINE_ITEM_NUMBER] VARCHAR(1000) NULL
    ,[EXP_FEE_INV_ADJ_TYPE] VARCHAR(1000) NULL
    ,[LINE_ITEM_NUMBER_OF_UNITS] NUMERIC(14, 2) NULL
    ,[LINE_ITEM_ADJUSTMENT_AMOUNT] NUMERIC(14, 2) NULL
    ,[LINE_ITEM_TOTAL] NUMERIC(14, 2) NULL
    ,[LINE_ITEM_DATE] VARCHAR(1000) NULL
    ,[LINE_ITEM_TASK_CODE] VARCHAR(1000) NULL
    ,[LINE_ITEM_EXPENSE_CODE] VARCHAR(1000) NULL
    ,[LINE_ITEM_ACTIVITY_CODE] VARCHAR(1000) NULL
    ,[TIMEKEEPER_ID] VARCHAR(1000) NULL
    ,[LINE_ITEM_DESCRIPTION] VARCHAR(4000) NULL
    ,[LAW_FIRM_ID] VARCHAR(1000) NULL
    ,[LINE_ITEM_UNIT_COST] NUMERIC(14, 2) NULL
    ,[TIMEKEEPER_NAME] VARCHAR(1000) NULL
    ,[TIMEKEEPER_CLASSIFICATION] VARCHAR(1000) NULL
    ,[CLIENT_MATTER_ID] VARCHAR(1000) NULL
    ,[TIMEKEEPER_EMAIL] VARCHAR(1000) NULL
    ,[CreatedBy] INT NULL
    ,[CreatedOn] DATETIME2(7) DEFAULT(sysutcdatetime()) NULL
    ,[ModifiedBy] INT NULL
    ,[ModifiedOn] DATETIME2(7) NULL
    ,[DeletedFlag] BIT DEFAULT((0)) NULL
    ,[sid] INT NOT NULL
    ,[LINE_ITEM_BASE_NUMBER_OF_UNITS] NUMERIC(14, 2) NULL
    ,[LINE_ITEM_BASE_TOTAL] NUMERIC(14, 2) NULL
    ,[UNIQUE_ID] VARCHAR(1000) NULL
    ,[IsCancelled] BIT DEFAULT((0)) NOT NULL
    ,[ENTRY_CLOSED_DATE] VARCHAR(100) NULL
    ,[MappingStatus] TINYINT DEFAULT((0)) NOT NULL
    ,[LINE_ITEM_PHASE_CODE] VARCHAR(1000) NULL
    ,[Override] BIT DEFAULT((0)) NOT NULL
    ,[e_INVOICE_DESCRIPTION] VARCHAR(1000) NULL
    ,[e_CLIENT_ID] VARBINARY(8000) NULL
    ,[e_LINE_ITEM_DESCRIPTION] VARCHAR(8000) NULL
    ,[e_CLIENT_MATTER_ID] VARCHAR(1000) NULL
    ,[AmountDocketed] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
    ,[AmountBilled] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
    ,[AmountWrittenDown] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
    ,[AmountWrittenUp] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
    ,[AmountWIP] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
    ,[HourDocketed] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
    ,[HourBilled] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
    ,[HourWrittenDown] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
    ,[HourWrittenUp] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
    ,[HourWIP] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
    ,[MappedBy] INT NULL DEFAULT((0))
    ,[MappedOn] DATETIME2 NULL DEFAULT(sysutcdatetime())
    ,[RemapBy] INT NULL
    ,CONSTRAINT [PK_utb_entries] PRIMARY KEY CLUSTERED ([ID] ASC)
    ,FOREIGN KEY ([TaskID]) REFERENCES [dbo].[Task]([ID])
    ,FOREIGN KEY ([TaskID]) REFERENCES [dbo].[Task]([ID])
    );
GO

CREATE NONCLUSTERED INDEX [Entries_TaskID] ON [dbo].[Entries] ([TaskID] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_LAW_FIRM_MATTER_ID] ON [dbo].[Entries] ([LAW_FIRM_MATTER_ID] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_UNIQUE_DOCKET_ID] ON [dbo].[Entries] ([UNIQUE_DOCKET_ID] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_ResourceID] ON [dbo].[Entries] ([ResourceID] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_INVOICE_NUMBER] ON [dbo].[Entries] ([INVOICE_NUMBER] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_EXP_FEE_INV_ADJ_TYPE] ON [dbo].[Entries] ([EXP_FEE_INV_ADJ_TYPE] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_IsCancelled] ON [dbo].[Entries] ([IsCancelled] ASC);
GO

CREATE NONCLUSTERED INDEX [IX_Entries_Grouped] ON [dbo].[Entries] (
    [TaskID] ASC
    ,[sid] ASC
    ,[DeletedFlag] ASC
    ,[ID] ASC
    ) INCLUDE (
    [INVOICE_NUMBER]
    ,[LAW_FIRM_MATTER_ID]
    ,[LINE_ITEM_NUMBER_OF_UNITS]
    ,[LINE_ITEM_TOTAL]
    ,[LINE_ITEM_BASE_NUMBER_OF_UNITS]
    ,[LINE_ITEM_BASE_TOTAL]
    ,[IsCancelled]
    )
    WITH (
            SORT_IN_TEMPDB = OFF
            ,DROP_EXISTING = OFF
            ,ONLINE = OFF
            ) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [IX_Entries_Grouped1] ON [dbo].[Entries] (
    [sid] ASC
    ,[DeletedFlag] ASC
    ,[IsCancelled] ASC
    ) INCLUDE (
    [ID]
    ,[LAW_FIRM_MATTER_ID]
    ,[EXP_FEE_INV_ADJ_TYPE]
    ,[LINE_ITEM_BASE_NUMBER_OF_UNITS]
    ,[LINE_ITEM_BASE_TOTAL]
    )
    WITH (
            SORT_IN_TEMPDB = OFF
            ,DROP_EXISTING = OFF
            ,ONLINE = OFF
            ) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [IX_Entries_Grouped2] ON [dbo].[Entries] (
    [sid] ASC
    ,[DeletedFlag] ASC
    ,[ID] ASC
    ,[LINE_ITEM_BASE_TOTAL] ASC
    ,[TaskID] ASC
    ) INCLUDE ([LAW_FIRM_MATTER_ID])
    WITH (
            SORT_IN_TEMPDB = OFF
            ,DROP_EXISTING = OFF
            ,ONLINE = OFF
            ) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [IX_Entries_Grouped3] ON [dbo].[Entries] (
    [TaskID] ASC
    ,[sid] ASC
    ,[DeletedFlag] ASC
    ,[ID] ASC
    ,[ResourceID] ASC
    ) INCLUDE (
    [INVOICE_NUMBER]
    ,[EXP_FEE_INV_ADJ_TYPE]
    ,[LINE_ITEM_NUMBER_OF_UNITS]
    ,[LINE_ITEM_TOTAL]
    ,[LINE_ITEM_DATE]
    ,[LINE_ITEM_BASE_NUMBER_OF_UNITS]
    ,[LINE_ITEM_BASE_TOTAL]
    ,[IsCancelled]
    ,[ENTRY_CLOSED_DATE]
    )
    WITH (
            SORT_IN_TEMPDB = OFF
            ,DROP_EXISTING = OFF
            ,ONLINE = OFF
            ) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [IX_Entries_Grouped4] ON [dbo].[Entries] (
    [TaskID] ASC
    ,[sid] ASC
    ,[IsCancelled] ASC
    ,[DeletedFlag] ASC
    ,[ID] ASC
    ,[ResourceID] ASC
    ) INCLUDE (
    [INVOICE_NUMBER]
    ,[EXP_FEE_INV_ADJ_TYPE]
    ,[LINE_ITEM_NUMBER_OF_UNITS]
    ,[LINE_ITEM_TOTAL]
    ,[LINE_ITEM_DATE]
    ,[LINE_ITEM_BASE_NUMBER_OF_UNITS]
    ,[LINE_ITEM_BASE_TOTAL]
    ,[ENTRY_CLOSED_DATE]
    )
    WITH (
            SORT_IN_TEMPDB = OFF
            ,DROP_EXISTING = OFF
            ,ONLINE = OFF
            ) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [Entries_LINE_ITEM_DATE] ON [dbo].[Entries] ([LINE_ITEM_DATE])
GO

CREATE STATISTICS [Stat_Entries_1] ON [dbo].[Entries] (
    [IsCancelled]
    ,[ID]
    ,[TaskID]
    ,[sid]
    )
GO

暂存表

CREATE TABLE [dbo].[E_Staging] (
    [ID] INT IDENTITY(1, 1) NOT NULL
    ,[INVOICE_DAE] VARCHAR(1000) NULL
    ,[INVOICE_NUMBER] VARCHAR(1000) NULL
    ,[CLIENT_ID] VARCHAR(1000) NULL
    ,[LAW_FIRM_MATER_ID] VARCHAR(1000) NULL
    ,[INVOICE_TOTAL] VARCHAR(1000) NULL
    ,[BILLING_START_DAE] VARCHAR(1000) NULL
    ,[BILLING_END_DAE] VARCHAR(1000) NULL
    ,[INVOICE_DESCRIPTION] VARCHAR(8000) NULL
    ,[IEM_NUMBER] VARCHAR(1000) NULL
    ,[EXP_FEE_INV_ADJ_TYPE] VARCHAR(1000) NULL
    ,[IEM_NUMBER_OF_UNITS] VARCHAR(1000) NULL
    ,[IEM_NUMBER_OF_UNITS_BASE] VARCHAR(1000) NULL
    ,[IEM_ADJUSTMENT_AMOUNT] VARCHAR(1000) NULL
    ,[IEM_TOTAL] VARCHAR(1000) NULL
    ,[IEM_BASE_TOTAL] VARCHAR(1000) NULL
    ,[IEM_DAE] VARCHAR(100) NULL
    ,[IEM_TASK_CODE] VARCHAR(1000) NULL
    ,[IEM_EXPENSE_CODE] VARCHAR(1000) NULL
    ,[IEM_ACTIVITY_CODE] VARCHAR(1000) NULL
    ,[TIMEKEEPER_ID] VARCHAR(1000) NULL
    ,[IEM_DESCRIPTION] VARCHAR(8000) NULL
    ,[LAW_FIRM_ID] VARCHAR(1000) NULL
    ,[IEM_UNIT_COST] VARCHAR(1000) NULL
    ,[TIMEKEEPER_NAME] VARCHAR(1000) NULL
    ,[TIMEKEEPER_CLASSIFICATION] VARCHAR(1000) NULL
    ,[CLIENT_MATER_ID] VARCHAR(1000) NULL
    ,[TIMEKEEPER_EMAIL] VARCHAR(1000) NULL
    ,[UNIQUE_ITEM_NUMBER] VARCHAR(1000) NULL
    ,[IEM_CANCEL] VARCHAR(1000) NULL
    ,[Sid] INT NULL
    ,[TransactionID] INT NULL
    ,[ValidationStatus] VARCHAR(20) NULL
    ,[ValidationMessage] VARCHAR(200) NULL
    ,[UploadStatus] VARCHAR(20) NULL
    ,[StatusMessage] VARCHAR(50) NULL
    ,[CREATEdOn] DAETIME NULL DEFAULT(sysutcdaEtime())
    ,[FirmLastModifiedon] VARCHAR(1000) NULL
    ,[ENTRY_CLOSED_DAE] VARCHAR(1000) NULL
    ,[UNIQUE_Docket_ID2] VARCHAR(1000) NULL
    ,[DockeEd_Released_Billed] VARCHAR(1000) NULL
    ,[IEM_PHASE_CODE] VARCHAR(1000) NULL
    ,[WriE_Down] VARCHAR(200) NULL
    ,[e_ClientId] VARBINARY(MAX) NULL
    ,[e_INVOICE_DESCRIPTION] VARCHAR(1000) NULL
    ,[e_IEM_DESCRIPTION] VARCHAR(8000) NULL
    ,[e_CLIENT_MATER_ID] VARCHAR(1000) NULL
    ,CONSTRAINT [PK_E_Staging] PRIMARY KEY CLUSERED([ID] ASC)
    );
GO

CREATE NONCLUSERED INDEX [nci_wi_E_Staging_2150E36DEE003CE53D44FC548A4CD6CC] ON [dbo].[E_Staging] (
    [Sid] ASC
    ,[TransactionID] ASC
    )
GO

CREATE NONCLUSERED INDEX IX_E_Staging_CREATEDON ON [dbo].[E_Staging] (CREATEDON)
GO

CREATE NONCLUSERED INDEX IX_E_Staging_LAW_FIRM_MATER_ID ON [dbo].[E_Staging] (LAW_FIRM_MATER_ID)
GO

CREATE NONCLUSERED INDEX IX_E_Staging_UNIQUE_IEM_NUMBER ON [dbo].[E_Staging] (UNIQUE_IEM_NUMBER)
GO

最佳答案

我建议您不要在目标表上创建新索引,理想情况下您应该在插入之前禁用索引。

  1. 禁用目标表上的索引和约束。
  2. 插入记录。
  3. 启用索引和约束。

这将为您提供更好的插入性能。

除此之外,您还需要优化您的选择查询。如果您也共享选择查询,您可以获得更多帮助。

关于sql - 以 SQL Server 最快的方式插入/更新 5000 万条记录,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/54435330/

相关文章:

mysql - 执行 SELECT MAX(id) 等查找最后插入的行在概念上是否正确?

sql - SQL Server Management Studio中的脚本大小限制

sql - 命名 TSQL 的 XML 结果列 "for xml explicit"?

sql - 绕过在字符串文字中添加前缀 N 的需要

sql - Hive 上的非平等加入

php - 如果项目有许多(动态)属性,则多个左连接

c# - SQL Server 2005 中的 XML 查询

c# - 无法处理 System.Data.SqlTypes.SqlNullValueException

sql - 如何识别SQL Server的端口号

mysql - MySQL : Errorno 150 是什么原因