sql - 使用 SSIS 包加载数据时理解间歇性不一致的问题

标签 sql sql-server-2008 ssis common-table-expression

问题

在过去的几个月中,下面描述的过程在绝大多数情况下都运行正常(在 SQL Server 2008 R2 上)。然而,我们遇到过三次数据被错误关联的情况。问题是:这是什么原因造成的,我该如何补救?

DATA_PreImp

    sourceid    col01   col02   col03   col04   col...
    100001      John    Smith   
    100002      Calvin  Klein
    100003      Peter   Parker
    100004      Moe     Greene

通常的结果是属性被正确地关联到 Items_Main,但有时(少于 1% 的情况)顺序会被打乱,导致 col01 的值与其余各列的值没有关联到同一条 Items_Main 记录。

任何有关导致这种情况的原因的见解将不胜感激。

数据移动程序

我们有一个 SSIS 包,可将数据从名为 DATA_PreImp 的平面表传输到由三个相关表(基于属性)组成的结构。
  • Items_Main 应为 DATA_PreImp 中的每一行包含一行
  • Items_FeatureValues 为 DATA_PreImp 中每一行的每一个列值包含一行
  • Items_MainRel 包含 Items_Main 和 Items_FeatureValues 之间的连接

  • SSIS 包中的第一步是将数据从 DATA_PreImp 插入到 Items_Main 并将生成的标识符插入到空的 DATA_PreImpMappingTMP 表中的 TARGET_ID 列中。
    -- Step 1: create one items_main row per data_preimp row and capture each
    -- generated itemid in DATA_PreImpMappingTMP.TARGET_ID.
    -- NOTE(review): the ORDER BY governs how identity values are computed for
    -- items_main; it is not documented to fix the order in which OUTPUT rows
    -- arrive in the mapping table -- confirm before relying on its ROW column.
    INSERT INTO items_main (creationdate, status)
    OUTPUT inserted.itemid
        INTO DATA_PreImpMappingTMP (TARGET_ID)
    SELECT
        GETDATE(),
        '0'  -- string literal, implicitly converted to the int status column
    FROM data_preimp AS src
    ORDER BY src.sourceid ASC;
    

    SSIS 包中的第二步用 TARGET_ID(最初的 Itemid)和特征的标识符(在本例中为 5)填充 Items_MainRel 表。
    -- Step 2: link every captured itemid (TARGET_ID) to feature 5 and capture
    -- each generated itemrelid in DATA_PreImpMapping2TMP.INDREL_ID.
    INSERT INTO items_mainrel (itemid, featureid)
    OUTPUT inserted.itemrelid
        INTO DATA_PreImpMapping2TMP (INDREL_ID)
    SELECT
        tmp.TARGET_ID,
        5
    FROM DATA_PreImpMappingTMP AS tmp
    ORDER BY tmp.TARGET_ID ASC;
    

    第三步是用来自 DATA_PreImp 的 sourceid 填充 DATA_PreImpMapping2TMP 表中的 SOURCE_ID 列,并将 FEAT_ID 设为 5。
    -- Step 3: attach the originating sourceid to every mapping row and tag the
    -- rows with feature 5.
    --
    -- The pairing is done by rank instead of by the mapping table's ROW
    -- identity. ROW reflects the order in which OUTPUT rows happened to arrive,
    -- which SQL Server does not guarantee, so joining on it can intermittently
    -- pair a sourceid with the wrong INDREL_ID. The identity values captured in
    -- INDREL_ID, by contrast, are computed in the ORDER BY order of the
    -- INSERT ... SELECT statements that produced them, so the n-th smallest
    -- INDREL_ID belongs to the n-th smallest sourceid.
    --
    -- Assumes DATA_PreImpMapping2TMP holds only the rows of the current load
    -- (the original join on ROW relied on the same precondition).
    WITH src AS (
        SELECT
            sourceid,
            ROW_NUMBER() OVER (ORDER BY sourceid ASC) AS rn
        FROM data_preimp
    ),
    tgt AS (
        SELECT
            [ROW],
            ROW_NUMBER() OVER (ORDER BY INDREL_ID ASC) AS rn
        FROM DATA_PreImpMapping2TMP
    )
    UPDATE m
    SET m.SOURCE_ID = src.sourceid,
        m.FEAT_ID = 5
    FROM DATA_PreImpMapping2TMP AS m
    INNER JOIN tgt
        ON tgt.[ROW] = m.[ROW]
    INNER JOIN src
        ON src.rn = tgt.rn;
    

    最后一步是用来自 DATA_PreImpMapping2TMP 和 DATA_PreImp 的数据填充 Items_FeatureValues 表。
    -- Step 4: store col01 of every source row as the feature value of the
    -- relation row mapped to it (language id 0, feature 5).
    -- NOTE(review): col01 is nvarchar(500) while FnVarChar is nvarchar(250);
    -- confirm that source values never exceed 250 characters.
    INSERT INTO items_featurevalues (itemrelid, languageid, fnvarchar)
    SELECT
        m.INDREL_ID,
        0,
        p.col01
    FROM DATA_PreImpMapping2TMP AS m
    INNER JOIN data_preimp AS p
        ON m.SOURCE_ID = p.sourceid
    WHERE m.FEAT_ID = 5;
    

    数据表结构

    以下是创建场景所需的内容:
    -- Flat staging table: one row per imported record, keyed by an
    -- auto-generated sourceid, with ten free-text columns.
    CREATE TABLE [dbo].[DATA_PreImp]
    (
        [sourceid] [bigint] IDENTITY(1,1) NOT NULL,
        [col01]    [nvarchar](500) NULL,
        [col02]    [nvarchar](500) NULL,
        [col03]    [nvarchar](500) NULL,
        [col04]    [nvarchar](500) NULL,
        [col05]    [nvarchar](500) NULL,
        [col06]    [nvarchar](500) NULL,
        [col07]    [nvarchar](500) NULL,
        [col08]    [nvarchar](500) NULL,
        [col09]    [nvarchar](500) NULL,
        [col10]    [nvarchar](500) NULL,
        CONSTRAINT [PK_DATA_PreImp] PRIMARY KEY CLUSTERED ([sourceid] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
    
    
    -- Work table for step 1: TARGET_ID receives the itemid values generated
    -- in Items_Main; ROW is an identity assigned as rows are inserted here.
    CREATE TABLE [dbo].[DATA_PreImpMappingTMP]
    (
        [ROW]       [int] IDENTITY(1,1) NOT NULL,
        [TARGET_ID] [int] NULL,
        PRIMARY KEY CLUSTERED ([ROW] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
    
    
    -- Target table holding one item per imported source row.
    CREATE TABLE [dbo].[Items_Main]
    (
        [Itemid]       [int] IDENTITY(1,1) NOT NULL,
        [creationDate] [smalldatetime] NOT NULL,
        [status]       [int] NOT NULL,
        [purchdate]    [smalldatetime] NULL,
        [logindate]    [smalldatetime] NULL,
        CONSTRAINT [PK_Items_Main] PRIMARY KEY CLUSTERED ([Itemid] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
    
    
    -- Work table for steps 2-4: maps each generated relation id (INDREL_ID)
    -- back to the source row (SOURCE_ID) and the feature it belongs to.
    CREATE TABLE [dbo].[DATA_PreImpMapping2TMP]
    (
        [ROW]       [int] IDENTITY(1,1) NOT NULL,
        -- bigint to match DATA_PreImp.sourceid; an int column could overflow
        -- and forces an implicit conversion when joining back to the source.
        [SOURCE_ID] [bigint] NULL,
        [INDREL_ID] [int] NULL,
        [FEAT_ID]   [int] NULL,
        PRIMARY KEY CLUSTERED ([ROW] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
    
    -- Catalogue of features (attributes) that can be attached to an item.
    CREATE TABLE [dbo].[Items_Features]
    (
        [featureId]    [int] IDENTITY(1,1) NOT NULL,
        [featureRef]   [varchar](15) NOT NULL,
        [featureName]  [varchar](50) NOT NULL,
        [creationDate] [smalldatetime] NOT NULL,
        [status]       [int] NOT NULL,
        [fieldType]    [varchar](50) NOT NULL,
        [featureType]  [int] NOT NULL,
        [featureDesc]  [varchar](500) NULL,
        CONSTRAINT [PK_Items_Features] PRIMARY KEY CLUSTERED ([featureId] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
    
    
    -- Relation table: one row per (item, feature) pair.
    CREATE TABLE [dbo].[Items_MainRel]
    (
        [ItemRelId] [int] IDENTITY(1,1) NOT NULL,
        [Itemid]    [int] NOT NULL,
        [featureId] [int] NOT NULL,
        CONSTRAINT [PK_Items_MainRel] PRIMARY KEY CLUSTERED ([ItemRelId] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
    
    GO
    
    -- Value table: stores the value of one (item, feature) relation row per
    -- language, in whichever typed column fits the value.
    CREATE TABLE [dbo].[Items_FeatureValues]
    (
        [valueId]        [int] IDENTITY(1,1) NOT NULL,
        [ItemRelId]      [int] NOT NULL,
        [languageId]     [int] NOT NULL,
        [FnVarChar]      [nvarchar](250) NULL,
        [FInt]           [int] NULL,
        [FImage]         [int] NULL,
        [FNText]         [ntext] NULL,
        [FSmallDateTime] [smalldatetime] NULL,
        CONSTRAINT [PK_Items_FeatureValues] PRIMARY KEY CLUSTERED ([valueId] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
    
    GO    
    
    -- Items_MainRel.featureId must reference an existing Items_Features row.
    ALTER TABLE [dbo].[Items_MainRel] WITH CHECK
        ADD CONSTRAINT [FK_Items_MainRel_Items_Features]
        FOREIGN KEY ([featureId])
        REFERENCES [dbo].[Items_Features] ([featureId])
    GO

    ALTER TABLE [dbo].[Items_MainRel]
        CHECK CONSTRAINT [FK_Items_MainRel_Items_Features]
    GO

    -- Items_MainRel.Itemid must reference an existing Items_Main row.
    ALTER TABLE [dbo].[Items_MainRel] WITH CHECK
        ADD CONSTRAINT [FK_Items_MainRel_Items_Main]
        FOREIGN KEY ([Itemid])
        REFERENCES [dbo].[Items_Main] ([Itemid])
    GO

    ALTER TABLE [dbo].[Items_MainRel]
        CHECK CONSTRAINT [FK_Items_MainRel_Items_Main]
    GO

    -- Feature values are removed together with their relation row.
    ALTER TABLE [dbo].[Items_FeatureValues] WITH CHECK
        ADD CONSTRAINT [FK_Items_FeatureValues_Items_MainRel]
        FOREIGN KEY ([ItemRelId])
        REFERENCES [dbo].[Items_MainRel] ([ItemRelId])
        ON DELETE CASCADE
    GO

    ALTER TABLE [dbo].[Items_FeatureValues]
        CHECK CONSTRAINT [FK_Items_FeatureValues_Items_MainRel]
    GO
    
    
    -- Sample source rows; sourceid is generated by the identity column.
    INSERT INTO DATA_PreImp
        (col01, col02, col03, col04)
    VALUES
        ('John',   'Smith',  '1964', 'NewYork'),
        ('Calvin', 'Klein',  '1960', 'Washington D. C.'),
        ('Peter',  'Parker', '1974', 'Losangles'),
        ('Moe',    'Greene', '1928', 'Lasvegas');
    
    
    -- Sample feature definitions; featureId is generated by the identity column.
    -- NOTE(review): only four features are seeded here, while the load steps in
    -- this file use feature id 5 -- confirm that feature 5 exists, otherwise
    -- FK_Items_MainRel_Items_Features would reject the insert.
    INSERT INTO Items_Features
        (featureRef, featureName, creationDate, [status], fieldType, featureType, featureDesc)
    VALUES
        ('firstname', 'First_Name', GETDATE(), 0, 'FnVarChar', 3, 'FirstName'),
        ('lastname',  'Last_Name',  GETDATE(), 0, 'FnVarChar', 3, 'LastName'),
        ('Birth',     'Birth',      GETDATE(), 0, 'FnVarChar', 3, 'Birth'),
        ('City',      'City',       GETDATE(), 0, 'FnVarChar', 3, 'City');
    

    最佳答案

    问题是计算列 CUST_CD。经过大量研究,似乎 BULK INSERT 不喜欢复杂的计算类型(请参阅 Using SQL Server spatial types in SSIS data load )。解决方案是删除计算列并将其设为 varchar(20) NULL。然后我创建了一个新的执行 SQL 任务,它使用计算值更新任何 NULL 行。

    关于sql - 使用 SSIS 包加载数据时理解间歇性不一致的问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/15274671/

    相关文章:

    sql - left outer join和PIVOT之间有没有一般的关系?

    SQL显示Count为0时的月份

    sql - SQL Server varchar(max) 中的字符数?

    database - 在数据库中启用快照隔离需要哪些用户权限?

    git - 如何通过BIML对SSIS包进行批量更新?

    sql - 调度多个相互依赖的 SQL Server 代理作业的做法是什么?

    c# - 缺少引用 OLEDB 连接类型的库

    mysql - 对于 SQL 中的所有查询,表划分

    MySql 根据逗号分隔值的输入更新计数

    SQL Server 2008 - 选择查询特定项目