sql - 随着时间的推移跟踪事件对象

标签 sql postgresql time-series postgresql-9.5 gaps-and-islands

我有一个事件表(id 用作事件 ID),如下所示(另请参阅 SQL fiddle here ):

-- Event log: one row per (event, object) pair.
CREATE TABLE ext (
  key   INTEGER,              -- sequential row number in the sample data
  id    CHARACTER(1),         -- event ID; all rows of one event share it
  pid   INTEGER,              -- coarse ordering unit: sort by pid first
  sid   INTEGER,              -- fine ordering unit: sort by sid second
  oid   INTEGER,              -- object the event refers to; NULL = all objects still active
  event CHARACTER VARYING(3)  -- event type code, e.g. 'tsu', 'fc', 'pof', 'pom', 'rco', 's', 'e'
);

-- Sample event log. Rows sharing an id form one event; multi-row events list
-- one object (oid) per row. A NULL oid marks a single-row event that refers to
-- every object that is still active at that point.
INSERT INTO ext (key, id, pid, sid, oid, event)
VALUES 
 -- event Q ('tsu'): lists 11 objects
 (1, 'Q', 1, 81, 20, 'tsu'),
 (2, 'Q', 1, 81, 9, 'tsu'),
 (3, 'Q', 1, 81, 10, 'tsu'),
 (4, 'Q', 1, 81, 4, 'tsu'),
 (5, 'Q', 1, 81, 15, 'tsu'),
 (6, 'Q', 1, 81, 3, 'tsu'),
 (7, 'Q', 1, 81, 5, 'tsu'),
 (8, 'Q', 1, 81, 18, 'tsu'),
 (9, 'Q', 1, 81, 2, 'tsu'),
 (10, 'Q', 1, 81, 1, 'tsu'),
 (11, 'Q', 1, 81, 7, 'tsu'),
 -- events f and Z: no oid, to be expanded to the active objects
 (12, 'f', 2, 2, NULL, 's'),
 (13, 'Z', 2, 871, NULL, 'e'),
 -- 'pof' on object 2, immediately followed by 'pom' introducing object 31
 (14, 'm', 3, 872, 2, 'pof'),
 (15, 's', 3, 873, 31, 'pom'),
 -- event R ('fc'): lists 11 objects
 (16, 'R', 3, 874, 15, 'fc'),
 (17, 'R', 3, 874, 1, 'fc'),
 (18, 'R', 3, 874, 31, 'fc'),
 (19, 'R', 3, 874, 9, 'fc'),
 (20, 'R', 3, 874, 10, 'fc'),
 (21, 'R', 3, 874, 4, 'fc'),
 (22, 'R', 3, 874, 7, 'fc'),
 (23, 'R', 3, 874, 3, 'fc'),
 (24, 'R', 3, 874, 5, 'fc'),
 (25, 'R', 3, 874, 18, 'fc'),
 (26, 'R', 3, 874, 20, 'fc'),
 -- event k: no oid, to be expanded
 (27, 'k', 3, 876, NULL, 's'),
 -- 'rco' on object 31
 (28, 'a', 3, 950, 31, 'rco'),
 -- 'pof' on object 7, immediately followed by 'pom' introducing object 22
 (29, 'y', 3, 1285, 7, 'pof'),
 (30, 'N', 3, 1286, 22, 'pom'),
 -- event i ('fc'): lists 10 objects
 (31, 'i', 3, 1299, 1, 'fc'),
 (32, 'i', 3, 1299, 5, 'fc'),
 (33, 'i', 3, 1299, 3, 'fc'),
 (34, 'i', 3, 1299, 20, 'fc'),
 (35, 'i', 3, 1299, 4, 'fc'),
 (36, 'i', 3, 1299, 9, 'fc'),
 (37, 'i', 3, 1299, 10, 'fc'),
 (38, 'i', 3, 1299, 22, 'fc'),
 (39, 'i', 3, 1299, 15, 'fc'),
 (40, 'i', 3, 1299, 18, 'fc'),
 -- two 'pof'/'pom' pairs: object 9 -> 19, object 4 -> 30
 (41, 'I', 3, 1407, 9, 'pof'),
 (42, 'T', 3, 1408, 19, 'pom'),
 (43, 'u', 3, 1575, 4, 'pof'),
 (44, 'V', 3, 1576, 30, 'pom'),
 -- events B and h: no oid, to be expanded
 (45, 'B', 3, 2019, NULL, 'e'),
 (46, 'h', 4, 60, NULL, 'e');

事件根据 pid 和 sid 列进行排序(例如,您可以把它们分别看作天和小时:pid 是比 sid 更大的时间单位,因此应先按 pid 排序,再按 sid 排序,才能得到正确的顺序)。正如您所看到的,某些事件(event = tsu 和 event = fc)有不止一行,因为它们引用了多个对象(oid);有些事件只引用一个对象(例如 id = m 的事件);还有一些事件只有一行(oid 为 NULL),但它们实际上引用了此前观察到的所有尚未“死亡”的对象。此外还有一些附加规则:

  • 当对象遇到 pof 或 rco 事件(见 event 列)时,该对象即“死亡”
  • 当对象触发 pof 事件时,会有另一个对象取代它,该新对象由 pom 事件标记

我需要跟踪当前事件所涉及的对象。因此,我想把 oid 为 NULL 的行与可以从先前事件推断出的所有事件对象进行“交叉连接”;这里的“交叉连接”是指:针对当前事件的每个 oid,复制一份 oid = NULL 的行。

由于文本中的逻辑可能很难理解,我准备了预期的输出(可在 SQL fiddle here 上找到):

-- Expected output: the event log with every NULL-oid row expanded to one row
-- per active object.
CREATE TABLE intermediate_result (
  id    CHARACTER(1),         -- event ID
  pid   INTEGER,              -- coarse ordering unit
  sid   INTEGER,              -- fine ordering unit
  oid   INTEGER,              -- object the event refers to
  event CHARACTER VARYING(3)  -- event type code
);

-- Expected result for the sample data in ext. Events that carry an oid are
-- copied unchanged; events whose oid is NULL in ext are repeated once per
-- object that is active at that point.
INSERT INTO intermediate_result (id, pid, sid, oid, event)
VALUES 
 -- event Q: unchanged
 ('Q', 1, 81, 20, 'tsu'),
 ('Q', 1, 81, 9, 'tsu'),
 ('Q', 1, 81, 10, 'tsu'),
 ('Q', 1, 81, 4, 'tsu'),
 ('Q', 1, 81, 15, 'tsu'),
 ('Q', 1, 81, 3, 'tsu'),
 ('Q', 1, 81, 5, 'tsu'),
 ('Q', 1, 81, 18, 'tsu'),
 ('Q', 1, 81, 2, 'tsu'),
 ('Q', 1, 81, 1, 'tsu'),
 ('Q', 1, 81, 7, 'tsu'),
 -- event f: expanded to the 11 objects of event Q, in the same order
 ('f', 2, 2, 20, 's'),
 ('f', 2, 2, 9, 's'),
 ('f', 2, 2, 10, 's'),
 ('f', 2, 2, 4, 's'),
 ('f', 2, 2, 15, 's'),
 ('f', 2, 2, 3, 's'),
 ('f', 2, 2, 5, 's'),
 ('f', 2, 2, 18, 's'),
 ('f', 2, 2, 2, 's'),
 ('f', 2, 2, 1, 's'),
 ('f', 2, 2, 7, 's'),
 -- event Z: expanded to the same 11 objects
 ('Z', 2, 871, 20, 'e'), 
 ('Z', 2, 871, 9, 'e'),
 ('Z', 2, 871, 10, 'e'),
 ('Z', 2, 871, 4, 'e'),
 ('Z', 2, 871, 15, 'e'),
 ('Z', 2, 871, 3, 'e'),
 ('Z', 2, 871, 5, 'e'),
 ('Z', 2, 871, 18, 'e'),
 ('Z', 2, 871, 2, 'e'),
 ('Z', 2, 871, 1, 'e'),
 ('Z', 2, 871, 7, 'e'),
 -- 'pof' on object 2, 'pom' introduces object 31: unchanged
 ('m', 3, 872, 2, 'pof'),
 ('s', 3, 873, 31, 'pom'),
 -- event R: unchanged
 ('R', 3, 874, 15, 'fc'),
 ('R', 3, 874, 1, 'fc'),
 ('R', 3, 874, 31, 'fc'),
 ('R', 3, 874, 9, 'fc'),
 ('R', 3, 874, 10, 'fc'),
 ('R', 3, 874, 4, 'fc'),
 ('R', 3, 874, 7, 'fc'),
 ('R', 3, 874, 3, 'fc'),
 ('R', 3, 874, 5, 'fc'),
 ('R', 3, 874, 18, 'fc'),
 ('R', 3, 874, 20, 'fc'),
 -- event k: expanded to the 11 objects of event R, in the same order
 ('k', 3, 876, 15, 's'),
 ('k', 3, 876, 1, 's'),
 ('k', 3, 876, 31, 's'),
 ('k', 3, 876, 9, 's'),
 ('k', 3, 876, 10, 's'),
 ('k', 3, 876, 4, 's'),
 ('k', 3, 876, 7, 's'),
 ('k', 3, 876, 3, 's'),
 ('k', 3, 876, 5, 's'),
 ('k', 3, 876, 18, 's'),
 ('k', 3, 876, 20, 's'),
 -- 'rco' on object 31; 'pof' on object 7, 'pom' introduces object 22: unchanged
 ('a', 3, 950, 31, 'rco'),
 ('y', 3, 1285, 7, 'pof'),
 ('N', 3, 1286, 22, 'pom'),
 -- event i: unchanged
 ('i', 3, 1299, 1, 'fc'),
 ('i', 3, 1299, 5, 'fc'),
 ('i', 3, 1299, 3, 'fc'),
 ('i', 3, 1299, 20, 'fc'),
 ('i', 3, 1299, 4, 'fc'),
 ('i', 3, 1299, 9, 'fc'),
 ('i', 3, 1299, 10, 'fc'),
 ('i', 3, 1299, 22, 'fc'),
 ('i', 3, 1299, 15, 'fc'),
 ('i', 3, 1299, 18, 'fc'),
 -- two 'pof'/'pom' pairs (9 -> 19, 4 -> 30): unchanged
 ('I', 3, 1407, 9, 'pof'),
 ('T', 3, 1408, 19, 'pom'),
 ('u', 3, 1575, 4, 'pof'),
 ('V', 3, 1576, 30, 'pom'),
 -- event B: expanded to the 10 objects of event i, with 4 replaced by 30 and
 -- 9 replaced by 19 at their original positions
 ('B', 3, 2019, 1, 'e'),
 ('B', 3, 2019, 5, 'e'),
 ('B', 3, 2019, 3, 'e'),
 ('B', 3, 2019, 20, 'e'),
 ('B', 3, 2019, 30, 'e'),
 ('B', 3, 2019, 19, 'e'),
 ('B', 3, 2019, 10, 'e'),
 ('B', 3, 2019, 22, 'e'),
 ('B', 3, 2019, 15, 'e'),
 ('B', 3, 2019, 18, 'e'),
 -- event h: expanded to the same 10 objects as event B
 ('h', 4, 60, 1, 'e'),
 ('h', 4, 60, 5, 'e'),
 ('h', 4, 60, 3, 'e'),
 ('h', 4, 60, 20, 'e'),
 ('h', 4, 60, 30, 'e'),
 ('h', 4, 60, 19, 'e'),
 ('h', 4, 60, 10, 'e'),
 ('h', 4, 60, 22, 'e'),
 ('h', 4, 60, 15, 'e'),
 ('h', 4, 60, 18, 'e');

SQL版本是PostgreSQL 9.5。

最佳答案

仅用 SQL 来解决这个问题非常困难,而且效率低下。我认为您最好的选择是过程式(procedural)解决方案。

这是 PL/pgSQL 中的实现。

CREATE OR REPLACE FUNCTION f_expand_oid_null()
  RETURNS TABLE(_id CHAR(1), _pid int, _sid int, _oid int, _event varchar(3))
  LANGUAGE plpgsql AS
$func$
-- Replays the rows of table ext in (pid, sid, key) order and returns them with
-- every row whose oid is NULL expanded to one row per currently active object.
--
-- Returns: rows with an oid are passed through unchanged; a row without an oid
-- yields one row per active object (no row at all if nothing is active).
--
-- Event handling, as implemented below:
--   'rco' : the object is removed from the active set.
--   'pof' : the object is remembered; it stays in the set until the next 'pom'.
--   'pom' : the remembered 'pof' object is replaced in place by the new object,
--           which thereby inherits its position in the expansion order.
--   other : the object is added to the active set, or moved to the position of
--           the current row if it is already present.
-- Assumes every 'pof' row is immediately followed by its 'pom' row.
DECLARE
   _key   int;
   _pof   int;  -- object of the latest 'pof' row, consumed by the next 'pom'
BEGIN
   -- Working set of active object IDs. ON COMMIT DROP only cleans up at the
   -- end of the transaction, so reuse and empty the table when the function
   -- is called again within the same transaction instead of failing with
   -- "relation already exists".
   CREATE TEMP TABLE IF NOT EXISTS tmp_active_objects(
     oid int PRIMARY KEY
   , pid int
   , sid int
   , key int
   ) ON COMMIT DROP;

   TRUNCATE tmp_active_objects;

   -- loop over table rows in event order
   FOR _key, _id, _pid, _sid, _oid, _event IN
      SELECT e.key, e.id, e.pid, e.sid, e.oid, e.event
      FROM   ext e
      ORDER  BY e.pid, e.sid, e.key
   LOOP
      IF _oid IS NULL THEN
         -- expand to ordered set of active objects
         RETURN QUERY
         SELECT _id, _pid, _sid, a.oid, _event
         FROM   tmp_active_objects a
         ORDER  BY a.pid, a.sid, a.key;  -- keep original order of events
         -- returns nothing if no active objects
      ELSE
         -- emit the row as is: RETURN NEXT sends the current OUT column values
         RETURN NEXT;

         CASE _event
         WHEN 'rco' THEN
            DELETE FROM tmp_active_objects WHERE oid = _oid;
         WHEN 'pof' THEN
            _pof := _oid;   -- remember for subsequent 'pom'
         WHEN 'pom' THEN
            UPDATE tmp_active_objects
            SET    oid = _oid
            WHERE  oid = _pof;

            -- consumed: a stray later 'pom' must not replace an object that
            -- happens to reuse the old ID
            _pof := NULL;
         ELSE
            -- upsert active objects
            INSERT INTO tmp_active_objects (oid, pid, sid, key)
            VALUES (_oid, _pid, _sid, _key)
            ON CONFLICT (oid) DO
            UPDATE
            SET    (pid, sid, key)
                 = (EXCLUDED.pid, EXCLUDED.sid, EXCLUDED.key);
         END CASE;
      END IF;
   END LOOP;
END
$func$;

db<> fiddle here

把一切解释清楚比写出它更费力。我做了一些假设,也有一些需要避免的陷阱。特别是,每个“pof”行后面必须紧跟一个“pom”行。

如果您对 PL/pgSQL 不够熟悉,请考虑聘请付费顾问,或者用您熟悉的其他过程式语言来实现。

关于sql - 随着时间的推移跟踪事件对象,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/65815468/

相关文章:

function - 尝试创建一个插入函数

windows - 如何启动psql.exe

python - 如何反向执行 date_range?

Mysql 子查询中的未知列有效

mysql - 排序结果以匹配 WHERE IN 表达式中值的顺序

Matlab 编译器和数据库应用程序

r - na.locf 但不要做尾随 NA

python - 使用 ARMAResult.predict() 函数的正确方法

java - Hibernate:通过单个连接表引用多个表

MySQL 格式数字,小数位数未知