sql - 如何高效维护一个传递闭包表?

标签 sql firebird directed-acyclic-graphs transitive-closure-table

我的关系数据库 (Firebird) 中有一个 DAG,有两个表 edgenode (邻接表模型)。我想递归查询它们,但发现递归查询效率很低。所以我尝试实现触发器来维持 Dong 等人的传递闭包。纸http://homepages.inf.ed.ac.uk/libkin/papers/tc-sql.pdf .
SELECT s 现在非常快,但是 DELETE s 非常慢,因为几乎整个图形都是为一次删除而复制的。更糟糕的是,并发更新似乎是不可能的。

有没有更好的方法来实现这一点?

编辑

我做了一些实验,并在 TC 表中引入了一个引用计数器。这样,删除速度很快。我写了一些简单的测试用例,但我不确定我是否做得对。这是我到目前为止:

CREATE GENERATOR graph_tc_seq;

CREATE TABLE EDGE (
    parent DECIMAL(10, 0) NOT NULL,
    child DECIMAL(10, 0) NOT NULL,
    PRIMARY KEY (parent, child)
);

CREATE TABLE GRAPH_TC (
    parent DECIMAL(10, 0) NOT NULL,
    child DECIMAL(10, 0) NOT NULL,
    refcount DECIMAL(9, 0),
    PRIMARY KEY (parent, child)
);

CREATE TABLE GRAPH_TC_TEMP (
    session_id DECIMAL(9, 0),
    parent DECIMAL(10, 0),
    child DECIMAL(10, 0)
);

CREATE PROCEDURE GRAPH_TC_CREATE (p_parent DECIMAL(10, 0), c_child DECIMAL(10, 0))
AS 
    declare variable tp_parent DECIMAL(10,0);
    declare variable tc_child DECIMAL(10,0);
    declare variable session_id DECIMAL(9,0);
    declare variable refs DECIMAL(9,0);
begin
    session_id = gen_id(graph_tc_seq,1);
    insert into graph_tc_temp (parent, child, session_id, refcount) values (:p_parent, :p_parent, :session_id, 1);
    insert into graph_tc_temp (parent, child, session_id, refcount) values (:c_child, :c_child, :session_id, 1);
    insert into graph_tc_temp (parent, child, session_id, refcount) values (:p_parent, :c_child, :session_id, 1);
    insert into graph_tc_temp (parent, child, session_id, refcount) select distinct :p_parent, child, :session_id, refcount from graph_tc where parent = :c_child and not parent = child;
    insert into graph_tc_temp (child, parent, session_id, refcount) select distinct :c_child, parent, :session_id, refcount from graph_tc where child = :p_parent and not parent = child;
    insert into graph_tc_temp (parent, child, session_id, refcount) select distinct a.parent, b.child, :session_id, a.refcount*b.refcount from graph_tc a, graph_tc b where a.child = :p_parent and b.parent = :c_child and not a.parent = a.child and not b.parent = b.child;
    for select parent, child, refcount from graph_tc_temp e where session_id= :session_id and exists (select * from graph_tc t where t.parent = e.parent and t.child = e.child ) into :tp_parent, :tc_child, :refs do begin
        update graph_tc set refcount=refcount+ :refs where parent = :tp_parent and child = :tc_child;
    end
    insert into graph_tc (parent, child, refcount) select parent, child, refcount from graph_tc_temp e where session_id = :session_id and not exists (select * from graph_tc t where t.parent = e.parent and t.child = e.child);
    delete from graph_tc_temp where session_id = :session_id;
end ^

CREATE PROCEDURE GRAPH_TC_DELETE (p_parent DECIMAL(10, 0), c_child DECIMAL(10, 0))
AS 
    declare variable tp_parent DECIMAL(10,0);
    declare variable tc_child DECIMAL(10,0);
    declare variable refs DECIMAL(9,0);
begin
    delete from graph_tc where parent = :p_parent and child = :p_parent and refcount <= 1;
    update graph_tc set refcount = refcount - 1 where parent = :p_parent and child = :p_parent and refcount > 1;
    delete from graph_tc where parent = :c_child and child = :c_child and refcount <= 1;
    update graph_tc set refcount = refcount - 1 where parent = :c_child and child = :c_child and refcount > 1;
    delete from graph_tc where parent = :p_parent and child = :c_child and refcount <= 1;
    update graph_tc set refcount = refcount - 1 where parent = :p_parent and child = :c_child and refcount > 1;
    for select distinct :p_parent,  b.child, refcount from graph_tc b where b.parent = :c_child and not b.parent = b.child into :tp_parent, :tc_child, :refs do begin
        delete from graph_tc where parent = :tp_parent and child = :tc_child and refcount <= :refs;
        update graph_tc set refcount = refcount - :refs where parent = :tp_parent and child = :tc_child and refcount > :refs;
    end
    for select distinct :c_child,  b.parent, refcount from graph_tc b where b.child = :p_parent and not b.parent = b.child into :tc_child, :tp_parent, :refs do begin
        delete from graph_tc where child = :tc_child and parent = :tp_parent and refcount <= :refs;
        update graph_tc set refcount = refcount - :refs where child = :tc_child and parent = :tp_parent and refcount > :refs;
    end
    for select distinct a.parent, b.child, a.refcount*b.refcount from graph_tc a, graph_tc b where not a.parent = a.child and not b.parent = b.child and a.child = :p_parent and b.parent = :c_child into :tp_parent, :tc_child, :refs do begin
        delete from graph_tc where parent = :tp_parent and child = :tc_child and refcount <= :refs;
        update graph_tc set refcount = refcount - :refs where parent = :tp_parent and child = :tc_child and refcount > :refs;
    end
end ^

CREATE TRIGGER GRAPH_TC_AFTER_INSERT FOR EDGE AFTER INSERT as
begin
    execute procedure graph_tc_create(new.parent,new.child);
end ^

CREATE TRIGGER GRAPH_TC_AFTER_UPDATE FOR EDGE AFTER UPDATE as
begin
    if ((new.parent <> old.parent) or (new.child <> old.child)) then begin
    execute procedure graph_tc_delete(old.parent,old.child);
    execute procedure graph_tc_create(new.parent,new.child);
    end
end ^

CREATE TRIGGER GRAPH_TC_AFTER_DELETE FOR EDGE AFTER DELETE as
begin
    execute procedure graph_tc_delete(old.parent,old.child);
end ^

这是我自己的想法,但我认为其他人已经实现了 TC。他们在做同样的事情吗?

我有一些测试用例,但我不确定我是否会与更大的图表不一致。

并发性如何,我认为当两个并发事务想要更新图形时,这种方法会失败,对吗?

编辑

我在我的代码中发现了一些错误,我想与您分享修复版本。

我发现了一篇很棒的文章:http://www.codeproject.com/Articles/22824/A-Model-to-Represent-Directed-Acyclic-Graphs-DAG-o .是否有更多有趣的文章或科学论文,采用不同的方法?

最佳答案

我只是通过扩展到这里描述的传递自反闭包表模型来修复一个缓慢的删除操作:
http://www.dba-oracle.com/t_sql_patterns_incremental_eval.htm .完全维护其中的路径数需要做更多的工作,但是当删除从每个单独的删除操作 6 秒变为可以忽略不计时,它得到了很大的返回(我现在可以删除图中的每个关系,然后将它们全部添加回来在 14 秒内总共有 4,000 个关系)。

关于sql - 如何高效维护一个传递闭包表?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/18993171/

相关文章:

php - 插入数据库时​​出现sql语法错误codeigniter

delphi - 尝试在 Delphi 中创建 Firebird 数据库时出现 EIBInterBaseError 'unavailable database'

c# - 如何使用 .NET 数据提供程序在 firebird 数据库中使用大文件 (1Gb) 更新二进制 blob 字段?

algorithm - 这在计算复杂性方面有多难?

algorithm - 有向无环图中的最大权连通子图

mysql - 在 WHERE 子句中使用 != 时 SELECT 不起作用(使用 GROUP BY 和 HAVING COUNT)

sql - 停用 postgres 用户帐户

python - 有没有用 Django 制作的 sql 管理面板? - Django

java - 将 Firebird 的全局 JNDI 中的以下条目绑定(bind)为空

amazon-web-services - 在 Airflow 中设置 sns_publish_operator