python - SQLAlchemy 中跨越四个表的关系

标签 python mysql sqlalchemy

我正在尝试建立一个跨越四个表的关系。我参考 this question 中的代码简化了自己的代码,使其与我的数据库结构相匹配。

from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class A(Base):
    """Source side of the A -> B -> C -> D chain; rows point at ``b`` through ``b_id``."""

    __tablename__ = 'a'

    id = Column(Integer, primary_key=True)
    b_id = Column(Integer, ForeignKey('b.id'))

    # FIXME: This fails with:
    #   "Relationship A.ds could not determine any unambiguous local/remote column pairs based on
    #    join condition and remote_side arguments.  Consider using the remote() annotation to
    #    accurately mark those elements of the join condition that are on the remote side of the relationship."
    #
    # ds = relationship("D", primaryjoin="and_(A.b_id == B.id, B.id == C.b_id, D.id == C.d_id)", viewonly=True)

    def dq(self):
        """Return a query for every D reachable from this A through B and C.

        Uses the module-level ``sess`` session.
        """
        return sess.query(D).filter(and_(D.id == C.d_id,
                                         C.b_id == B.id,
                                         # Follow the foreign key a.b_id -> b.id. Comparing
                                         # B.id with A.id only works while the ids coincide.
                                         B.id == A.b_id,
                                         A.id == self.id))


class B(Base):
    """Table ``b``; referenced by the foreign keys ``a.b_id`` and ``c.b_id``."""
    __tablename__ = 'b'

    id = Column(Integer, primary_key=True)


class C(Base):
    """Table ``c``; links ``b`` rows to ``d`` rows."""
    __tablename__ = 'c'

    # The two foreign keys together form the composite primary key.
    b_id = Column(Integer, ForeignKey('b.id'), primary_key=True)
    d_id = Column(Integer, ForeignKey('d.id'), primary_key=True)


class D(Base):
    """Table ``d``; the target end of the chain, referenced by ``c.d_id``."""
    __tablename__ = 'd'

    id = Column(Integer, primary_key=True)


# Build the schema on a SQLite engine and load a minimal data set:
# one A -> one B -> two C link rows -> two D rows.
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

sess = Session(e)

sess.add(D(id=1))
sess.add(D(id=2))
sess.add(B(id=1))
sess.add(C(b_id=1, d_id=1))
sess.add(C(b_id=1, d_id=2))
sess.add(A(id=1, b_id=1))
sess.flush()


a1 = sess.query(A).first()
# Call form of print: valid on both Python 2 and Python 3.
print(a1.dq().all())
# print(a1.ds)

所以我的问题是“ds”关系的连接语法该怎么写。当前的错误信息提示要添加 remote(),但我还没能让它正常工作。我也尝试过使用 secondaryjoin,但没有成功。“dq”中的查询是有效的,我最终在代码中使用过滤器绕过了这个问题——但我仍然很好奇:这个关系(如果可行的话)应该如何构建?

最佳答案

我不是 sqlalchemy 专家,这是我的理解。

我认为 sqlalchemy 的关系 API 中最主要的混淆来源在于:参数 primaryjoin、secondary、secondaryjoin 的真正含义是什么。在我看来,它们的含义如下:

        primaryjoin              secondaryjoin(optional)
source -------------> secondary -------------------------> dest
 (A)                                                        (D)

现在我们需要弄清楚中间部分应该是什么。尽管 sqlalchemy 中的自定义连接出乎意料地复杂,但您确实需要清楚自己想要得到什么,也就是对应的原始 SQL。一种可能的解决方案是:

/* Variant 1: the parenthesised b -> c -> d join chain is treated as one unit. */
SELECT a.*, d.id 
FROM a JOIN (b JOIN c ON c.b_id = b.id JOIN d ON d.id = c.d_id) /* secondary: already contains d */
ON a.b_id = b.id /* primaryjoin: attaches a to the secondary */ 
WHERE a.id = 1;

在这种情况下,源 a 与“secondary”(b JOIN c .. JOIN d ..)连接,并且不需要 secondaryjoin 来连接 D,因为 D 已经包含在 secondary 中了。于是我们有:

# The whole B -> C -> D join chain is passed as `secondary`, so only
# primaryjoin is given and no secondaryjoin is used.
ds1 = relationship(
    'D',
    primaryjoin='A.b_id == B.id',
    secondary='join(B, C, B.id == C.b_id).join(D, C.d_id == D.id)',
    viewonly=True,  # almost always better to add this
)

另一种解决方案可能是:

/* Variant 2: only b -> c is grouped; d is joined in a separate step. */
SELECT a.*, d.id 
FROM a JOIN (b JOIN c ON c.b_id = b.id) /* secondary: b joined to c */
ON a.b_id = b.id /* primaryjoin: attaches a to the secondary */
JOIN d ON c.d_id = d.id /* secondaryjoin: attaches the secondary to d */
WHERE a.id = 1;

这里 a 与 secondary(b JOIN c ..)连接,secondary 再通过 c.d_id = d.id 与 d 连接,因此:

# `secondary` covers only the B -> C join; secondaryjoin then links C to D.
ds2 = relationship(
    'D',
    primaryjoin='A.b_id == B.id',
    secondary='join(B, C, B.id == C.b_id)',
    secondaryjoin='C.d_id == D.id',
    viewonly=True,  # almost always better to add this
)

经验法则是:把较长的连接路径放进 secondary 中,再把它分别连接到源和目标。

在性能方面,ds1 和 ds2 生成的查询计划比 dq 稍微简单一些,但我认为它们之间没有太大区别。查询规划器总是比我们更清楚该怎么做。

这是更新后的代码,供您参考。请注意如何通过 sess.query(A).options(joinedload(...)) 预先加载(eager load)ds1 这样的关系:

from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class A(Base):
    """Source side of the A -> B -> C -> D chain.

    ``ds1`` and ``ds2`` are two read-only relationships that both reach D;
    ``dq`` expresses the same lookup as an explicit query.
    """

    __tablename__ = 'a'

    id = Column(Integer, primary_key=True)
    b_id = Column(Integer, ForeignKey('b.id'))

    # Variant 1: the whole B -> C -> D join chain is the "secondary", so only
    # primaryjoin is given to attach it to A.
    ds1 = relationship(
        'D',
        primaryjoin='A.b_id == B.id',
        secondary='join(B, C, B.id == C.b_id).join(D, C.d_id == D.id)',
        viewonly=True,  # almost always better to add this
    )
    # Variant 2: the "secondary" is only B -> C; secondaryjoin links C to D.
    ds2 = relationship(
        'D',
        secondary='join(B, C, B.id == C.b_id)',
        primaryjoin='A.b_id == B.id',
        secondaryjoin='C.d_id == D.id',
        viewonly=True,  # almost always better to add this
    )

    def dq(self):
        """Return a query for every D reachable from this A through B and C.

        Uses the module-level ``sess`` session.
        """
        return sess.query(D).filter(and_(D.id == C.d_id,
                                         C.b_id == B.id,
                                         # Follow the foreign key a.b_id -> b.id, matching the
                                         # primaryjoin above. B.id == A.id only works while
                                         # the ids coincide.
                                         B.id == A.b_id,
                                         A.id == self.id))


class B(Base):
    """Table ``b``; referenced by the foreign keys ``a.b_id`` and ``c.b_id``."""
    __tablename__ = 'b'

    id = Column(Integer, primary_key=True)


class C(Base):
    """Table ``c``; links ``b`` rows to ``d`` rows."""
    __tablename__ = 'c'

    # The two foreign keys together form the composite primary key.
    b_id = Column(Integer, ForeignKey('b.id'), primary_key=True)
    d_id = Column(Integer, ForeignKey('d.id'), primary_key=True)


class D(Base):
    """Table ``d``; instances render as their bare primary key."""

    __tablename__ = 'd'

    id = Column(Integer, primary_key=True)

    def __repr__(self):
        # Show only the id so that lists of D objects print compactly.
        return '{}'.format(self.id)


# Rebuild the schema from the mapped classes on a SQLite engine.
e = create_engine("sqlite://", echo=True)
Base.metadata.drop_all(e)
Base.metadata.create_all(e)

sess = Session(e)

# Rows in d and b go in first and are flushed before the rows that reference them.
sess.add_all([D(id=1), D(id=2), B(id=1), B(id=2)])
sess.flush()

# Only b=1 is linked to any d; the A row with id=2 points at b=2, which has no c rows.
sess.add_all([
    C(b_id=1, d_id=1),
    C(b_id=1, d_id=2),
    A(id=1, b_id=1),
    A(id=2, b_id=2),
])
sess.commit()


def get_ids(ds):
    """Return the set of ``id`` attribute values found on the items of *ds*."""
    return {item.id for item in ds}


# Loader options receive the mapped attribute (A.ds1) rather than its string
# name: string arguments are deprecated in SQLAlchemy 1.4 and rejected in 2.0.
a1 = sess.query(A).options(joinedload(A.ds1)).filter_by(id=1).first()
print('{} a1.ds1: {}'.format('=' * 30, a1.ds1))
# The relationship must yield the same D rows as the hand-written query.
assert get_ids(a1.dq()) == get_ids(a1.ds1)


a1 = sess.query(A).options(joinedload(A.ds2)).filter_by(id=1).first()
print('{} a1.ds2: {}'.format('=' * 30, a1.ds2))
assert get_ids(a1.dq()) == get_ids(a1.ds2)

# The A row with id=2 references b=2, which has no rows in c, so both
# relationships must come back empty.
a2 = sess.query(A).options(joinedload(A.ds2)).filter_by(id=2).first()
print('{} a2.ds1: {}; a2.ds2 {};'.format('=' * 30, a2.ds1, a2.ds2))
assert a2.ds1 == a2.ds2 == []

关于python - SQLAlchemy 中跨越四个表的关系,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/29302176/

相关文章:

python - 在 SQLAlchemy 中按小时分组?

python - Flask 和 SQLAlchemy,应用程序未在实例上注册

python - sqlAlchemy,具体继承,但父级有外键

带有回调函数的 Python 记录器

php - 如何使用 PHP MySQL 在 AJAX 脚本中获取数据库值

python - 使用moviepy包(Python)的问题

mysql - 在循环中使用 MySQL 输出作为 BASH 函数的输入

mysql - 按日期重复查询

python - pymongo 连接池和客户端请求

python - 从一个 Airflow DAG 返回值到另一个