python - 查询多个 between 子句

标签 python postgresql python-3.x sqlalchemy

我的 ORM 如下所示:

from sqlalchemy import Column, Integer, String, TIMESTAMP, ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base

# Declarative base class that the ORM models below inherit from.
Base = declarative_base()

class Data(Base):
    """A value recorded at a timestamp, stored in the `data` table."""
    __tablename__ = 'data'
    id = Column(Integer, primary_key=True)
    value = Column(String(8), nullable=False)
    timestamp = Column(TIMESTAMP, nullable=False)
    # Foreign key to `object.id`; nullable, since no `nullable=False` is given.
    object = Column(Integer, ForeignKey('object.id'))

class Object(Base):
    """Row of the `object` table holding a `version` and a `setting`."""
    __tablename__ = 'object'
    id = Column(Integer, primary_key=True)
    version = Column(String(8), nullable=False)
    setting = Column(String(8), nullable=False)
    # Related `ObjectHistory` rows. The backref also adds an attribute named
    # `history` on `ObjectHistory`, pointing back at this class.
    history = relationship('ObjectHistory', backref='history')

class ObjectHistory(Base):
    """A `version`/`setting` pair together with its `start`/`end` timestamps."""
    __tablename__ = 'object_history'
    id = Column(Integer, primary_key=True)
    version = Column(String(8), nullable=False)
    setting = Column(String(8), nullable=False)
    start = Column(TIMESTAMP, nullable=False)
    # Nullable, unlike `start`.
    end = Column(TIMESTAMP)
    # Foreign key to `object.id`.
    object = Column(Integer, ForeignKey('object.id'))

我的数据如下所示:

from sqlalchemy import create_engine
from sqlalchemy.orm.session import sessionmaker
import datetime

# NOTE(review): the URL looks like a placeholder (generic credentials, no
# database name) -- adjust before running.
engine = create_engine('postgresql://username:password@localhost/')
Session = sessionmaker(bind=engine)
session = Session()

# Create the tables for all models declared on `Base`.
Base.metadata.create_all(engine)

obj = Object(version='0001', setting='some')

# `obj.id` is only assigned once the row has been flushed to the database.
# Flush before building the rows whose foreign key refers to it; otherwise
# `obj.id` is still None and every `Data.object` would be stored as NULL.
session.add(obj)
session.flush()

# populate database
data = [
    obj,
    Data(value='a', timestamp=datetime.datetime(2017,6,21,12,0,0), object=obj.id),
    Data(value='b', timestamp=datetime.datetime(2017,6,21,13,0,0), object=obj.id),
    Data(value='c', timestamp=datetime.datetime(2017,6,21,14,0,0), object=obj.id),
    Data(value='d', timestamp=datetime.datetime(2017,6,21,15,0,0), object=obj.id),
    ObjectHistory(version='0001', setting='any', start=datetime.datetime(2017,6,21,11,30,0), end=datetime.datetime(2017,6,21,12,30,0)),
    ObjectHistory(version='0002', setting='some', start=datetime.datetime(2017,6,21,12,30,0), end=datetime.datetime(2017,6,21,13,30,0)),
    ObjectHistory(version='0001', setting='some', start=datetime.datetime(2017,6,21,13,30,0), end=None),
]

# `obj` is already pending in the session; adding it again is harmless.
session.add_all(data)
session.commit()

我想查询所有 Data 以了解 Object 何时具有特定版本。如您所见,同一版本在历史记录中可以出现多次,我想拥有使用特定版本期间的所有数据条目。

我是这样想的:

version = '0001'

# get the start and end timestamps during which object had this version
between_these = session.query(ObjectHistory.start, ObjectHistory.end) \
    .filter(ObjectHistory.version == version)

# and then somehow query Data between these timestamps
# so that data contains the Data rows with values 'a', 'c', and 'd'
# this won't work: `between_these` is a Query object, which has no
# `start`/`end` attributes, and it may yield several intervals
data = session.query(Data) \
    .filter(Data.timestamp.between(between_these.start, between_these.end)).all()

但是我认为这种方法行不通,因为可能有多个开始和结束时间戳。我想我需要使用 or_ ( http://docs.sqlalchemy.org/en/latest/core/sqlelement.html#sqlalchemy.sql.expression.or_ ),但我似乎无法弄清楚如何在这种情况下应用它。这完全可能吗?如果可能,怎么做?

编辑:所需的输出是在 Data.object 的版本为“0001”的时间段内记录的 Data 行;在本示例中,即 Data.value 为“a”、“c”和“d”的那些行。

最佳答案

这里的问题是 between_these 对象的类型是 Query,即它尚未被执行,因此没有 start/end 属性

我们可以做到以下几点:

  • 从 between_these 对象创建子查询,然后在过滤器中使用它,
  • 对可能为 NULL 的 ObjectHistory.end 使用 PostgreSQL 的 COALESCE 函数

可以这样

# (start, end) intervals during which the version was '0001', wrapped as a
# subquery so that its columns can be referenced through `.c`.
version_intervals = session.query(ObjectHistory.start, ObjectHistory.end)
version_intervals = version_intervals.filter(ObjectHistory.version == '0001')
between_these = version_intervals.subquery('between_these')

# An interval whose `end` is NULL is treated as lasting until `datetime.max`.
interval_end = func.coalesce(between_these.c.end, datetime.max)
in_interval = Data.timestamp.between(between_these.c.start, interval_end)

data = session.query(Data).filter(in_interval).all()

这样可行,但我们无法得知每条筛选出的 data 记录与哪条 object_history 记录相对应。

如果您希望每个过滤后的 Data 对象都具有过滤所基于的 ObjectHistory 对象,那么我们可以像这样查询两者

# Pair each `Data` row with the version-'0001' `ObjectHistory` row whose
# interval contains its timestamp. A NULL `end` is replaced by
# `datetime.max`, so an open-ended interval has no upper limit.
data = (session.query(Data, ObjectHistory)
        .filter(ObjectHistory.version == '0001')
        .filter(Data.timestamp.between(ObjectHistory.start,
                                       func.coalesce(ObjectHistory.end,
                                                     datetime.max)))
        .all())

(这里不需要between_these对象)

或者如果我们只想知道时间间隔

# Return each matching `Data` row together with the `start`/`end` of the
# version-'0001' interval that contains its timestamp. A NULL `end` is
# replaced by `datetime.max` for the comparison only; the selected `end`
# column is returned unchanged.
data = (session.query(Data, ObjectHistory.start, ObjectHistory.end)
        .filter(ObjectHistory.version == '0001')
        .filter(Data.timestamp.between(ObjectHistory.start,
                                       func.coalesce(ObjectHistory.end,
                                                     datetime.max)))
        .all())

测试

首先导入所需模块,并为各模型添加初始化方法

from datetime import datetime

from sqlalchemy import Column, Integer, String, TIMESTAMP, create_engine, func
from sqlalchemy.engine.url import make_url
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Declarative base class that the ORM models below inherit from.
Base = declarative_base()


class Data(Base):
    """A value recorded at a timestamp, stored in the `data` table."""
    __tablename__ = 'data'
    id = Column(Integer, primary_key=True)
    value = Column(String(8), nullable=False)
    timestamp = Column(TIMESTAMP, nullable=False)

    def __init__(self, value, timestamp):
        """Store `value` and `timestamp` on the new instance."""
        self.value = value
        self.timestamp = timestamp


class Object(Base):
    """Row of the `object` table holding a `version` and a `setting`."""
    __tablename__ = 'object'
    id = Column(Integer, primary_key=True)
    version = Column(String(8), nullable=False)
    setting = Column(String(8), nullable=False)

    def __init__(self, version, setting):
        """Store `version` and `setting` on the new instance."""
        self.version = version
        self.setting = setting


class ObjectHistory(Base):
    """A `version`/`setting` pair together with its `start`/`end` timestamps."""
    __tablename__ = 'object_history'
    id = Column(Integer, primary_key=True)
    version = Column(String(8), nullable=False)
    setting = Column(String(8), nullable=False)
    start = Column(TIMESTAMP, nullable=False)
    # Nullable, unlike `start`.
    end = Column(TIMESTAMP)

    def __init__(self, version, setting, start, end):
        """Store the four values on the new instance; `end` has no default."""
        self.version = version
        self.setting = setting
        self.start = start
        self.end = end

然后初始化数据库和创建 session

# NOTE(review): the URL looks like a placeholder (generic credentials, host
# and database name) -- adjust before running.
db_uri = make_url('postgresql://username:password@host:5432/database')
engine = create_engine(db_uri)
# Create the tables for all models declared on `Base`.
Base.metadata.create_all(bind=engine)
session_factory = sessionmaker(bind=engine)
session = session_factory()

然后我们将测试数据添加到数据库

# Four `Data` rows 'a'..'d', one per hour from 12:00 to 15:00 on 2017-06-21
# (first, second, third and fourth `Data` object respectively).
data_rows = [Data(value=value,
                  timestamp=datetime(2017, 6, 21, hour, 0, 0))
             for value, hour in zip('abcd', range(12, 16))]

# (version, setting, start, end) of each history interval; the last interval
# has no end (`end` is None).
history_intervals = [
    ('0001', 'any',
     datetime(2017, 6, 21, 11, 30, 0), datetime(2017, 6, 21, 12, 30, 0)),
    ('0002', 'some',
     datetime(2017, 6, 21, 12, 30, 0), datetime(2017, 6, 21, 13, 30, 0)),
    ('0001', 'some',
     datetime(2017, 6, 21, 13, 30, 0), None),
]
history_rows = [ObjectHistory(version=version, setting=setting,
                              start=start, end=end)
                for version, setting, start, end in history_intervals]

# Order: the `Data` rows, then the `Object`, then the history rows.
session.add_all(data_rows
                + [Object(version='0001', setting='some')]
                + history_rows)
session.commit()

然后生成查询并获取它

# (start, end) intervals during which the version was '0001', wrapped as a
# subquery so that its columns can be referenced through `.c`.
version_intervals = session.query(ObjectHistory.start, ObjectHistory.end)
version_intervals = version_intervals.filter(ObjectHistory.version == '0001')
between_these = version_intervals.subquery('between_these')

# An interval whose `end` is NULL is treated as lasting until `datetime.max`.
interval_end = func.coalesce(between_these.c.end, datetime.max)
in_interval = Data.timestamp.between(between_these.c.start, interval_end)

data = session.query(Data).filter(in_interval).all()

最后——断言

# Exactly three `Data` rows are expected.
assert len(data) == 3
# Compare the sorted values instead of testing set membership, so that a
# duplicated row (e.g. 'a', 'a', 'c') cannot satisfy the check.
assert sorted(datum.value for datum in data) == ['a', 'c', 'd']

因此我们可以看到 data 对象由第一个、第三个和第四个 Data 对象组成。

关于python - 查询多个 between 子句,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/44680404/

相关文章:

python - 生成一个字符串后跟多个 None 值的列表

python - pandas 中的 dateoffset 和 bdate_range 丢失时间信息

python - 从字典列表创建 Dataframe,不使用 pd.concat()

sql - 在 DELETE 和更新选定语句后创建触发器

python - 将 pandas 列定位或移动到特定的列索引

regex - 正则表达式中的字节错误

Python:读取没有默认分隔符且包含数百万条记录的文件并将其放入数据框( Pandas )时的效率?

python - Flask-SQLAlchemy Postgres 错误 - 无法连接到服务器 : Connection refused (0x0000274D/10061)

sql-server - PostgreSQL 中外部应用的等效语法是什么

rest - 在 Falcon REST 中解码 JSON 文件上传