add "expressions" accessor to column attributes for special GROUP BY situations #5262

ericatkin · 2020-04-17T03:08:16Z

Included below is a test case that demonstrates the problem with inline comments and a suggested solution.
Python 3.8.2
SQLAlchemy 1.3.16
tested with Postgres 12.2 and 9.6.6 (create a db named group_by)

#import sqlparse
import sqlalchemy
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class Entity(Base):
    id = Column(Integer, primary_key=True)
    polymorphic_type = Column(Text, nullable=False)
    name = Column(Text, nullable=False)
    
    __tablename__ = 'entity'
    __mapper_args__ = {'polymorphic_on': 'polymorphic_type'}


class Employer(Entity):
    id = Column(Integer, ForeignKey('entity.id'), primary_key=True)
    industry = Column(Text, nullable=False)
    
    __tablename__ = 'employer'
    __mapper_args__ = {'polymorphic_identity': __tablename__}


class Employee(Entity):
    """Included to dispell the illusion that the joined-table inheritance of
    Employer isn't needed. Invoice for example would have a relationship here
    for a sales_rep"""
    
    id = Column(Integer, ForeignKey('entity.id'), primary_key=True)
    title = Column(Text, nullable=False)
    employer_id = Column(Integer, ForeignKey('employer.id'), nullable=False)
    employer = relationship(
        'Employer',
        foreign_keys=employer_id,
        backref=backref('employees', order_by='Employee.name'),
    )
    
    __tablename__ = 'employee'
    __mapper_args__ = {'polymorphic_identity': __tablename__}


class Invoice(Base):
    id = Column(Integer, primary_key=True)
    vendor_id = Column(Integer, ForeignKey('employer.id'), nullable=False)
    vendor = relationship('Employer', foreign_keys=vendor_id)
    customer_id = Column(Integer, ForeignKey('employer.id'), nullable=False)
    customer = relationship('Employer', foreign_keys=customer_id)
    
    __tablename__ = 'invoice'


class Item(Base):
    id = Column(Integer, primary_key=True)
    name = Column(Text, nullable=False)
    quantity = Column(Numeric, nullable=False)
    rate = Column(Numeric, nullable=False)
    invoice_id = Column(Integer, ForeignKey('invoice.id'), nullable=False)
    invoice = relationship('Invoice', backref=backref('items', order_by=name))

    __tablename__ = 'item'


engine = create_engine('postgresql:///group_by', echo=True)
Session = sessionmaker(bind=engine)
session = Session()

Base.metadata.create_all(engine)

vendor = Employer(name='XYZ Widgets', industry='Manufacturing')
customer = Employer(name='ABCMart', industry='Retail')
invoice = Invoice(vendor=vendor, customer=customer)
invoice.items = [Item(name=f'Item{i}', quantity=i, rate=i) for i in range(5)]

session.add(invoice)
session.commit()

engine.echo = False

query = (
    session
    .query(Invoice, Employer, func.sum(Item.quantity * Item.rate))
    .join(Invoice.vendor)
    .join(Invoice.items)
    .group_by(Invoice.id, Employer.id)
)
#print(sqlparse.format(str(query), reindent=True))
try:
    print(query.all())
except sqlalchemy.exc.ProgrammingError as e:
    print(e)
    session.rollback()

print('''
The error can be resolved by manually by adding Entity.id, but this is
cumbersome for a deep inheritance chain as each superclass would also need to
be added. It seems like SQLAlchemy could automate this.
''')
query = query.group_by(Entity.id)
#print(sqlparse.format(str(query), reindent=True))
print(query.all())


print('''
The above approach doesn't work for aliased classes for two reasons:
    1 - I don't see any API access to the the aliased Entity.id column other
        than something like `text('entity.id')`
    2 - Even with the text hack in bullet 1, these aliased tables are rendered
        into a subquery join which causes postgres to "forget" the primary key
        status of entity.id so all entity columns have to be added to the group
        by clause. I'm concerned this will become a serious performance
        problem.
''')
vendor_alias = aliased(Employer, name='vendor')
customer_alias = aliased(Employer, name='customer')
query = (
    session
    .query(Invoice, vendor_alias, func.sum(Item.quantity * Item.rate))
    .join(vendor_alias, Invoice.vendor)
    .join(customer_alias, Invoice.customer)
    .join(Invoice.items)
    .group_by(
        Invoice.id,
        text('vendor.entity_id'),
        vendor_alias.id,
        text('customer.entity_id'),
        customer_alias.id,
    )
)
try:
    #print(sqlparse.format(str(query), reindent=True))
    print(query.all())
except sqlalchemy.exc.ProgrammingError as e:
    print(e)
    session.rollback()

query = query.group_by(None).group_by(Invoice.id, vendor_alias, customer_alias)
#print(sqlparse.format(str(query), reindent=True))
print(query.all())
print('''This works, but doesn't seem optimal. My real entity tables have many
more columns''')

print('''
I propose two changes to address these issues:
    1 - (essential) Change aliased class joins to be rendered as inlined tables
        as in the unaliased case but aliasing individually each component table
        as in the example below. This allows primary key columns to be
        recognised by the query planner, but is also much easier to read as
        we're only aliasing the tables (not the columns) as in the select list
    2 - (optional) Provide api access to all ancestor primary keys that works
        with aliases for use in group_by(), ie. joined_table_keys(Employer)
        would return (Employer.id, Entity.id)

This would allow a query like:

vendor_alias = aliased(Employer, name='vendor')
customer_alias = aliased(Employer, name='customer')
query = (
    session
    .query(
        Invoice,
        vendor_alias,
        customer_alias,
        func.sum(Item.quantity * Item.rate),
    )
    .join(vendor_alias, Invoice.vendor)
    .join(customer_alias, Invoice.customer)
    .join(Invoice.items)
    .group_by(
        Invoice.id,
        *joined_table_keys(vendor_alias),
        *joined_table_keys(customer_alias),
    )
)

to be rendered as (works):

SELECT invoice.id AS invoice_id,
       invoice.vendor_id AS invoice_vendor_id,
       invoice.customer_id AS invoice_customer_id,
       vendor_entity.id AS vendor_entity_id,
       vendor_entity.polymorphic_type AS vendor_entity_polymorphic_type,
       vendor_entity.name AS vendor_entity_name,
       vendor_employer.id AS vendor_employer_id,
       vendor_employer.industry AS vendor_employer_industry,
       customer_entity.id AS customer_entity_id,
       customer_entity.polymorphic_type AS customer_entity_polymorphic_type,
       customer_entity.name AS customer_entity_name,
       customer_employer.id AS customer_employer_id,
       customer_employer.industry AS customer_employer_industry,
       sum(item.quantity * item.rate) AS sum_1
FROM invoice
JOIN (
    entity AS vendor_entity JOIN employer AS vendor_employer
    ON vendor_entity.id = vendor_employer.id
) ON vendor_employer.id = invoice.vendor_id
JOIN (
    entity AS customer_entity JOIN employer AS customer_employer
    ON customer_entity.id = customer_employer.id
) ON customer_employer.id = invoice.customer_id
JOIN item ON invoice.id = item.invoice_id
GROUP BY invoice.id,
         vendor_entity.id,
         vendor_employer.id,
         customer_entity.id,
         customer_employer.id
;
''')

The text was updated successfully, but these errors were encountered:

zzzeek · 2020-04-17T15:41:32Z

query = (
session
.query(Invoice, Employer, func.sum(Item.quantity * Item.rate))
.join(Invoice.vendor)
.join(Invoice.items)
.group_by(Invoice.id, Employer.id)
)

total news to me that PG allows GROUP BY primary key with other columns present not in the GROUP BY. but there you go and it's SQL standard.

what you want to do here is this:

group_by(Invoice.id, *Employer.id.property.columns)

we can add some accessor for that or MAYBE get an ORM group_by to figure that out, but we are in the process or removing automatic magical things from ORM queries right now including GROUP BY things, so, not clear if that should not remain explciit because this is an extremely unusual use case.

print('''
I propose two changes to address these issues:
1 - (essential) Change aliased class joins to be rendered as inlined tables
as in the unaliased case but aliasing individually each component table
as in the example below. This allows primary key columns to be
recognised by the query planner, but is also much easier to read as
we're only aliasing the tables (not the columns) as in the select list

Already done, seven years ago:

vendor_alias = aliased(Employer, flat=True)
customer_alias = aliased(Employer, flat=True)

2 - (optional) Provide api access to all ancestor primary keys that works
    with aliases for use in group_by(), ie. joined_table_keys(Employer)
    would return (Employer.id, Entity.id)

Some kind of InstrumentedAttribute accessor is definitely doable here. Here it is for now:


def joined_table_keys(prop):

    insp = inspect(prop.class_)

    orig_prop = insp.mapper.attrs[prop.key]
    if insp.is_aliased_class:
        return [insp._adapt_element(elem) for elem in orig_prop.columns]
    else:
        return orig_prop.columns

this is used as: joined_table_keys(Employee.id). give it the column, explicit is better than implicit. Also the function is missing some ORM-related annotations in the columns, so it may not work for more heavy ORMish things but the accessor can include all of that.

this query proceeds and returns:


vendor_alias = aliased(Employer, flat=True)
customer_alias = aliased(Employer, flat=True)

query = (
    session
    .query(
        Invoice,
        vendor_alias,
        customer_alias,
        func.sum(Item.quantity * Item.rate),
    )
    .join(vendor_alias, Invoice.vendor)
    .join(customer_alias, Invoice.customer)
    .join(Invoice.items)
    .group_by(
        Invoice.id,
        *joined_table_keys(vendor_alias.id),
        *joined_table_keys(customer_alias.id),
    )
)

print(query.all())

is this the right SQL below?

SELECT 
	invoice.id AS invoice_id, invoice.vendor_id AS invoice_vendor_id, 
	invoice.customer_id AS invoice_customer_id, 
	employer_1.id AS employer_1_id, entity_1.id AS entity_1_id, 
	entity_1.polymorphic_type AS entity_1_polymorphic_type, 
	entity_1.name AS entity_1_name, employer_1.industry AS employer_1_industry, 
	employer_2.id AS employer_2_id, entity_2.id AS entity_2_id, 
	entity_2.polymorphic_type AS entity_2_polymorphic_type, 
	entity_2.name AS entity_2_name, 
	employer_2.industry AS employer_2_industry, sum(item.quantity * item.rate) AS sum_1 
FROM
	invoice 
JOIN 
	(entity AS entity_1 
		JOIN employer AS employer_1 ON entity_1.id = employer_1.id
	) 
ON employer_1.id = invoice.vendor_id 
JOIN (
	entity AS entity_2 JOIN employer AS employer_2 ON entity_2.id = employer_2.id
) ON employer_2.id = invoice.customer_id 

JOIN item ON invoice.id = item.invoice_id 

GROUP BY invoice.id, employer_1.id, entity_1.id, employer_2.id, entity_2.id

2020-04-17 11:39:49,248 INFO sqlalchemy.engine.base.Engine {}
2020-04-17 11:39:49,256 DEBUG sqlalchemy.engine.base.Engine Col ('invoice_id', 'invoice_vendor_id', 'invoice_customer_id', 'employer_1_id', 'entity_1_id', 'entity_1_polymorphic_type', 'entity_1_name', 'employer_1_industry', 'employer_2_id', 'entity_2_id', 'entity_2_polymorphic_type', 'entity_2_name', 'employer_2_industry', 'sum_1')
2020-04-17 11:39:49,257 DEBUG sqlalchemy.engine.base.Engine Row (1, 1, 2, 1, 1, 'employer', 'XYZ Widgets', 'Manufacturing', 2, 2, 'employer', 'ABCMart', 'Retail', Decimal('30'))
[(<__main__.Invoice object at 0x7ff4cefe4290>, <__main__.Employer object at 0x7ff4cf0d8390>, <__main__.Employer object at 0x7ff4cf0d8b50>, Decimal('30'))]

zzzeek · 2020-04-17T17:53:50Z

here's a patch, really simple

diff --git a/lib/sqlalchemy/orm/properties.py b/lib/sqlalchemy/orm/properties.py
index 1022f4ef6..0699a0f55 100644
--- a/lib/sqlalchemy/orm/properties.py
+++ b/lib/sqlalchemy/orm/properties.py
@@ -337,7 +337,7 @@ class ColumnProperty(StrategizedProperty):
 
         """
 
-        __slots__ = "__clause_element__", "info"
+        __slots__ = "__clause_element__", "info", "expressions"
 
         def _memoized_method___clause_element__(self):
             if self.adapter:
@@ -360,6 +360,26 @@ class ColumnProperty(StrategizedProperty):
             except AttributeError:
                 return self.prop.info
 
+        def _memoized_attr_expressions(self):
+            if self.adapter:
+                return [
+                    self.adapter(col, self.prop.key)
+                    for col in self.prop.columns
+                ]
+            else:
+                # no adapter, so we aren't aliased
+                # assert self._parententity is self._parentmapper
+                return [
+                    col._annotate(
+                        {
+                            "parententity": self._parententity,
+                            "parentmapper": self._parententity,
+                            "orm_key": self.prop.key,
+                        }
+                    )
+                    for col in self.prop.columns
+                ]
+
         def _fallback_getattr(self, key):
             """proxy attribute access down to the mapped column.

we get some test or two written for that you can have it in 1.3.17.

ericatkin · 2020-04-17T19:08:02Z

we can add some accessor for that or MAYBE get an ORM group_by to figure that out, but we are in the process or removing automatic magical things from ORM queries right now including GROUP BY things, so, not clear if that should not remain explciit because this is an extremely unusual use case.

Totally understand! The accessor is probably what I'd prefer anyway over some magic done by group_by().

Already done, seven years ago:

Excellent!
One question though. Why can't a name be provided to aliased() when using flat=True? ie. aliased(Employer, name='vendor', flat=True) to produce (entity AS vendor_entity JOIN employer AS vendor_employer) vs (entity AS entity_1 JOIN employer AS employer_1). Is this a postgres extension? If so, could the postgres dialect allow the combination where others don't? It is not the end of the world, but the names do help in debugging queries which I presume is the point of naming them.

is this the right SQL below?

That query looks good, subject to my comments about the names above.

ericatkin · 2020-04-17T19:14:29Z

I guess I don't understand how to use the patch. I applied it and tried:

vendor_alias = aliased(Employer, flat=True)
customer_alias = aliased(Employer, flat=True)
query = (
    session
    .query(Invoice, vendor_alias, func.sum(Item.quantity * Item.rate))
    .join(vendor_alias, Invoice.vendor)
    .join(customer_alias, Invoice.customer)
    .join(Invoice.items)
    .group_by(
        Invoice.id,
        *vendor_alias.id.property.columns,
        *customer_alias.id.property.columns,
    )
)

but still got:

sqlalchemy.exc.ProgrammingError: (psycopg2.errors.UndefinedTable) invalid reference to FROM-clause entry for table "employer"
LINE 2: ...invoice.id = item.invoice_id GROUP BY invoice.id, employer.i...
                                                             ^
HINT:  Perhaps you meant to reference the table alias "employer_1".

[SQL: SELECT invoice.id AS invoice_id, invoice.vendor_id AS invoice_vendor_id, invoice.customer_id AS invoice_customer_id, employer_1.id AS employer_1_id, entity_1.id AS entity_1_id, entity_1.polymorphic_type AS entity_1_polymorphic_type, entity_1.name AS entity_1_name, employer_1.industry AS employer_1_industry, sum(item.quantity * item.rate) AS sum_1 
FROM invoice JOIN (entity AS entity_1 JOIN employer AS employer_1 ON entity_1.id = employer_1.id) ON employer_1.id = invoice.vendor_id JOIN (entity AS entity_2 JOIN employer AS employer_2 ON entity_2.id = employer_2.id) ON employer_2.id = invoice.customer_id JOIN item ON invoice.id = item.invoice_id GROUP BY invoice.id, employer.id, entity.id, employer.id, entity.id]
(Background on this error at: http://sqlalche.me/e/f405)

(the aliases were not used in the GROUP BY clause

zzzeek · 2020-04-17T20:00:50Z

Excellent!
One question though. Why can't a name be provided to aliased() when using flat=True? ie. aliased(Employer, name='vendor', flat=True) to produce (entity AS vendor_entity JOIN employer AS vendor_employer) vs (entity AS entity_1 JOIN employer AS employer_1). Is this a postgres extension? If so, could the postgres dialect allow the combination where others don't? It is not the end of the world, but the names do help in debugging queries which I presume is the point of naming them.

that didn't seem to be a very important behavior and it might be difficult to add. it also doesn't match any existing naming convention that SQLAlchemy uses , that is "myname_tablename", and I'm hesitant to add a whole new one for this special case. The "name" refers to the name of the subquery, but in the case of "flat", there is no subquery. I'm not sure if there are complications which could arise from this, seems unlikely but nothing surprises me. PRs with complete tests welcome of course, here's how it looks:

diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py
index 014e6969a..2bf94360a 100644
--- a/lib/sqlalchemy/sql/selectable.py
+++ b/lib/sqlalchemy/sql/selectable.py
@@ -1220,8 +1220,8 @@ class Join(FromClause):
         if flat:
             assert name is None, "Can't send name argument with flat"
             left_a, right_a = (
-                self.left.alias(flat=True),
-                self.right.alias(flat=True),
+                self.left.alias(name="your_name_here", flat=True),
+                self.right.alias(name="your_name_here", flat=True),
             )
             adapter = sqlutil.ClauseAdapter(left_a).chain(
                 sqlutil.ClauseAdapter(right_a)

the behavior is standard SQL and has nothing to do with Postgresql.

I guess I don't understand how to use the patch.

the patch adds a new attribute .expressions, so it works like this:

query = (
    session
    .query(
        Invoice,
        vendor_alias,
        customer_alias,
        func.sum(Item.quantity * Item.rate),
    )
    .join(vendor_alias, Invoice.vendor)
    .join(customer_alias, Invoice.customer)
    .join(Invoice.items)
    .group_by(
        Invoice.id,
        *vendor_alias.id.expressions,
        *customer_alias.id.expressions,
    )
)

ericatkin · 2020-04-17T20:04:52Z

Traceback (most recent call last):
  File "group_by_dynamics3.py", line 130, in <module>
    *vendor_alias.id.expressions,
  File ".../venv/lib/python3.8/site-packages/sqlalchemy/orm/attributes.py", line 228, in __getattr__
    return getattr(self.comparator, key)
  File ".../venv/lib/python3.8/site-packages/sqlalchemy/util/langhelpers.py", line 957, in __getattr__
    value = getattr(self, "_memoized_attr_%s" % key)()
  File ".../venv/lib/python3.8/site-packages/sqlalchemy/orm/properties.py", line 335, in _memoized_attr_expressions
    return [
  File ".../venv/lib/python3.8/site-packages/sqlalchemy/orm/properties.py", line 336, in <listcomp>
    self.adapter(col, self.prop.key)
TypeError: _adapt_element() takes 2 positional arguments but 3 were given

zzzeek · 2020-04-17T20:09:34Z

patch is against master. here is 1.3, just remove the "self.prop.key" part:

diff --git a/lib/sqlalchemy/orm/properties.py b/lib/sqlalchemy/orm/properties.py
index 1022f4ef6..0699a0f55 100644
--- a/lib/sqlalchemy/orm/properties.py
+++ b/lib/sqlalchemy/orm/properties.py
@@ -337,7 +337,7 @@ class ColumnProperty(StrategizedProperty):

         """

-        __slots__ = "__clause_element__", "info"
+        __slots__ = "__clause_element__", "info", "expressions"

         def _memoized_method___clause_element__(self):
             if self.adapter:
@@ -360,6 +360,26 @@ class ColumnProperty(StrategizedProperty):
             except AttributeError:
                 return self.prop.info

+        def _memoized_attr_expressions(self):
+            if self.adapter:
+                return [
+                    self.adapter(col)
+                    for col in self.prop.columns
+                ]
+            else:
+                # no adapter, so we aren't aliased
+                # assert self._parententity is self._parentmapper
+                return [
+                    col._annotate(
+                        {
+                            "parententity": self._parententity,
+                            "parentmapper": self._parententity,
+                            "orm_key": self.prop.key,
+                        }
+                    )
+                    for col in self.prop.columns
+                ]
+
         def _fallback_getattr(self, key):
             """proxy attribute access down to the mapped column.

ericatkin · 2020-04-17T20:49:27Z

patch is against master. here is 1.3, just remove the "self.prop.key" part:

That worked perfectly! thanks

that didn't seem to be a very important behavior and it might be difficult to add. it also doesn't match any existing naming convention that SQLAlchemy uses , that is "myname_tablename", and I'm hesitant to add a whole new one for this special case. The "name" refers to the name of the subquery, but in the case of "flat", there is no subquery. I'm not sure if there are complications which could arise from this, seems unlikely but nothing surprises me. PRs with complete tests welcome of course, here's how it looks:

Maybe I'll play with this. Re: the nameing convention. I wouldn't mind something else if you have an idea, but what I proposed actually matches the myname_tablename_columnname aliases used in the SELECT list, ie:

>>> employer_alias = aliased(Employer, name='customer')
>>> print(sqlparse.format(str(DBSession.query(employer_alias)), reindent=True))
SELECT customer.employer_id AS customer_employer_id,
       customer.entity_id AS customer_entity_id,
       customer.entity_polymorphic_type AS customer_entity_polymorphic_type,
       customer.entity_name AS customer_entity_name,
       customer.employer_industry AS customer_employer_industry
FROM
  (SELECT entity.id AS entity_id,
          entity.polymorphic_type AS entity_polymorphic_type,
          entity.name AS entity_name,
          employer.id AS employer_id,
          employer.industry AS employer_industry
   FROM entity
   JOIN employer ON entity.id = employer.id) AS customer
>>>

A quick implementation against rel_1_3_16 yields this:

diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py
index 0e82b830f..08b7d5da3 100644
--- a/lib/sqlalchemy/sql/selectable.py
+++ b/lib/sqlalchemy/sql/selectable.py
@@ -1183,10 +1183,17 @@ class Join(FromClause):
 
         """
         if flat:
-            assert name is None, "Can't send name argument with flat"
+            if isinstance(self.left, Join):
+                left_name = name  # will recurse
+            else:
+                left_name = name and '_'.join((name, self.left.name))
+            if isinstance(self.right, Join):
+                right_name = name  # will recurse
+            else:
+                right_name = name and '_'.join((name, self.right.name))
             left_a, right_a = (
-                self.left.alias(flat=True),
-                self.right.alias(flat=True),
+                self.left.alias(name=left_name, flat=True),
+                self.right.alias(name=right_name, flat=True),
             )
             adapter = sqlutil.ClauseAdapter(left_a).chain(
                 sqlutil.ClauseAdapter(right_a)

>>> employer_alias = aliased(Employer, name='customer', flat=True)
>>> print(sqlparse.format(str(session.query(employer_alias)), reindent=True))
SELECT customer_employer.id AS customer_employer_id,
       customer_entity.id AS customer_entity_id,
       customer_entity.polymorphic_type AS customer_entity_polymorphic_type,
       customer_entity.name AS customer_entity_name,
       customer_employer.industry AS customer_employer_industry
FROM entity AS customer_entity
JOIN employer AS customer_employer ON customer_entity.id = customer_employer.id

Pending your input on naming, I'll see what tests may be involved and work up a PR.

ericatkin · 2020-04-17T20:56:49Z

Perhaps this should be a separate issue, but is there some sort of global configuration for the flat arg to aliased() and siblings? I get the backward compatibility motivation, but requiring flat=True everywhere is unfortunate for the API, especially since there are performance implications. Maybe allow (and make default) flat=None which informs the engine to decide (presumably the postgres engine and most others would choose True), while still allowing an override by specifying True or False on the aliased() call. Though, if someone really wanted this, I imagine they'd want it global as well, ie. a flat arg to the engine constructor that could override the engine's preference (and warn if it's not allowed, ie. sqlite < 3.7.16). I guess I don't see the need for this to be controlled per-alias in most cases, but I imagine my perspective is naive :)

ericatkin · 2020-04-17T22:21:07Z

FYI, you probably know this, but the flat style is significantly faster on postgres (probably because the subquery style imposes optimization fences on the query planner). Here are the results (in seconds) from a particularly nasty query (procedurally generated) with and without flat joins. This seems like good motivation to provide an easier way (perhaps default without breaking compatibility as described above) to use the flat style globally without adding flat=True across your whole code base.

flat=False
0.3603609240017249
30.251361631999316 ordered

flat=True
0.19413214000087464
0.7021742640063167 ordered

zzzeek · 2020-04-18T00:06:41Z

Perhaps this should be a separate issue, but is there some sort of global configuration for the flat arg to aliased() and siblings?

no, that's definitely not how we would do things here. doing "global" API changes like that is a recipe for disaster.

since this is Python, do the "global" thing on your end:


from sqlalchemy import aliased as _aliased

def aliased(*arg, **kw):
    kw['flat'] = True
    return _aliased(*arg, **kw)

Maybe allow (and make default) flat=None which informs the engine to decide (presumably the postgres engine and most others would choose True), while still allowing an override by specifying True or False on the aliased() call.

when the subquery thing came out, SQLite couldn't nest JOINs together. now it can and it's likely every database can. so we'd want to look towards changing the default at some point.

zzzeek · 2020-04-18T00:09:12Z

FYI, you probably know this, but the flat style is significantly faster on postgres (probably because the subquery style imposes optimization fences on the query planner). Here are the results (in seconds) from a particularly nasty query (procedurally generated) with and without flat joins. This seems like good motivation to provide an easier way (perhaps default without breaking compatibility as described above) to use the flat style globally without adding flat=True across your whole code base.

flat=False
0.3603609240017249
30.251361631999316 ordered

flat=True
0.19413214000087464
0.7021742640063167 ordered

I think the bigger perspective is that joined table inheritance is already a special case that generates pretty elaborate queries regardless that are usually going to need tuning. we should change this default however this whole use case is fairly niche so it has not been a priority.

ericatkin · 2020-04-18T00:26:59Z

Understood. Thanks for the excellent support! I'll wrap aliased for now and look forward to the default changing in the future. Are you hoping that I'll write some tests for the expressions accessor and should I closed this issues or do you do that?

…

On Fri, Apr 17, 2020, 6:09 PM mike bayer ***@***.***> wrote: FYI, you probably know this, but the flat style is significantly faster on postgres (probably because the subquery style imposes optimization fences on the query planner). Here are the results (in seconds) from a particularly nasty query (procedurally generated) with and without flat joins. This seems like good motivation to provide an easier way (perhaps default without breaking compatibility as described above) to use the flat style globally without adding flat=True across your whole code base. flat=False 0.3603609240017249 30.251361631999316 ordered flat=True 0.19413214000087464 0.7021742640063167 ordered I think the bigger perspective is that joined table inheritance is already a special case that generates pretty elaborate queries regardless that are usually going to need tuning. we should change this default however this whole use case is fairly niche so it has not been a priority. — You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub <#5262 (comment)>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/AAVJ6ECGEFFJOCEZ2BUSGJTRNDVTLANCNFSM4MKM665Q> .

zzzeek · 2020-04-18T02:25:16Z

oh, the patch has to be turned into something yes, we have folks here who might pick it up, the ORM tests are a little crufty to work with. if you want to propose a PR feel free, I'd look into test/orm/inheritance/test_basic.py. just something very quick to see that an .id attrifbute with two columns comes out as the two.

sqla-tester · 2020-04-20T16:28:32Z

Mike Bayer has proposed a fix for this issue in the master branch:

Add ColumnProperty.Comparator.expressions https://gerrit.sqlalchemy.org/c/sqlalchemy/sqlalchemy/+/1906

sqla-tester · 2020-04-20T16:30:07Z

Mike Bayer has proposed a fix for this issue in the rel_1_3 branch:

Add ColumnProperty.Comparator.expressions https://gerrit.sqlalchemy.org/c/sqlalchemy/sqlalchemy/+/1907

Added an accessor :attr:`.ColumnProperty.Comparator.expressions` which provides access to the group of columns mapped under a multi-column :class:`.ColumnProperty` attribute. Fixes: #5262 Change-Id: I44cf53ff0e6cf76a0c90eee4638ca96da3df8088 (cherry picked from commit d9d724267afe867984a23abaa87f62e15786005f)

zzzeek · 2024-07-03T20:27:34Z

why did this close...hm.. welp reopening

zzzeek · 2024-07-03T20:28:12Z

oh. this was closed. so confused, the PR is for a new issue that is as yet undefined

zzzeek added the question label Apr 17, 2020

CaselIT added the great mcve label Apr 17, 2020

zzzeek added use case orm and removed question labels Apr 17, 2020

zzzeek modified the milestones: 1.3.x., 1.3.x Apr 17, 2020

zzzeek changed the title ~~subquery render of aliased joined-table class breaks group by on postgres~~ add "expressions" accessor to column attributes for special GROUP BY situations Apr 17, 2020

sqlalchemy-bot closed this as completed in 6abbda3 Apr 22, 2020

ericatkin mentioned this issue Dec 19, 2023

Allow alias with flat and name (Fixes: #5262) zzzeek/sqlalchemy#513

Closed

3 tasks

ericatkin mentioned this issue Jun 25, 2024

Allow flat for join with name (Fixes: #5262) #11531

Closed

3 tasks

zzzeek reopened this Jul 3, 2024

zzzeek closed this as completed Jul 3, 2024

zzzeek mentioned this issue Jul 3, 2024

allow "name" with flat=True #11575

Closed

ericatkin added a commit to ericatkin/sqlalchemy that referenced this issue Sep 18, 2024

Allow flat for join with name (Fixes: sqlalchemy#5262)

5994b61

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

GitHub Sponsors

add "expressions" accessor to column attributes for special GROUP BY situations #5262

add "expressions" accessor to column attributes for special GROUP BY situations #5262

ericatkin commented Apr 17, 2020 •

edited

Loading

zzzeek commented Apr 17, 2020 •

edited

Loading

zzzeek commented Apr 17, 2020

ericatkin commented Apr 17, 2020

ericatkin commented Apr 17, 2020

zzzeek commented Apr 17, 2020

ericatkin commented Apr 17, 2020

zzzeek commented Apr 17, 2020

ericatkin commented Apr 17, 2020 •

edited

Loading

ericatkin commented Apr 17, 2020

ericatkin commented Apr 17, 2020

zzzeek commented Apr 18, 2020

zzzeek commented Apr 18, 2020

ericatkin commented Apr 18, 2020 via email

zzzeek commented Apr 18, 2020

sqla-tester commented Apr 20, 2020

sqla-tester commented Apr 20, 2020

zzzeek commented Jul 3, 2024

zzzeek commented Jul 3, 2024

add "expressions" accessor to column attributes for special GROUP BY situations #5262

add "expressions" accessor to column attributes for special GROUP BY situations #5262

Comments

ericatkin commented Apr 17, 2020 • edited Loading

zzzeek commented Apr 17, 2020 • edited Loading

zzzeek commented Apr 17, 2020

ericatkin commented Apr 17, 2020

ericatkin commented Apr 17, 2020

zzzeek commented Apr 17, 2020

ericatkin commented Apr 17, 2020

zzzeek commented Apr 17, 2020

ericatkin commented Apr 17, 2020 • edited Loading

ericatkin commented Apr 17, 2020

ericatkin commented Apr 17, 2020

zzzeek commented Apr 18, 2020

zzzeek commented Apr 18, 2020

ericatkin commented Apr 18, 2020 via email

zzzeek commented Apr 18, 2020

sqla-tester commented Apr 20, 2020

sqla-tester commented Apr 20, 2020

zzzeek commented Jul 3, 2024

zzzeek commented Jul 3, 2024

ericatkin commented Apr 17, 2020 •

edited

Loading

zzzeek commented Apr 17, 2020 •

edited

Loading

ericatkin commented Apr 17, 2020 •

edited

Loading