Is there a way to monkeypatch SQLAlchemy so that it intercepts all INSERT INTO statements generated during a session.flush() without sending them to the database? Instead, I want to capture these SQL statements and their parameters so I can later rewrite them into a single multi-row INSERT per table and execute them manually. The goal is to optimize batch inserts for MySQL using a single SQL network call. My SQLAlchemy version is 1.4.
# Declarative base shared by all ORM models below.
Base = declarative_base()


class User(Base):
    """ORM model for the ``users`` table; parent side of the 1-N relationship."""

    __tablename__ = 'users'

    # Auto-increment surrogate key (MySQL AUTO_INCREMENT).
    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(50), nullable=False)
    # Child profiles; delete-orphan cascade means profiles removed from this
    # collection are deleted, and flushing a new User also flushes its profiles.
    profiles = relationship(
        'Profile', back_populates='user', cascade='all, delete-orphan'
    )
class Profile(Base):
    """ORM model for the ``profiles`` table; child side, FK to ``users.id``."""

    __tablename__ = 'profiles'

    # Auto-increment surrogate key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Owning user; NOT NULL, so a Profile cannot exist without a User.
    user_id = Column(Integer, ForeignKey('users.id'), nullable=False)
    bio = Column(String(200), nullable=True)
    user = relationship('User', back_populates='profiles')
# MySQL engine. The MULTI_STATEMENTS client flag allows one execute() call to
# carry several ';'-separated statements — presumably intended for replaying
# the captured SQL in a single network round trip later (TODO confirm the
# server also permits multi-statements).
engine = create_engine(
    'mysql+pymysql://user:pass@host/dbname',
    connect_args={'client_flag': CLIENT.MULTI_STATEMENTS}
)
# autoflush disabled so SQL is emitted only on the explicit flush() in
# run_dry_flush(), not implicitly on queries.
Session = sessionmaker(bind=engine, autoflush=False)
# (statement, parameters) tuples recorded during the dry flush.
captured_sql = []
def before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    """Record every statement SQLAlchemy is about to hand to the DBAPI cursor.

    Registered with ``retval=True``, so the hook must return the
    (statement, parameters) pair it wants executed — here, unchanged.
    """
    entry = (statement, parameters)
    captured_sql.append(entry)
    return entry
def run_dry_flush(session, obj):
    """Add *obj* to *session* and flush, capturing SQL instead of sending it.

    Attaches a ``before_cursor_execute`` listener to the Session's Connection
    (so statements land in ``captured_sql``) and monkeypatches the underlying
    DBAPI connection's cursor factory so ``execute``/``executemany`` become
    no-ops — the SQLAlchemy event fires, but nothing reaches the server.

    The patch and the listener are always undone, even if flush() raises.

    NOTE(review): with execute() stubbed out, the real cursor's ``lastrowid``
    is never populated, so the flush may still fail (or assign stale ids) for
    autoincrement primary keys — confirm against SQLAlchemy 1.4's post-insert
    PK fetch before relying on this.
    """
    sa_conn = session.connection()
    event.listen(sa_conn, 'before_cursor_execute', before_cursor_execute, retval=True)
    dbapi_conn = sa_conn.connection
    orig_cursor_factory = dbapi_conn.cursor

    def cursor_factory(*args, **kwargs):
        # Build a real cursor, then neuter its execute methods so the
        # statement never hits the wire; the SQLAlchemy event has already
        # fired (and captured the SQL) by the time these are called.
        cur = orig_cursor_factory(*args, **kwargs)
        cur.execute = lambda query, params=None: 0
        cur.executemany = lambda query, params=None: 0
        return cur

    dbapi_conn.cursor = cursor_factory
    try:
        # Perform the ORM operations; SQL is captured, not sent.
        session.add(obj)
        session.flush()
    finally:
        # Always restore the connection and detach the listener — the
        # original left both patched if flush() raised.
        dbapi_conn.cursor = orig_cursor_factory
        event.remove(sa_conn, 'before_cursor_execute', before_cursor_execute)
# Demo: one User with three Profiles. The dry flush should capture the
# INSERTs for both tables in captured_sql without sending them to MySQL.
session = Session()
alice = User(name='alice')
alice.profiles.extend([
    Profile(bio='Bio 1'),
    Profile(bio='Bio 2'),
    Profile(bio='Bio 3')
])
run_dry_flush(session, alice)
SQLAlchemy doesn't combine the SQL statements at all — is there a way to get:
INSERT INTO users (name,id_parent) VALUES (bob,1), (alice,2), (stan,3);
instead of multi statements like here:
INSERT INTO users (name,id_parent) VALUES (bob,1);
INSERT INTO users (name,id_parent) VALUES (alice,2);
I was trying monkeypatching and mocking on a dummy DB session. I would like to avoid writing raw INSERT statements and keep using the ORM functionality. Maybe there is a way to capture the produced SQL statements without sending them to the database, and at the end group them by table and send only one INSERT INTO statement instead of multiple?