I was having some issues with my Flask app: some requests ended up using too much memory when sending a response with all the data in one go. Reading the Flask docs, I saw that I can stream the response, so I put together the following exercise to compare the memory usage and timing between the way I usually handle the request/response and the streamed way.
The thing is, the non-streamed version takes less than 1 second while the streamed version takes around 19 seconds. I was able to find some information on other use cases, but nothing explaining why this happens. I think I'm misunderstanding something, because such a big time difference between the two methods seems off.
Thanks!
This is the test code:
from flask import Flask, Response, jsonify, stream_with_context
import time
import json
from memory_profiler import memory_usage

app = Flask(__name__)

BIG_SIZE = 400_000


# --------- NON-STREAMED VERSION ----------
@app.route("/normal")
def normal_response():
    start_time = time.time()
    mem_before = memory_usage()[0]

    # Build everything in memory first
    data = [{"id": i, "value": f"Item-{i}"} for i in range(BIG_SIZE)]

    mem_after = memory_usage()[0]
    elapsed = time.time() - start_time
    print(f"[NORMAL] Memory Before: {mem_before:.2f} MB, After: {mem_after:.2f} MB, Elapsed: {elapsed:.2f}s")

    return jsonify(data)


# --------- STREAMED VERSION ----------
@app.route("/streamed")
def streamed_response():
    start_time = time.time()
    mem_before = memory_usage()[0]

    def generate():
        yield "["
        first = True
        for i in range(BIG_SIZE):
            record = {"id": i, "value": f"Item-{i}"}
            if not first:
                yield ","
            yield json.dumps(record)
            first = False
        yield "]"

    mem_after = memory_usage()[0]
    elapsed = time.time() - start_time
    print(f"[STREAMED] Memory Before: {mem_before:.2f} MB, After: {mem_after:.2f} MB, Elapsed: {elapsed:.2f}s")

    return Response(stream_with_context(generate()), mimetype="application/json")


if __name__ == "__main__":
    app.run(debug=True, host='0.0.0.0', port=8080)
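
In case it helps to reproduce the numbers, this is roughly how the two endpoints can be timed from the client side. It's a minimal sketch using the requests library (my own measuring script, not part of the app); the host and port just match the app.run call above, and stream=True makes sure the streamed response is actually consumed chunk by chunk rather than buffered.

import time
import requests

BASE = "http://localhost:8080"

for endpoint in ("/normal", "/streamed"):
    start = time.time()
    # stream=True so the body is read incrementally as it arrives
    with requests.get(BASE + endpoint, stream=True) as resp:
        total = sum(len(chunk) for chunk in resp.iter_content(chunk_size=8192))
    print(f"{endpoint}: {total} bytes in {time.time() - start:.2f}s")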