Table schema:
CREATE SEQUENCE fsa_online_id_seq INCREMENT 1 MINVALUE 1 MAXVALUE 9223372036854775807 START 1 CACHE 1;
CREATE TABLE "public"."fsa_online" (
"id" integer DEFAULT nextval('fsa_online_id_seq') NOT NULL,
"fsa_uuid" uuid NOT NULL,
"use_version" integer,
"last_original_version" integer,
"created_at" timestamp(0),
"updated_at" timestamp(0),
"deleted_at" timestamp(0),
"is_drug" boolean DEFAULT true NOT NULL,
CONSTRAINT "fsa_online_fsa_uuid_unique" UNIQUE ("fsa_uuid"),
CONSTRAINT "fsa_online_pkey" PRIMARY KEY ("id")
) WITH (oids = false);
CREATE INDEX "fsa_online_is_drug_index" ON "public"."fsa_online" USING btree ("is_drug");
CREATE INDEX "fsa_online_last_original_version_index" ON "public"."fsa_online" USING btree ("last_original_version");
CREATE INDEX "fsa_online_use_version_index" ON "public"."fsa_online" USING btree ("use_version");
CREATE SEQUENCE fsa_online_data_id_seq INCREMENT 1 MINVALUE 1 MAXVALUE 9223372036854775807 START 1 CACHE 1;
CREATE TABLE "public"."fsa_online_data" (
"id" integer DEFAULT nextval('fsa_online_data_id_seq') NOT NULL,
"fsa_id" integer NOT NULL,
"reason" text,
"is_original" boolean NOT NULL,
"is_published" boolean DEFAULT true NOT NULL,
"created_by_id" integer,
"created_at" timestamp(0),
"unparsed_data" jsonb,
"raw_id" integer NOT NULL,
"status_id" integer,
"type_id" integer,
"reg_num" character varying(255),
"start_date" date,
"end_date" date,
"docs" jsonb,
"docs_add" text,
"scheme" text,
"free_form" jsonb,
"fio_expert" text,
"lab_info" jsonb,
"change_info" jsonb,
"applicant_info" jsonb,
"manufacturer_info" jsonb,
"product_info" jsonb,
"standard_info" jsonb,
"manufacturer_tbl_info" jsonb,
"product_tbl_info" jsonb,
"certification_info" jsonb,
"trade_name" text,
"cert_num" character varying(255),
"man_form_txt" text,
"manufacturer_name" text,
"man_country_id" integer,
"serial_num" character varying(255),
"serial_size" integer,
"barcode" character varying(255),
"barcode_type_id" integer,
"is_cert" boolean NOT NULL,
"original_data" jsonb,
CONSTRAINT "fsa_online_data_pkey" PRIMARY KEY ("id"),
CONSTRAINT "fsa_online_data_fsa_id_foreign" FOREIGN KEY (fsa_id) REFERENCES fsa_online(id) ON UPDATE CASCADE ON DELETE RESTRICT NOT DEFERRABLE
) WITH (oids = false);
CREATE INDEX "fsa_online_data_barcode_index" ON "public"."fsa_online_data" USING btree ("barcode");
CREATE INDEX "fsa_online_data_barcode_type_id_index" ON "public"."fsa_online_data" USING btree ("barcode_type_id");
CREATE INDEX "fsa_online_data_cert_num_index" ON "public"."fsa_online_data" USING btree ("cert_num");
CREATE INDEX "fsa_online_data_created_by_id_index" ON "public"."fsa_online_data" USING btree ("created_by_id");
CREATE INDEX "fsa_online_data_end_date_index" ON "public"."fsa_online_data" USING btree ("end_date");
CREATE INDEX "fsa_online_data_fsa_id_index" ON "public"."fsa_online_data" USING btree ("fsa_id");
CREATE INDEX "fsa_online_data_is_cert_index" ON "public"."fsa_online_data" USING btree ("is_cert");
CREATE INDEX "fsa_online_data_man_country_id_index" ON "public"."fsa_online_data" USING btree ("man_country_id");
CREATE INDEX "fsa_online_data_raw_id_index" ON "public"."fsa_online_data" USING btree ("raw_id");
CREATE INDEX "fsa_online_data_reg_num_index" ON "public"."fsa_online_data" USING btree ("reg_num");
CREATE INDEX "fsa_online_data_start_date_index" ON "public"."fsa_online_data" USING btree ("start_date");
CREATE INDEX "fsa_online_data_status_id_index" ON "public"."fsa_online_data" USING btree ("status_id");
CREATE INDEX "fsa_online_data_trade_name_index" ON "public"."fsa_online_data" USING btree ("trade_name");
CREATE INDEX "fsa_online_data_type_id_index" ON "public"."fsa_online_data" USING btree ("type_id");
ALTER TABLE "fsa_online" ADD CONSTRAINT "fsa_online_last_original_version_foreign" FOREIGN KEY (last_original_version) REFERENCES fsa_online_data(id) ON UPDATE CASCADE ON DELETE RESTRICT NOT DEFERRABLE;
ALTER TABLE "fsa_online" ADD CONSTRAINT "fsa_online_use_version_foreign" FOREIGN KEY (use_version) REFERENCES fsa_online_data(id) ON UPDATE CASCADE ON DELETE RESTRICT NOT DEFERRABLE;
The "fsa_online" table contains about 800 000 records (possible grow up to 3 million records)
The "fsa_online_data" table contains about 3.5 million records (possible grow up to 15-25 million records)
I have the following query:
select
"fsa_online"."id",
"fsa_online"."fsa_uuid",
"fsa_online"."use_version",
"fsa_online"."last_original_version",
"fsa_online"."is_drug"
from
"fsa_online"
inner join "fsa_online_data" as "data" on "data"."id" = CASE WHEN fsa_online.use_version IS NULL THEN fsa_online.last_original_version ELSE fsa_online.use_version END
where
"unparsed_data" is not null
and "fsa_online"."deleted_at" is null
limit 10 offset 0
It took about 150 ms time.
But when I need to sort data, for example by "cert_num" column of the "fsa_online_data" table, it took an extremely long time (about 63000 ms).
select
"fsa_online"."id",
"fsa_online"."fsa_uuid",
"fsa_online"."use_version",
"fsa_online"."last_original_version",
"fsa_online"."is_drug"
from
"fsa_online"
inner join "fsa_online_data" as "data" on "data"."id" = CASE WHEN fsa_online.use_version IS NULL THEN fsa_online.last_original_version ELSE fsa_online.use_version END
where
"unparsed_data" is not null
and "fsa_online"."deleted_at" is null
ORDER BY "data"."cert_num"
limit 10 offset 0
I did a little trick by adding COLLATE "C" (but that is not correct, I really need to compare strings as Unicode strings) to ORDER BY statement and query took about 27500 ms.
Here is a query plan:
"Limit (cost=1037857.75..1037857.78 rows=10 width=545)"
" Output: fsa_online.id, fsa_online.fsa_uuid, fsa_online.use_version, fsa_online.last_original_version, fsa_online.is_drug, ((data.cert_num)::character varying(255))"
" -> Sort (cost=1037857.75..1039085.55 rows=491120 width=545)"
" Output: fsa_online.id, fsa_online.fsa_uuid, fsa_online.use_version, fsa_online.last_original_version, fsa_online.is_drug, ((data.cert_num)::character varying(255))"
" Sort Key: ((data.cert_num)::character varying(255)) COLLATE "C""
" -> Hash Join (cost=972267.98..1027244.83 rows=491120 width=545)"
" Output: fsa_online.id, fsa_online.fsa_uuid, fsa_online.use_version, fsa_online.last_original_version, fsa_online.is_drug, data.cert_num"
" Inner Unique: true"
" Hash Cond: (CASE WHEN (fsa_online.use_version IS NULL) THEN fsa_online.last_original_version ELSE fsa_online.use_version END = data.id)"
" -> Seq Scan on public.fsa_online (cost=0.00..19143.06 rows=899706 width=29)"
" Output: fsa_online.id, fsa_online.fsa_uuid, fsa_online.use_version, fsa_online.last_original_version, fsa_online.is_drug"
" Filter: (fsa_online.deleted_at IS NULL)"
" -> Hash (cost=934126.84..934126.84 rows=2077451 width=17)"
" Output: data.cert_num, data.id"
" -> Seq Scan on public.fsa_online_data data (cost=0.00..934126.84 rows=2077451 width=17)"
" Output: data.cert_num, data.id"
" Filter: (data.unparsed_data IS NOT NULL)"
Also, I need to perform queries using LIKE statements on different text columns (full-text search is not good for this case, because text columns contain an arbitrary data, like serial numbers).
select count(*)
from "fsa_online"
inner join "fsa_online_data" as "data" on "data"."id" =
CASE WHEN fsa_online.use_version IS NULL THEN fsa_online.last_original_version ELSE fsa_online.use_version END
The result is about 900 000 records. The "fsa_online_data" table size is 7GB.
Hardware list:
- Samsung SSD EVO 850
- Intel Core i7 6700k
- 16 GB DDR4 RAM
Tested on PostgreSQL versions: 9.6.9 and 10.5
How can I improve the performance of ORDER BY operations by text columns? I want to reduce query execution time to 200-300ms.
ORDER BYbut you don't show the clause you actually used to produce the explain output. You talk aboutLIKE, but you don't show that in the query either. On which columns? Which patterns are possible? This overview of pattern matching options should be instrumental: dba.stackexchange.com/a/10696/3684