PostgreSQL Source Code git master
dependencies.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * dependencies.c
4 * POSTGRES functional dependencies
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/statistics/dependencies.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "access/htup_details.h"
19#include "nodes/nodeFuncs.h"
20#include "optimizer/clauses.h"
21#include "optimizer/optimizer.h"
22#include "parser/parsetree.h"
24#include "utils/fmgroids.h"
25#include "utils/lsyscache.h"
26#include "utils/memutils.h"
27#include "utils/selfuncs.h"
28#include "utils/syscache.h"
29#include "utils/typcache.h"
30
31/* size of the struct header fields (magic, type, ndeps) */
32#define SizeOfHeader (3 * sizeof(uint32))
33
34/* size of a serialized dependency (degree, natts, atts) */
35#define SizeOfItem(natts) \
36 (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
37
38/* minimal size of a dependency (with two attributes) */
39#define MinSizeOfItem SizeOfItem(2)
40
41/* minimal size of dependencies, when all deps are minimal */
42#define MinSizeOfItems(ndeps) \
43 (SizeOfHeader + (ndeps) * MinSizeOfItem)
44
45/*
46 * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
47 * k-permutations of n elements, except that the order does not matter for the
48 * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
49 */
51{
52 int k; /* size of the dependency */
53 int n; /* number of possible attributes */
54 int current; /* next dependency to return (index) */
55 AttrNumber ndependencies; /* number of dependencies generated */
56 AttrNumber *dependencies; /* array of pre-generated dependencies */
58
60
62 int index, AttrNumber start, AttrNumber *current);
67static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
68static bool dependency_is_fully_matched(MVDependency *dependency,
69 Bitmapset *attnums);
70static bool dependency_is_compatible_clause(Node *clause, Index relid,
72static bool dependency_is_compatible_expression(Node *clause, Index relid,
73 List *statlist, Node **expr);
75 int ndependencies, Bitmapset *attnums);
77 int varRelid, JoinType jointype,
78 SpecialJoinInfo *sjinfo,
79 MVDependency **dependencies,
80 int ndependencies,
81 AttrNumber *list_attnums,
82 Bitmapset **estimatedclauses);
83
84static void
86 AttrNumber start, AttrNumber *current)
87{
88 /*
89 * The generator handles the first (k-1) elements differently from the
90 * last element.
91 */
92 if (index < (state->k - 1))
93 {
95
96 /*
97 * The first (k-1) values have to be in ascending order, which we
98 * generate recursively.
99 */
100
101 for (i = start; i < state->n; i++)
102 {
103 current[index] = i;
104 generate_dependencies_recurse(state, (index + 1), (i + 1), current);
105 }
106 }
107 else
108 {
109 int i;
110
111 /*
112 * the last element is the implied value, which does not respect the
113 * ascending order. We just need to check that the value is not in the
114 * first (k-1) elements.
115 */
116
117 for (i = 0; i < state->n; i++)
118 {
119 int j;
120 bool match = false;
121
122 current[index] = i;
123
124 for (j = 0; j < index; j++)
125 {
126 if (current[j] == i)
127 {
128 match = true;
129 break;
130 }
131 }
132
133 /*
134 * If the value is not found in the first part of the dependency,
135 * we're done.
136 */
137 if (!match)
138 {
139 state->dependencies = (AttrNumber *) repalloc(state->dependencies,
140 state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
141 memcpy(&state->dependencies[(state->k * state->ndependencies)],
142 current, state->k * sizeof(AttrNumber));
143 state->ndependencies++;
144 }
145 }
146 }
147}
148
149/* generate all dependencies (k-permutations of n elements) */
150static void
152{
153 AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
154
155 generate_dependencies_recurse(state, 0, 0, current);
156
157 pfree(current);
158}
159
160/*
161 * initialize the DependencyGenerator of variations, and prebuild the variations
162 *
163 * This pre-builds all the variations. We could also generate them in
164 * DependencyGenerator_next(), but this seems simpler.
165 */
168{
170
171 Assert((n >= k) && (k > 0));
172
173 /* allocate the DependencyGenerator state */
175 state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
176
177 state->ndependencies = 0;
178 state->current = 0;
179 state->k = k;
180 state->n = n;
181
182 /* now actually pre-generate all the variations */
184
185 return state;
186}
187
188/* free the DependencyGenerator state */
189static void
191{
192 pfree(state->dependencies);
193 pfree(state);
194}
195
196/* generate next combination */
197static AttrNumber *
199{
200 if (state->current == state->ndependencies)
201 return NULL;
202
203 return &state->dependencies[state->k * state->current++];
204}
205
206
207/*
208 * validates functional dependency on the data
209 *
210 * An actual work horse of detecting functional dependencies. Given a variation
211 * of k attributes, it checks that the first (k-1) are sufficient to determine
212 * the last one.
213 */
214static double
216{
217 int i,
218 nitems;
221 AttrNumber *attnums_dep;
222
223 /* counters valid within a group */
224 int group_size = 0;
225 int n_violations = 0;
226
227 /* total number of rows supporting (consistent with) the dependency */
228 int n_supporting_rows = 0;
229
230 /* Make sure we have at least two input attributes. */
231 Assert(k >= 2);
232
233 /* sort info for all attributes columns */
234 mss = multi_sort_init(k);
235
236 /*
237 * Translate the array of indexes to regular attnums for the dependency
238 * (we will need this to identify the columns in StatsBuildData).
239 */
240 attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
241 for (i = 0; i < k; i++)
242 attnums_dep[i] = data->attnums[dependency[i]];
243
244 /*
245 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
246 *
247 * (a) sort the data lexicographically
248 *
249 * (b) split the data into groups by first (k-1) columns
250 *
251 * (c) for each group count different values in the last column
252 *
253 * We use the column data types' default sort operators and collations;
254 * perhaps at some point it'd be worth using column-specific collations?
255 */
256
257 /* prepare the sort function for the dimensions */
258 for (i = 0; i < k; i++)
259 {
260 VacAttrStats *colstat = data->stats[dependency[i]];
262
264 if (type->lt_opr == InvalidOid) /* shouldn't happen */
265 elog(ERROR, "cache lookup failed for ordering operator for type %u",
266 colstat->attrtypid);
267
268 /* prepare the sort function for this dimension */
269 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
270 }
271
272 /*
273 * build an array of SortItem(s) sorted using the multi-sort support
274 *
275 * XXX This relies on all stats entries pointing to the same tuple
276 * descriptor. For now that assumption holds, but it might change in the
277 * future for example if we support statistics on multiple tables.
278 */
279 items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
280
281 /*
282 * Walk through the sorted array, split it into rows according to the
283 * first (k-1) columns. If there's a single value in the last column, we
284 * count the group as 'supporting' the functional dependency. Otherwise we
285 * count it as contradicting.
286 */
287
288 /* start with the first row forming a group */
289 group_size = 1;
290
291 /* loop 1 beyond the end of the array so that we count the final group */
292 for (i = 1; i <= nitems; i++)
293 {
294 /*
295 * Check if the group ended, which may be either because we processed
296 * all the items (i==nitems), or because the i-th item is not equal to
297 * the preceding one.
298 */
299 if (i == nitems ||
300 multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
301 {
302 /*
303 * If no violations were found in the group then track the rows of
304 * the group as supporting the functional dependency.
305 */
306 if (n_violations == 0)
307 n_supporting_rows += group_size;
308
309 /* Reset counters for the new group */
310 n_violations = 0;
311 group_size = 1;
312 continue;
313 }
314 /* first columns match, but the last one does not (so contradicting) */
315 else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
316 n_violations++;
317
318 group_size++;
319 }
320
321 /* Compute the 'degree of validity' as (supporting/total). */
322 return (n_supporting_rows * 1.0 / data->numrows);
323}
324
325/*
326 * detects functional dependencies between groups of columns
327 *
328 * Generates all possible subsets of columns (variations) and computes
329 * the degree of validity for each one. For example when creating statistics
330 * on three columns (a,b,c) there are 9 possible dependencies
331 *
332 * two columns three columns
333 * ----------- -------------
334 * (a) -> b (a,b) -> c
335 * (a) -> c (a,c) -> b
336 * (b) -> a (b,c) -> a
337 * (b) -> c
338 * (c) -> a
339 * (c) -> b
340 */
343{
344 int i,
345 k;
346
347 /* result */
348 MVDependencies *dependencies = NULL;
349 MemoryContext cxt;
350
351 Assert(data->nattnums >= 2);
352
353 /* tracks memory allocated by dependency_degree calls */
355 "dependency_degree cxt",
357
358 /*
359 * We'll try build functional dependencies starting from the smallest ones
360 * covering just 2 columns, to the largest ones, covering all columns
361 * included in the statistics object. We start from the smallest ones
362 * because we want to be able to skip already implied ones.
363 */
364 for (k = 2; k <= data->nattnums; k++)
365 {
366 AttrNumber *dependency; /* array with k elements */
367
368 /* prepare a DependencyGenerator of variation */
370
371 /* generate all possible variations of k values (out of n) */
372 while ((dependency = DependencyGenerator_next(DependencyGenerator)))
373 {
374 double degree;
375 MVDependency *d;
376 MemoryContext oldcxt;
377
378 /* release memory used by dependency degree calculation */
379 oldcxt = MemoryContextSwitchTo(cxt);
380
381 /* compute how valid the dependency seems */
382 degree = dependency_degree(data, k, dependency);
383
384 MemoryContextSwitchTo(oldcxt);
386
387 /*
388 * if the dependency seems entirely invalid, don't store it
389 */
390 if (degree == 0.0)
391 continue;
392
393 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
394 + k * sizeof(AttrNumber));
395
396 /* copy the dependency (and keep the indexes into stxkeys) */
397 d->degree = degree;
398 d->nattributes = k;
399 for (i = 0; i < k; i++)
400 d->attributes[i] = data->attnums[dependency[i]];
401
402 /* initialize the list of dependencies */
403 if (dependencies == NULL)
404 {
405 dependencies
407
408 dependencies->magic = STATS_DEPS_MAGIC;
409 dependencies->type = STATS_DEPS_TYPE_BASIC;
410 dependencies->ndeps = 0;
411 }
412
413 dependencies->ndeps++;
414 dependencies = (MVDependencies *) repalloc(dependencies,
415 offsetof(MVDependencies, deps)
416 + dependencies->ndeps * sizeof(MVDependency *));
417
418 dependencies->deps[dependencies->ndeps - 1] = d;
419 }
420
421 /*
422 * we're done with variations of k elements, so free the
423 * DependencyGenerator
424 */
426 }
427
429
430 return dependencies;
431}
432
433
434/*
435 * Serialize list of dependencies into a bytea value.
436 */
437bytea *
439{
440 int i;
441 bytea *output;
442 char *tmp;
443 Size len;
444
445 /* we need to store ndeps, with a number of attributes for each one */
447
448 /* and also include space for the actual attribute numbers and degrees */
449 for (i = 0; i < dependencies->ndeps; i++)
450 len += SizeOfItem(dependencies->deps[i]->nattributes);
451
452 output = (bytea *) palloc0(len);
454
455 tmp = VARDATA(output);
456
457 /* Store the base struct values (magic, type, ndeps) */
458 memcpy(tmp, &dependencies->magic, sizeof(uint32));
459 tmp += sizeof(uint32);
460 memcpy(tmp, &dependencies->type, sizeof(uint32));
461 tmp += sizeof(uint32);
462 memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
463 tmp += sizeof(uint32);
464
465 /* store number of attributes and attribute numbers for each dependency */
466 for (i = 0; i < dependencies->ndeps; i++)
467 {
468 MVDependency *d = dependencies->deps[i];
469
470 memcpy(tmp, &d->degree, sizeof(double));
471 tmp += sizeof(double);
472
473 memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
474 tmp += sizeof(AttrNumber);
475
476 memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
477 tmp += sizeof(AttrNumber) * d->nattributes;
478
479 /* protect against overflow */
480 Assert(tmp <= ((char *) output + len));
481 }
482
483 /* make sure we've produced exactly the right amount of data */
484 Assert(tmp == ((char *) output + len));
485
486 return output;
487}
488
489/*
490 * Reads serialized dependencies into MVDependencies structure.
491 */
494{
495 int i;
496 Size min_expected_size;
497 MVDependencies *dependencies;
498 char *tmp;
499
500 if (data == NULL)
501 return NULL;
502
504 elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
506
507 /* read the MVDependencies header */
508 dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
509
510 /* initialize pointer to the data part (skip the varlena header) */
511 tmp = VARDATA_ANY(data);
512
513 /* read the header fields and perform basic sanity checks */
514 memcpy(&dependencies->magic, tmp, sizeof(uint32));
515 tmp += sizeof(uint32);
516 memcpy(&dependencies->type, tmp, sizeof(uint32));
517 tmp += sizeof(uint32);
518 memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
519 tmp += sizeof(uint32);
520
521 if (dependencies->magic != STATS_DEPS_MAGIC)
522 elog(ERROR, "invalid dependency magic %d (expected %d)",
523 dependencies->magic, STATS_DEPS_MAGIC);
524
525 if (dependencies->type != STATS_DEPS_TYPE_BASIC)
526 elog(ERROR, "invalid dependency type %d (expected %d)",
527 dependencies->type, STATS_DEPS_TYPE_BASIC);
528
529 if (dependencies->ndeps == 0)
530 elog(ERROR, "invalid zero-length item array in MVDependencies");
531
532 /* what minimum bytea size do we expect for those parameters */
533 min_expected_size = SizeOfItem(dependencies->ndeps);
534
535 if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
536 elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
537 VARSIZE_ANY_EXHDR(data), min_expected_size);
538
539 /* allocate space for the MCV items */
540 dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
541 + (dependencies->ndeps * sizeof(MVDependency *)));
542
543 for (i = 0; i < dependencies->ndeps; i++)
544 {
545 double degree;
546 AttrNumber k;
547 MVDependency *d;
548
549 /* degree of validity */
550 memcpy(&degree, tmp, sizeof(double));
551 tmp += sizeof(double);
552
553 /* number of attributes */
554 memcpy(&k, tmp, sizeof(AttrNumber));
555 tmp += sizeof(AttrNumber);
556
557 /* is the number of attributes valid? */
558 Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
559
560 /* now that we know the number of attributes, allocate the dependency */
561 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
562 + (k * sizeof(AttrNumber)));
563
564 d->degree = degree;
565 d->nattributes = k;
566
567 /* copy attribute numbers */
568 memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
569 tmp += sizeof(AttrNumber) * d->nattributes;
570
571 dependencies->deps[i] = d;
572
573 /* still within the bytea */
574 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
575 }
576
577 /* we should have consumed the whole bytea exactly */
578 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
579
580 return dependencies;
581}
582
583/*
584 * dependency_is_fully_matched
585 * checks that a functional dependency is fully matched given clauses on
586 * attributes (assuming the clauses are suitable equality clauses)
587 */
588static bool
590{
591 int j;
592
593 /*
594 * Check that the dependency actually is fully covered by clauses. We have
595 * to translate all attribute numbers, as those are referenced
596 */
597 for (j = 0; j < dependency->nattributes; j++)
598 {
599 int attnum = dependency->attributes[j];
600
601 if (!bms_is_member(attnum, attnums))
602 return false;
603 }
604
605 return true;
606}
607
608/*
609 * statext_dependencies_load
610 * Load the functional dependencies for the indicated pg_statistic_ext tuple
611 */
614{
615 MVDependencies *result;
616 bool isnull;
617 Datum deps;
618 HeapTuple htup;
619
620 htup = SearchSysCache2(STATEXTDATASTXOID,
621 ObjectIdGetDatum(mvoid),
622 BoolGetDatum(inh));
623 if (!HeapTupleIsValid(htup))
624 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
625
626 deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
627 Anum_pg_statistic_ext_data_stxddependencies, &isnull);
628 if (isnull)
629 elog(ERROR,
630 "requested statistics kind \"%c\" is not yet built for statistics object %u",
631 STATS_EXT_DEPENDENCIES, mvoid);
632
634
635 ReleaseSysCache(htup);
636
637 return result;
638}
639
640/*
641 * dependency_is_compatible_clause
642 * Determines if the clause is compatible with functional dependencies
643 *
644 * Only clauses that have the form of equality to a pseudoconstant, or can be
645 * interpreted that way, are currently accepted. Furthermore the variable
646 * part of the clause must be a simple Var belonging to the specified
647 * relation, whose attribute number we return in *attnum on success.
648 */
649static bool
651{
652 Var *var;
653 Node *clause_expr;
654
655 if (IsA(clause, RestrictInfo))
656 {
657 RestrictInfo *rinfo = (RestrictInfo *) clause;
658
659 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
660 if (rinfo->pseudoconstant)
661 return false;
662
663 /* Clauses referencing multiple, or no, varnos are incompatible */
664 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
665 return false;
666
667 clause = (Node *) rinfo->clause;
668 }
669
670 if (is_opclause(clause))
671 {
672 /* If it's an opclause, check for Var = Const or Const = Var. */
673 OpExpr *expr = (OpExpr *) clause;
674
675 /* Only expressions with two arguments are candidates. */
676 if (list_length(expr->args) != 2)
677 return false;
678
679 /* Make sure non-selected argument is a pseudoconstant. */
681 clause_expr = linitial(expr->args);
682 else if (is_pseudo_constant_clause(linitial(expr->args)))
683 clause_expr = lsecond(expr->args);
684 else
685 return false;
686
687 /*
688 * If it's not an "=" operator, just ignore the clause, as it's not
689 * compatible with functional dependencies.
690 *
691 * This uses the function for estimating selectivity, not the operator
692 * directly (a bit awkward, but well ...).
693 *
694 * XXX this is pretty dubious; probably it'd be better to check btree
695 * or hash opclass membership, so as not to be fooled by custom
696 * selectivity functions, and to be more consistent with decisions
697 * elsewhere in the planner.
698 */
699 if (get_oprrest(expr->opno) != F_EQSEL)
700 return false;
701
702 /* OK to proceed with checking "var" */
703 }
704 else if (IsA(clause, ScalarArrayOpExpr))
705 {
706 /* If it's a scalar array operator, check for Var IN Const. */
707 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
708
709 /*
710 * Reject ALL() variant, we only care about ANY/IN.
711 *
712 * XXX Maybe we should check if all the values are the same, and allow
713 * ALL in that case? Doesn't seem very practical, though.
714 */
715 if (!expr->useOr)
716 return false;
717
718 /* Only expressions with two arguments are candidates. */
719 if (list_length(expr->args) != 2)
720 return false;
721
722 /*
723 * We know it's always (Var IN Const), so we assume the var is the
724 * first argument, and pseudoconstant is the second one.
725 */
727 return false;
728
729 clause_expr = linitial(expr->args);
730
731 /*
732 * If it's not an "=" operator, just ignore the clause, as it's not
733 * compatible with functional dependencies. The operator is identified
734 * simply by looking at which function it uses to estimate
735 * selectivity. That's a bit strange, but it's what other similar
736 * places do.
737 */
738 if (get_oprrest(expr->opno) != F_EQSEL)
739 return false;
740
741 /* OK to proceed with checking "var" */
742 }
743 else if (is_orclause(clause))
744 {
745 BoolExpr *bool_expr = (BoolExpr *) clause;
746 ListCell *lc;
747
748 /* start with no attribute number */
750
751 foreach(lc, bool_expr->args)
752 {
753 AttrNumber clause_attnum;
754
755 /*
756 * Had we found incompatible clause in the arguments, treat the
757 * whole clause as incompatible.
758 */
760 relid, &clause_attnum))
761 return false;
762
764 *attnum = clause_attnum;
765
766 /* ensure all the variables are the same (same attnum) */
767 if (*attnum != clause_attnum)
768 return false;
769 }
770
771 /* the Var is already checked by the recursive call */
772 return true;
773 }
774 else if (is_notclause(clause))
775 {
776 /*
777 * "NOT x" can be interpreted as "x = false", so get the argument and
778 * proceed with seeing if it's a suitable Var.
779 */
780 clause_expr = (Node *) get_notclausearg(clause);
781 }
782 else
783 {
784 /*
785 * A boolean expression "x" can be interpreted as "x = true", so
786 * proceed with seeing if it's a suitable Var.
787 */
788 clause_expr = (Node *) clause;
789 }
790
791 /*
792 * We may ignore any RelabelType node above the operand. (There won't be
793 * more than one, since eval_const_expressions has been applied already.)
794 */
795 if (IsA(clause_expr, RelabelType))
796 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
797
798 /* We only support plain Vars for now */
799 if (!IsA(clause_expr, Var))
800 return false;
801
802 /* OK, we know we have a Var */
803 var = (Var *) clause_expr;
804
805 /* Ensure Var is from the correct relation */
806 if (var->varno != relid)
807 return false;
808
809 /* We also better ensure the Var is from the current level */
810 if (var->varlevelsup != 0)
811 return false;
812
813 /* Also ignore system attributes (we don't allow stats on those) */
815 return false;
816
817 *attnum = var->varattno;
818 return true;
819}
820
821/*
822 * find_strongest_dependency
823 * find the strongest dependency on the attributes
824 *
825 * When applying functional dependencies, we start with the strongest
826 * dependencies. That is, we select the dependency that:
827 *
828 * (a) has all attributes covered by equality clauses
829 *
830 * (b) has the most attributes
831 *
832 * (c) has the highest degree of validity
833 *
834 * This guarantees that we eliminate the most redundant conditions first
835 * (see the comment in dependencies_clauselist_selectivity).
836 */
837static MVDependency *
838find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
839 Bitmapset *attnums)
840{
841 int i,
842 j;
843 MVDependency *strongest = NULL;
844
845 /* number of attnums in clauses */
846 int nattnums = bms_num_members(attnums);
847
848 /*
849 * Iterate over the MVDependency items and find the strongest one from the
850 * fully-matched dependencies. We do the cheap checks first, before
851 * matching it against the attnums.
852 */
853 for (i = 0; i < ndependencies; i++)
854 {
855 for (j = 0; j < dependencies[i]->ndeps; j++)
856 {
857 MVDependency *dependency = dependencies[i]->deps[j];
858
859 /*
860 * Skip dependencies referencing more attributes than available
861 * clauses, as those can't be fully matched.
862 */
863 if (dependency->nattributes > nattnums)
864 continue;
865
866 if (strongest)
867 {
868 /* skip dependencies on fewer attributes than the strongest. */
869 if (dependency->nattributes < strongest->nattributes)
870 continue;
871
872 /* also skip weaker dependencies when attribute count matches */
873 if (strongest->nattributes == dependency->nattributes &&
874 strongest->degree > dependency->degree)
875 continue;
876 }
877
878 /*
879 * this dependency is stronger, but we must still check that it's
880 * fully matched to these attnums. We perform this check last as
881 * it's slightly more expensive than the previous checks.
882 */
883 if (dependency_is_fully_matched(dependency, attnums))
884 strongest = dependency; /* save new best match */
885 }
886 }
887
888 return strongest;
889}
890
891/*
892 * clauselist_apply_dependencies
893 * Apply the specified functional dependencies to a list of clauses and
894 * return the estimated selectivity of the clauses that are compatible
895 * with any of the given dependencies.
896 *
897 * This will estimate all not-already-estimated clauses that are compatible
898 * with functional dependencies, and which have an attribute mentioned by any
899 * of the given dependencies (either as an implying or implied attribute).
900 *
901 * Given (lists of) clauses on attributes (a,b) and a functional dependency
902 * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
903 * using the formula
904 *
905 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
906 *
907 * where 'f' is the degree of dependency. This reflects the fact that we
908 * expect a fraction f of all rows to be consistent with the dependency
909 * (a=>b), and so have a selectivity of P(a), while the remaining rows are
910 * treated as independent.
911 *
912 * In practice, we use a slightly modified version of this formula, which uses
913 * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
914 * should obviously not exceed either column's individual selectivity. I.e.,
915 * we actually combine selectivities using the formula
916 *
917 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
918 *
919 * This can make quite a difference if the specific values matching the
920 * clauses are not consistent with the functional dependency.
921 */
922static Selectivity
924 int varRelid, JoinType jointype,
925 SpecialJoinInfo *sjinfo,
926 MVDependency **dependencies, int ndependencies,
927 AttrNumber *list_attnums,
928 Bitmapset **estimatedclauses)
929{
930 Bitmapset *attnums;
931 int i;
932 int j;
933 int nattrs;
934 Selectivity *attr_sel;
935 int attidx;
936 int listidx;
937 ListCell *l;
939
940 /*
941 * Extract the attnums of all implying and implied attributes from all the
942 * given dependencies. Each of these attributes is expected to have at
943 * least 1 not-already-estimated compatible clause that we will estimate
944 * here.
945 */
946 attnums = NULL;
947 for (i = 0; i < ndependencies; i++)
948 {
949 for (j = 0; j < dependencies[i]->nattributes; j++)
950 {
951 AttrNumber attnum = dependencies[i]->attributes[j];
952
953 attnums = bms_add_member(attnums, attnum);
954 }
955 }
956
957 /*
958 * Compute per-column selectivity estimates for each of these attributes,
959 * and mark all the corresponding clauses as estimated.
960 */
961 nattrs = bms_num_members(attnums);
962 attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
963
964 attidx = 0;
965 i = -1;
966 while ((i = bms_next_member(attnums, i)) >= 0)
967 {
968 List *attr_clauses = NIL;
969 Selectivity simple_sel;
970
971 listidx = -1;
972 foreach(l, clauses)
973 {
974 Node *clause = (Node *) lfirst(l);
975
976 listidx++;
977 if (list_attnums[listidx] == i)
978 {
979 attr_clauses = lappend(attr_clauses, clause);
980 *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
981 }
982 }
983
984 simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
985 jointype, sjinfo, false);
986 attr_sel[attidx++] = simple_sel;
987 }
988
989 /*
990 * Now combine these selectivities using the dependency information. For
991 * chains of dependencies such as a -> b -> c, the b -> c dependency will
992 * come before the a -> b dependency in the array, so we traverse the
993 * array backwards to ensure such chains are computed in the right order.
994 *
995 * As explained above, pairs of selectivities are combined using the
996 * formula
997 *
998 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
999 *
1000 * to ensure that the combined selectivity is never greater than either
1001 * individual selectivity.
1002 *
1003 * Where multiple dependencies apply (e.g., a -> b -> c), we use
1004 * conditional probabilities to compute the overall result as follows:
1005 *
1006 * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1007 *
1008 * so we replace the selectivities of all implied attributes with
1009 * conditional probabilities, that are conditional on all their implying
1010 * attributes. The selectivities of all other non-implied attributes are
1011 * left as they are.
1012 */
1013 for (i = ndependencies - 1; i >= 0; i--)
1014 {
1015 MVDependency *dependency = dependencies[i];
1018 double f;
1019
1020 /* Selectivity of all the implying attributes */
1021 s1 = 1.0;
1022 for (j = 0; j < dependency->nattributes - 1; j++)
1023 {
1024 attnum = dependency->attributes[j];
1025 attidx = bms_member_index(attnums, attnum);
1026 s1 *= attr_sel[attidx];
1027 }
1028
1029 /* Original selectivity of the implied attribute */
1030 attnum = dependency->attributes[j];
1031 attidx = bms_member_index(attnums, attnum);
1032 s2 = attr_sel[attidx];
1033
1034 /*
1035 * Replace s2 with the conditional probability s2 given s1, computed
1036 * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1037 *
1038 * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1039 *
1040 * where P(a) = s1, the selectivity of the implying attributes, and
1041 * P(b) = s2, the selectivity of the implied attribute.
1042 */
1043 f = dependency->degree;
1044
1045 if (s1 <= s2)
1046 attr_sel[attidx] = f + (1 - f) * s2;
1047 else
1048 attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1049 }
1050
1051 /*
1052 * The overall selectivity of all the clauses on all these attributes is
1053 * then the product of all the original (non-implied) probabilities and
1054 * the new conditional (implied) probabilities.
1055 */
1056 s1 = 1.0;
1057 for (i = 0; i < nattrs; i++)
1058 s1 *= attr_sel[i];
1059
1061
1062 pfree(attr_sel);
1063 bms_free(attnums);
1064
1065 return s1;
1066}
1067
1068/*
1069 * dependency_is_compatible_expression
1070 * Determines if the expression is compatible with functional dependencies
1071 *
1072 * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1073 * expression is a simple Var. On success, return the matching statistics
1074 * expression into *expr.
1075 */
1076static bool
1077dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1078{
1079 ListCell *lc,
1080 *lc2;
1081 Node *clause_expr;
1082
1083 if (IsA(clause, RestrictInfo))
1084 {
1085 RestrictInfo *rinfo = (RestrictInfo *) clause;
1086
1087 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1088 if (rinfo->pseudoconstant)
1089 return false;
1090
1091 /* Clauses referencing multiple, or no, varnos are incompatible */
1092 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1093 return false;
1094
1095 clause = (Node *) rinfo->clause;
1096 }
1097
1098 if (is_opclause(clause))
1099 {
1100 /* If it's an opclause, check for Var = Const or Const = Var. */
1101 OpExpr *expr = (OpExpr *) clause;
1102
1103 /* Only expressions with two arguments are candidates. */
1104 if (list_length(expr->args) != 2)
1105 return false;
1106
1107 /* Make sure non-selected argument is a pseudoconstant. */
1109 clause_expr = linitial(expr->args);
1110 else if (is_pseudo_constant_clause(linitial(expr->args)))
1111 clause_expr = lsecond(expr->args);
1112 else
1113 return false;
1114
1115 /*
1116 * If it's not an "=" operator, just ignore the clause, as it's not
1117 * compatible with functional dependencies.
1118 *
1119 * This uses the function for estimating selectivity, not the operator
1120 * directly (a bit awkward, but well ...).
1121 *
1122 * XXX this is pretty dubious; probably it'd be better to check btree
1123 * or hash opclass membership, so as not to be fooled by custom
1124 * selectivity functions, and to be more consistent with decisions
1125 * elsewhere in the planner.
1126 */
1127 if (get_oprrest(expr->opno) != F_EQSEL)
1128 return false;
1129
1130 /* OK to proceed with checking "var" */
1131 }
1132 else if (IsA(clause, ScalarArrayOpExpr))
1133 {
1134 /* If it's a scalar array operator, check for Var IN Const. */
1135 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1136
1137 /*
1138 * Reject ALL() variant, we only care about ANY/IN.
1139 *
1140 * FIXME Maybe we should check if all the values are the same, and
1141 * allow ALL in that case? Doesn't seem very practical, though.
1142 */
1143 if (!expr->useOr)
1144 return false;
1145
1146 /* Only expressions with two arguments are candidates. */
1147 if (list_length(expr->args) != 2)
1148 return false;
1149
1150 /*
1151 * We know it's always (Var IN Const), so we assume the var is the
1152 * first argument, and pseudoconstant is the second one.
1153 */
1155 return false;
1156
1157 clause_expr = linitial(expr->args);
1158
1159 /*
1160 * If it's not an "=" operator, just ignore the clause, as it's not
1161 * compatible with functional dependencies. The operator is identified
1162 * simply by looking at which function it uses to estimate
1163 * selectivity. That's a bit strange, but it's what other similar
1164 * places do.
1165 */
1166 if (get_oprrest(expr->opno) != F_EQSEL)
1167 return false;
1168
1169 /* OK to proceed with checking "var" */
1170 }
1171 else if (is_orclause(clause))
1172 {
1173 BoolExpr *bool_expr = (BoolExpr *) clause;
1174
1175 /* start with no expression (we'll use the first match) */
1176 *expr = NULL;
1177
1178 foreach(lc, bool_expr->args)
1179 {
1180 Node *or_expr = NULL;
1181
1182 /*
1183 * Had we found incompatible expression in the arguments, treat
1184 * the whole expression as incompatible.
1185 */
1187 statlist, &or_expr))
1188 return false;
1189
1190 if (*expr == NULL)
1191 *expr = or_expr;
1192
1193 /* ensure all the expressions are the same */
1194 if (!equal(or_expr, *expr))
1195 return false;
1196 }
1197
1198 /* the expression is already checked by the recursive call */
1199 return true;
1200 }
1201 else if (is_notclause(clause))
1202 {
1203 /*
1204 * "NOT x" can be interpreted as "x = false", so get the argument and
1205 * proceed with seeing if it's a suitable Var.
1206 */
1207 clause_expr = (Node *) get_notclausearg(clause);
1208 }
1209 else
1210 {
1211 /*
1212 * A boolean expression "x" can be interpreted as "x = true", so
1213 * proceed with seeing if it's a suitable Var.
1214 */
1215 clause_expr = (Node *) clause;
1216 }
1217
1218 /*
1219 * We may ignore any RelabelType node above the operand. (There won't be
1220 * more than one, since eval_const_expressions has been applied already.)
1221 */
1222 if (IsA(clause_expr, RelabelType))
1223 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1224
1225 /*
1226 * Search for a matching statistics expression.
1227 */
1228 foreach(lc, statlist)
1229 {
1231
1232 /* ignore stats without dependencies */
1233 if (info->kind != STATS_EXT_DEPENDENCIES)
1234 continue;
1235
1236 foreach(lc2, info->exprs)
1237 {
1238 Node *stat_expr = (Node *) lfirst(lc2);
1239
1240 if (equal(clause_expr, stat_expr))
1241 {
1242 *expr = stat_expr;
1243 return true;
1244 }
1245 }
1246 }
1247
1248 return false;
1249}
1250
1251/*
1252 * dependencies_clauselist_selectivity
1253 * Return the estimated selectivity of (a subset of) the given clauses
1254 * using functional dependency statistics, or 1.0 if no useful functional
1255 * dependency statistic exists.
1256 *
1257 * 'estimatedclauses' is an input/output argument that gets a bit set
1258 * corresponding to the (zero-based) list index of each clause that is included
1259 * in the estimated selectivity.
1260 *
1261 * Given equality clauses on attributes (a,b) we find the strongest dependency
1262 * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1263 * dependency, we then combine the per-clause selectivities using the formula
1264 *
1265 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1266 *
1267 * where 'f' is the degree of the dependency. (Actually we use a slightly
1268 * modified version of this formula -- see clauselist_apply_dependencies()).
1269 *
1270 * With clauses on more than two attributes, the dependencies are applied
1271 * recursively, starting with the widest/strongest dependencies. For example
1272 * P(a,b,c) is first split like this:
1273 *
1274 * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1275 *
1276 * assuming (a,b=>c) is the strongest dependency.
1277 */
1280 List *clauses,
1281 int varRelid,
1282 JoinType jointype,
1283 SpecialJoinInfo *sjinfo,
1284 RelOptInfo *rel,
1285 Bitmapset **estimatedclauses)
1286{
1287 Selectivity s1 = 1.0;
1288 ListCell *l;
1289 Bitmapset *clauses_attnums = NULL;
1290 AttrNumber *list_attnums;
1291 int listidx;
1292 MVDependencies **func_dependencies;
1293 int nfunc_dependencies;
1294 int total_ndeps;
1295 MVDependency **dependencies;
1296 int ndependencies;
1297 int i;
1298 AttrNumber attnum_offset;
1300
1301 /* unique expressions */
1302 Node **unique_exprs;
1303 int unique_exprs_cnt;
1304
1305 /* check if there's any stats that might be useful for us. */
1306 if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1307 return 1.0;
1308
1309 list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
1310 list_length(clauses));
1311
1312 /*
1313 * We allocate space as if every clause was a unique expression, although
1314 * that's probably overkill. Some will be simple column references that
1315 * we'll translate to attnums, and there might be duplicates. But it's
1316 * easier and cheaper to just do one allocation than repalloc later.
1317 */
1318 unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
1319 unique_exprs_cnt = 0;
1320
1321 /*
1322 * Pre-process the clauses list to extract the attnums seen in each item.
1323 * We need to determine if there's any clauses which will be useful for
1324 * dependency selectivity estimations. Along the way we'll record all of
1325 * the attnums for each clause in a list which we'll reference later so we
1326 * don't need to repeat the same work again. We'll also keep track of all
1327 * attnums seen.
1328 *
1329 * We also skip clauses that we already estimated using different types of
1330 * statistics (we treat them as incompatible).
1331 *
1332 * To handle expressions, we assign them negative attnums, as if it was a
1333 * system attribute (this is fine, as we only allow extended stats on user
1334 * attributes). And then we offset everything by the number of
1335 * expressions, so that we can store the values in a bitmapset.
1336 */
1337 listidx = 0;
1338 foreach(l, clauses)
1339 {
1340 Node *clause = (Node *) lfirst(l);
1342 Node *expr = NULL;
1343
1344 /* ignore clause by default */
1345 list_attnums[listidx] = InvalidAttrNumber;
1346
1347 if (!bms_is_member(listidx, *estimatedclauses))
1348 {
1349 /*
1350 * If it's a simple column reference, just extract the attnum. If
1351 * it's an expression, assign a negative attnum as if it was a
1352 * system attribute.
1353 */
1354 if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1355 {
1356 list_attnums[listidx] = attnum;
1357 }
1358 else if (dependency_is_compatible_expression(clause, rel->relid,
1359 rel->statlist,
1360 &expr))
1361 {
1362 /* special attnum assigned to this expression */
1364
1365 Assert(expr != NULL);
1366
1367 /* If the expression is duplicate, use the same attnum. */
1368 for (i = 0; i < unique_exprs_cnt; i++)
1369 {
1370 if (equal(unique_exprs[i], expr))
1371 {
1372 /* negative attribute number to expression */
1373 attnum = -(i + 1);
1374 break;
1375 }
1376 }
1377
1378 /* not found in the list, so add it */
1380 {
1381 unique_exprs[unique_exprs_cnt++] = expr;
1382
1383 /* after incrementing the value, to get -1, -2, ... */
1384 attnum = (-unique_exprs_cnt);
1385 }
1386
1387 /* remember which attnum was assigned to this clause */
1388 list_attnums[listidx] = attnum;
1389 }
1390 }
1391
1392 listidx++;
1393 }
1394
1395 Assert(listidx == list_length(clauses));
1396
1397 /*
1398 * How much we need to offset the attnums? If there are no expressions,
1399 * then no offset is needed. Otherwise we need to offset enough for the
1400 * lowest value (-unique_exprs_cnt) to become 1.
1401 */
1402 if (unique_exprs_cnt > 0)
1403 attnum_offset = (unique_exprs_cnt + 1);
1404 else
1405 attnum_offset = 0;
1406
1407 /*
1408 * Now that we know how many expressions there are, we can offset the
1409 * values just enough to build the bitmapset.
1410 */
1411 for (i = 0; i < list_length(clauses); i++)
1412 {
1414
1415 /* ignore incompatible or already estimated clauses */
1416 if (list_attnums[i] == InvalidAttrNumber)
1417 continue;
1418
1419 /* make sure the attnum is in the expected range */
1420 Assert(list_attnums[i] >= (-unique_exprs_cnt));
1421 Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1422
1423 /* make sure the attnum is positive (valid AttrNumber) */
1424 attnum = list_attnums[i] + attnum_offset;
1425
1426 /*
1427 * Either it's a regular attribute, or it's an expression, in which
1428 * case we must not have seen it before (expressions are unique).
1429 *
1430 * XXX Check whether it's a regular attribute has to be done using the
1431 * original attnum, while the second check has to use the value with
1432 * an offset.
1433 */
1434 Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1435 !bms_is_member(attnum, clauses_attnums));
1436
1437 /*
1438 * Remember the offset attnum, both for attributes and expressions.
1439 * We'll pass list_attnums to clauselist_apply_dependencies, which
1440 * uses it to identify clauses in a bitmap. We could also pass the
1441 * offset, but this is more convenient.
1442 */
1443 list_attnums[i] = attnum;
1444
1445 clauses_attnums = bms_add_member(clauses_attnums, attnum);
1446 }
1447
1448 /*
1449 * If there's not at least two distinct attnums and expressions, then
1450 * reject the whole list of clauses. We must return 1.0 so the calling
1451 * function's selectivity is unaffected.
1452 */
1453 if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1454 {
1455 bms_free(clauses_attnums);
1456 pfree(list_attnums);
1457 return 1.0;
1458 }
1459
1460 /*
1461 * Load all functional dependencies matching at least two parameters. We
1462 * can simply consider all dependencies at once, without having to search
1463 * for the best statistics object.
1464 *
1465 * To not waste cycles and memory, we deserialize dependencies only for
1466 * statistics that match at least two attributes. The array is allocated
1467 * with the assumption that all objects match - we could grow the array to
1468 * make it just the right size, but it's likely wasteful anyway thanks to
1469 * moving the freed chunks to freelists etc.
1470 */
1471 func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
1472 list_length(rel->statlist));
1473 nfunc_dependencies = 0;
1474 total_ndeps = 0;
1475
1476 foreach(l, rel->statlist)
1477 {
1479 int nmatched;
1480 int nexprs;
1481 int k;
1482 MVDependencies *deps;
1483
1484 /* skip statistics that are not of the correct type */
1485 if (stat->kind != STATS_EXT_DEPENDENCIES)
1486 continue;
1487
1488 /* skip statistics with mismatching stxdinherit value */
1489 if (stat->inherit != rte->inh)
1490 continue;
1491
1492 /*
1493 * Count matching attributes - we have to undo the attnum offsets. The
1494 * input attribute numbers are not offset (expressions are not
1495 * included in stat->keys, so it's not necessary). But we need to
1496 * offset it before checking against clauses_attnums.
1497 */
1498 nmatched = 0;
1499 k = -1;
1500 while ((k = bms_next_member(stat->keys, k)) >= 0)
1501 {
1503
1504 /* skip expressions */
1506 continue;
1507
1508 /* apply the same offset as above */
1509 attnum += attnum_offset;
1510
1511 if (bms_is_member(attnum, clauses_attnums))
1512 nmatched++;
1513 }
1514
1515 /* count matching expressions */
1516 nexprs = 0;
1517 for (i = 0; i < unique_exprs_cnt; i++)
1518 {
1519 ListCell *lc;
1520
1521 foreach(lc, stat->exprs)
1522 {
1523 Node *stat_expr = (Node *) lfirst(lc);
1524
1525 /* try to match it */
1526 if (equal(stat_expr, unique_exprs[i]))
1527 nexprs++;
1528 }
1529 }
1530
1531 /*
1532 * Skip objects matching fewer than two attributes/expressions from
1533 * clauses.
1534 */
1535 if (nmatched + nexprs < 2)
1536 continue;
1537
1538 deps = statext_dependencies_load(stat->statOid, rte->inh);
1539
1540 /*
1541 * The expressions may be represented by different attnums in the
1542 * stats, we need to remap them to be consistent with the clauses.
1543 * That will make the later steps (e.g. picking the strongest item and
1544 * so on) much simpler and cheaper, because it won't need to care
1545 * about the offset at all.
1546 *
1547 * When we're at it, we can ignore dependencies that are not fully
1548 * matched by clauses (i.e. referencing attributes or expressions that
1549 * are not in the clauses).
1550 *
1551 * We have to do this for all statistics, as long as there are any
1552 * expressions - we need to shift the attnums in all dependencies.
1553 *
1554 * XXX Maybe we should do this always, because it also eliminates some
1555 * of the dependencies early. It might be cheaper than having to walk
1556 * the longer list in find_strongest_dependency later, especially as
1557 * we need to do that repeatedly?
1558 *
1559 * XXX We have to do this even when there are no expressions in
1560 * clauses, otherwise find_strongest_dependency may fail for stats
1561 * with expressions (due to lookup of negative value in bitmap). So we
1562 * need to at least filter out those dependencies. Maybe we could do
1563 * it in a cheaper way (if there are no expr clauses, we can just
1564 * discard all negative attnums without any lookups).
1565 */
1566 if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1567 {
1568 int ndeps = 0;
1569
1570 for (i = 0; i < deps->ndeps; i++)
1571 {
1572 bool skip = false;
1573 MVDependency *dep = deps->deps[i];
1574 int j;
1575
1576 for (j = 0; j < dep->nattributes; j++)
1577 {
1578 int idx;
1579 Node *expr;
1580 AttrNumber unique_attnum = InvalidAttrNumber;
1582
1583 /* undo the per-statistics offset */
1584 attnum = dep->attributes[j];
1585
1586 /*
1587 * For regular attributes we can simply check if it
1588 * matches any clause. If there's no matching clause, we
1589 * can just ignore it. We need to offset the attnum
1590 * though.
1591 */
1593 {
1594 dep->attributes[j] = attnum + attnum_offset;
1595
1596 if (!bms_is_member(dep->attributes[j], clauses_attnums))
1597 {
1598 skip = true;
1599 break;
1600 }
1601
1602 continue;
1603 }
1604
1605 /*
1606 * the attnum should be a valid system attnum (-1, -2,
1607 * ...)
1608 */
1610
1611 /*
1612 * For expressions, we need to do two translations. First
1613 * we have to translate the negative attnum to index in
1614 * the list of expressions (in the statistics object).
1615 * Then we need to see if there's a matching clause. The
1616 * index of the unique expression determines the attnum
1617 * (and we offset it).
1618 */
1619 idx = -(1 + attnum);
1620
1621 /* Is the expression index is valid? */
1622 Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1623
1624 expr = (Node *) list_nth(stat->exprs, idx);
1625
1626 /* try to find the expression in the unique list */
1627 for (int m = 0; m < unique_exprs_cnt; m++)
1628 {
1629 /*
1630 * found a matching unique expression, use the attnum
1631 * (derived from index of the unique expression)
1632 */
1633 if (equal(unique_exprs[m], expr))
1634 {
1635 unique_attnum = -(m + 1) + attnum_offset;
1636 break;
1637 }
1638 }
1639
1640 /*
1641 * Found no matching expression, so we can simply skip
1642 * this dependency, because there's no chance it will be
1643 * fully covered.
1644 */
1645 if (unique_attnum == InvalidAttrNumber)
1646 {
1647 skip = true;
1648 break;
1649 }
1650
1651 /* otherwise remap it to the new attnum */
1652 dep->attributes[j] = unique_attnum;
1653 }
1654
1655 /* if found a matching dependency, keep it */
1656 if (!skip)
1657 {
1658 /* maybe we've skipped something earlier, so move it */
1659 if (ndeps != i)
1660 deps->deps[ndeps] = deps->deps[i];
1661
1662 ndeps++;
1663 }
1664 }
1665
1666 deps->ndeps = ndeps;
1667 }
1668
1669 /*
1670 * It's possible we've removed all dependencies, in which case we
1671 * don't bother adding it to the list.
1672 */
1673 if (deps->ndeps > 0)
1674 {
1675 func_dependencies[nfunc_dependencies] = deps;
1676 total_ndeps += deps->ndeps;
1677 nfunc_dependencies++;
1678 }
1679 }
1680
1681 /* if no matching stats could be found then we've nothing to do */
1682 if (nfunc_dependencies == 0)
1683 {
1684 pfree(func_dependencies);
1685 bms_free(clauses_attnums);
1686 pfree(list_attnums);
1687 pfree(unique_exprs);
1688 return 1.0;
1689 }
1690
1691 /*
1692 * Work out which dependencies we can apply, starting with the
1693 * widest/strongest ones, and proceeding to smaller/weaker ones.
1694 */
1695 dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
1696 total_ndeps);
1697 ndependencies = 0;
1698
1699 while (true)
1700 {
1701 MVDependency *dependency;
1703
1704 /* the widest/strongest dependency, fully matched by clauses */
1705 dependency = find_strongest_dependency(func_dependencies,
1706 nfunc_dependencies,
1707 clauses_attnums);
1708 if (!dependency)
1709 break;
1710
1711 dependencies[ndependencies++] = dependency;
1712
1713 /* Ignore dependencies using this implied attribute in later loops */
1714 attnum = dependency->attributes[dependency->nattributes - 1];
1715 clauses_attnums = bms_del_member(clauses_attnums, attnum);
1716 }
1717
1718 /*
1719 * If we found applicable dependencies, use them to estimate all
1720 * compatible clauses on attributes that they refer to.
1721 */
1722 if (ndependencies != 0)
1723 s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1724 sjinfo, dependencies, ndependencies,
1725 list_attnums, estimatedclauses);
1726
1727 /* free deserialized functional dependencies (and then the array) */
1728 for (i = 0; i < nfunc_dependencies; i++)
1729 pfree(func_dependencies[i]);
1730
1731 pfree(dependencies);
1732 pfree(func_dependencies);
1733 bms_free(clauses_attnums);
1734 pfree(list_attnums);
1735 pfree(unique_exprs);
1736
1737 return s1;
1738}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:262
int16 AttrNumber
Definition: attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition: attnum.h:34
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
#define InvalidAttrNumber
Definition: attnum.h:23
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1306
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:868
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:751
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:510
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:815
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:781
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:539
@ BMS_SINGLETON
Definition: bitmapset.h:72
@ BMS_MULTIPLE
Definition: bitmapset.h:73
#define VARHDRSZ
Definition: c.h:702
uint32_t uint32
Definition: c.h:543
unsigned int Index
Definition: c.h:624
size_t Size
Definition: c.h:615
bool is_pseudo_constant_clause(Node *clause)
Definition: clauses.c:2101
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:117
static void generate_dependencies(DependencyGenerator state)
Definition: dependencies.c:151
#define SizeOfHeader
Definition: dependencies.c:32
MVDependencies * statext_dependencies_deserialize(bytea *data)
Definition: dependencies.c:493
MVDependencies * statext_dependencies_load(Oid mvoid, bool inh)
Definition: dependencies.c:613
static bool dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:198
MVDependencies * statext_dependencies_build(StatsBuildData *data)
Definition: dependencies.c:342
static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
Definition: dependencies.c:650
static bool dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
Definition: dependencies.c:589
bytea * statext_dependencies_serialize(MVDependencies *dependencies)
Definition: dependencies.c:438
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:190
struct DependencyGeneratorData DependencyGeneratorData
static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, MVDependency **dependencies, int ndependencies, AttrNumber *list_attnums, Bitmapset **estimatedclauses)
Definition: dependencies.c:923
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:167
#define SizeOfItem(natts)
Definition: dependencies.c:35
Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses)
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
Definition: dependencies.c:215
static void generate_dependencies_recurse(DependencyGenerator state, int index, AttrNumber start, AttrNumber *current)
Definition: dependencies.c:85
DependencyGeneratorData * DependencyGenerator
Definition: dependencies.c:59
static MVDependency * find_strongest_dependency(MVDependencies **dependencies, int ndependencies, Bitmapset *attnums)
Definition: dependencies.c:838
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
bool has_stats_of_kind(List *stats, char requiredkind)
int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
Assert(PointerIsAligned(start, uint64))
return str start
for(;;)
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define MaxHeapAttributeNumber
Definition: htup_details.h:48
#define nitems(x)
Definition: indent.h:31
FILE * output
int j
Definition: isn.c:78
int i
Definition: isn.c:77
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
List * lappend(List *list, void *datum)
Definition: list.c:339
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1724
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:400
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc0(Size size)
Definition: mcxt.c:1395
void * palloc(Size size)
Definition: mcxt.c:1365
MemoryContext CurrentMemoryContext
Definition: mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:469
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:116
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:125
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:134
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
double Selectivity
Definition: nodes.h:260
JoinType
Definition: nodes.h:298
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:610
int16 attnum
Definition: pg_attribute.h:74
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:108
const void size_t len
const void * data
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
uint64_t Datum
Definition: postgres.h:70
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
char * s1
char * s2
tree ctl root
Definition: radixtree.h:1857
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define STATS_DEPS_MAGIC
Definition: statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:44
List * args
Definition: primnodes.h:972
AttrNumber * dependencies
Definition: dependencies.c:56
Definition: pg_list.h:54
uint32 ndeps
Definition: statistics.h:61
uint32 magic
Definition: statistics.h:59
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:62
AttrNumber nattributes
Definition: statistics.h:53
double degree
Definition: statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:54
Definition: nodes.h:135
Oid opno
Definition: primnodes.h:850
List * args
Definition: primnodes.h:868
Index relid
Definition: pathnodes.h:973
List * statlist
Definition: pathnodes.h:997
Expr * clause
Definition: pathnodes.h:2792
Oid attrtypid
Definition: vacuum.h:126
Oid attrcollid
Definition: vacuum.h:129
Definition: primnodes.h:262
AttrNumber varattno
Definition: primnodes.h:274
int varno
Definition: primnodes.h:269
Index varlevelsup
Definition: primnodes.h:294
Definition: type.h:96
Definition: regguts.h:323
Definition: c.h:697
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:264
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:595
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition: syscache.c:230
static ItemArray items
Definition: test_tidstore.c:48
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:386
#define TYPECACHE_LT_OPR
Definition: typcache.h:139
static Size VARSIZE_ANY(const void *PTR)
Definition: varatt.h:460
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition: varatt.h:472
static char * VARDATA(const void *PTR)
Definition: varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition: varatt.h:486
static void SET_VARSIZE(void *PTR, Size len)
Definition: varatt.h:432
const char * type