first release
| Revision | 8edf1db866961ff432fe9b89ff708e38267b4505 (tree) |
|---|---|
| Date | 2016-01-15 14:12:14 |
| Author | Kyotaro Horiguchi <horiguchi.kyotaro@lab....> |
| Committer | Kyotaro Horiguchi |

Support PostgreSQL 9.5.0.
The PG95 branch was cut a bit too early, so it needs an additional
merge from the master branch to complete PostgreSQL 9.5 support.
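For context, pg_hint_plan reads an optimizer hint from a comment block placed at the head of the query text. A minimal smoke test against a 9.5 server might look like this (a sketch; the table `t1` is hypothetical, while the `/*+ SeqScan(t1) */` hint syntax and `CREATE EXTENSION pg_hint_plan` appear verbatim in the regression output and SQL script below):

```sql
CREATE EXTENSION pg_hint_plan;

CREATE TABLE t1 (id int PRIMARY KEY, val int);

-- The leading hint comment forces a sequential scan on t1,
-- even where an index scan would normally be chosen.
/*+ SeqScan(t1) */
EXPLAIN (COSTS false) SELECT * FROM t1 WHERE id = 1;
```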
```diff
@@ -1,4 +1,4 @@
-Copyright (c) 2012-2014, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
+Copyright (c) 2012-2016, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -2,7 +2,7 @@ core.c and make_join_rel.c are parts of PostgreSQL Database Management System.
 (formerly known as Postgres, then as Postgres95)
 Copyright holders of those files are following organizations:
 
-Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 
 Portions Copyright (c) 1994, The Regents of the University of California
 
```
```diff
@@ -1,11 +1,11 @@
 #
 # pg_hint_plan: Makefile
 #
-# Copyright (c) 2012-2014, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
+# Copyright (c) 2012-2015, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
 #
 
 MODULES = pg_hint_plan
-HINTPLANVER = 1.1.2
+HINTPLANVER = 1.1.3
 
 REGRESS = init base_plan pg_hint_plan ut-init ut-A ut-S ut-J ut-L ut-G ut-R ut-fdw ut-fini
 
@@ -14,7 +14,7 @@ REGRESSION_EXPECTED = expected/init.out expected/base_plan.out expected/pg_hint_
 REGRESS_OPTS = --encoding=UTF8
 
 EXTENSION = pg_hint_plan
-DATA = pg_hint_plan--1.1.2.sql
+DATA = pg_hint_plan--1.1.3.sql
 
 EXTRA_CLEAN = sql/ut-fdw.sql expected/ut-fdw.out
 
@@ -23,8 +23,8 @@ PGXS := $(shell $(PG_CONFIG) --pgxs)
 include $(PGXS)
 
 STARBALL = pg_dbms_stats-$(DBMSSTATSVER).tar.gz
-STARBALL94 = pg_hint_plan94-$(HINTPLANVER).tar.gz
-STARBALLS = $(STARBALL) $(STARBALL94)
+STARBALL95 = pg_hint_plan95-$(HINTPLANVER).tar.gz
+STARBALLS = $(STARBALL) $(STARBALL95)
 
 TARSOURCES = Makefile *.c *.h \
 	pg_hint_plan--*.sql \
@@ -34,7 +34,7 @@ TARSOURCES = Makefile *.c *.h \
 
 installcheck: $(REGRESSION_EXPECTED)
 
-rpms: rpm94
+rpms: rpm95
 
 # pg_hint_plan.c includes core.c and make_join_rel.c
 pg_hint_plan.o: core.c make_join_rel.c # pg_stat_statements.c
@@ -49,7 +49,7 @@ $(STARBALLS): $(TARSOURCES)
 	tar -chzf $@ $(addprefix $(subst .tar.gz,,$@)/, $^)
 	rm $(subst .tar.gz,,$@)
 
-rpm94: $(STARBALL94)
-	MAKE_ROOT=`pwd` rpmbuild -bb SPECS/pg_hint_plan94.spec
+rpm95: $(STARBALL95)
+	MAKE_ROOT=`pwd` rpmbuild -bb SPECS/pg_hint_plan95.spec
 
 
```
```diff
@@ -1,84 +0,0 @@
-# SPEC file for pg_hint_plan
-# Copyright(C) 2012-2014 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
-
-%define _pgdir /usr/pgsql-9.4
-%define _bindir %{_pgdir}/bin
-%define _libdir %{_pgdir}/lib
-%define _datadir %{_pgdir}/share
-%if "%(echo ${MAKE_ROOT})" != ""
-  %define _rpmdir %(echo ${MAKE_ROOT})/RPMS
-  %define _sourcedir %(echo ${MAKE_ROOT})
-%endif
-
-## Set general information for pg_hint_plan.
-Summary: Optimizer hint for PostgreSQL 9.4
-Name: pg_hint_plan94
-Version: 1.1.2
-Release: 1%{?dist}
-License: BSD
-Group: Applications/Databases
-Source0: %{name}-%{version}.tar.gz
-#URL: http://example.com/pg_hint_plan/
-BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-%(%{__id_u} -n)
-Vendor: NIPPON TELEGRAPH AND TELEPHONE CORPORATION
-
-## We use postgresql-devel package
-BuildRequires: postgresql94-devel
-Requires: postgresql94-libs
-
-## Description for "pg_hint_plan"
-%description
-pg_hint_plan provides capability to force arbitrary plan to PostgreSQL' planner
-to optimize queries by hand directly.
-
-If you have query plan better than which PostgreSQL chooses, you can force your
-plan by adding special comment block with optimizer hint before the query you
-want to optimize. You can control scan method, join method, join order, and
-planner-related GUC parameters during planning.
-
-Note that this package is available for only PostgreSQL 9.4.
-
-## pre work for build pg_hint_plan
-%prep
-PATH=/usr/pgsql-9.4/bin:$PATH
-if [ "${MAKE_ROOT}" != "" ]; then
-    pushd ${MAKE_ROOT}
-    make clean %{name}-%{version}.tar.gz
-    popd
-fi
-if [ ! -d %{_rpmdir} ]; then mkdir -p %{_rpmdir}; fi
-%setup -q
-
-## Set variables for build environment
-%build
-PATH=/usr/pgsql-9.4/bin:$PATH
-make USE_PGXS=1 %{?_smp_mflags}
-
-## Set variables for install
-%install
-rm -rf %{buildroot}
-install -d %{buildroot}%{_libdir}
-install pg_hint_plan.so %{buildroot}%{_libdir}/pg_hint_plan.so
-install -d %{buildroot}%{_datadir}/extension
-install -m 644 pg_hint_plan--1.1.2.sql %{buildroot}%{_datadir}/extension/pg_hint_plan--1.1.2.sql
-install -m 644 pg_hint_plan.control %{buildroot}%{_datadir}/extension/pg_hint_plan.control
-
-%clean
-rm -rf %{buildroot}
-
-%files
-%defattr(0755,root,root)
-%{_libdir}/pg_hint_plan.so
-%defattr(0644,root,root)
-%{_datadir}/extension/pg_hint_plan--1.1.2.sql
-%{_datadir}/extension/pg_hint_plan.control
-
-# History of pg_hint_plan.
-%changelog
-* Thu Dec 17 2014 Kyotaro Horiguchi
-- Support 9.4. New rev 1.1.2.
-* Mon Sep 02 2013 Takashi Suzuki
-- Initial cut for 1.1.0
-* Mon Sep 24 2012 Shigeru Hanada <shigeru.hanada@gmail.com>
-- Initial cut for 1.0.0
-
```
```diff
@@ -20,7 +20,7 @@
  * mark_dummy_rel()
  * restriction_is_constant_false()
  *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *-------------------------------------------------------------------------
@@ -215,9 +215,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
         add_path(rel, (Path *)
                  create_append_path(rel, subpaths, required_outer));
     }
-
-    /* Select cheapest paths */
-    set_cheapest(rel);
 }
 
 /*
@@ -720,7 +717,7 @@ join_search_one_level(PlannerInfo *root, int level)
          */
         if (joinrels[level] == NIL &&
             root->join_info_list == NIL &&
-            root->lateral_info_list == NIL)
+            !root->hasLateralRTEs)
             elog(ERROR, "failed to build any %d-way joins", level);
     }
 }
@@ -819,9 +816,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
     SpecialJoinInfo *match_sjinfo;
     bool        reversed;
    bool        unique_ified;
-    bool        is_valid_inner;
-    bool        lateral_fwd;
-    bool        lateral_rev;
+    bool        must_be_leftjoin;
    ListCell   *l;
 
     /*
@@ -834,12 +829,12 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
     /*
      * If we have any special joins, the proposed join might be illegal; and
      * in any case we have to determine its join type.  Scan the join info
-     * list for conflicts.
+     * list for matches and conflicts.
      */
     match_sjinfo = NULL;
     reversed = false;
     unique_ified = false;
-    is_valid_inner = true;
+    must_be_leftjoin = false;
 
     foreach(l, root->join_info_list)
     {
@@ -890,7 +885,8 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
          * If one input contains min_lefthand and the other contains
          * min_righthand, then we can perform the SJ at this join.
          *
-         * Barf if we get matches to more than one SJ (is that possible?)
+         * Reject if we get matches to more than one SJ; that implies we're
+         * considering something that's not really valid.
          */
         if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
             bms_is_subset(sjinfo->min_righthand, rel2->relids))
@@ -955,90 +951,168 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
         }
         else
         {
-            /*----------
-             * Otherwise, the proposed join overlaps the RHS but isn't
-             * a valid implementation of this SJ.  It might still be
-             * a legal join, however.  If both inputs overlap the RHS,
-             * assume that it's OK.  Since the inputs presumably got past
-             * this function's checks previously, they can't overlap the
-             * LHS and their violations of the RHS boundary must represent
-             * SJs that have been determined to commute with this one.
-             * We have to allow this to work correctly in cases like
-             *      (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
-             * when the c/d join has been determined to commute with the join
-             * to a, and hence d is not part of min_righthand for the upper
-             * join.  It should be legal to join b to c/d but this will appear
-             * as a violation of the upper join's RHS.
-             * Furthermore, if one input overlaps the RHS and the other does
-             * not, we should still allow the join if it is a valid
-             * implementation of some other SJ.  We have to allow this to
-             * support the associative identity
-             *      (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
-             * since joining B directly to C violates the lower SJ's RHS.
-             * We assume that make_outerjoininfo() set things up correctly
-             * so that we'll only match to some SJ if the join is valid.
-             * Set flag here to check at bottom of loop.
-             *----------
+            /*
+             * Otherwise, the proposed join overlaps the RHS but isn't a valid
+             * implementation of this SJ.  But don't panic quite yet: the RHS
+             * violation might have occurred previously, in one or both input
+             * relations, in which case we must have previously decided that
+             * it was OK to commute some other SJ with this one.  If we need
+             * to perform this join to finish building up the RHS, rejecting
+             * it could lead to not finding any plan at all.  (This can occur
+             * because of the heuristics elsewhere in this file that postpone
+             * clauseless joins: we might not consider doing a clauseless join
+             * within the RHS until after we've performed other, validly
+             * commutable SJs with one or both sides of the clauseless join.)
+             * This consideration boils down to the rule that if both inputs
+             * overlap the RHS, we can allow the join --- they are either
+             * fully within the RHS, or represent previously-allowed joins to
+             * rels outside it.
              */
-            if (sjinfo->jointype != JOIN_SEMI &&
-                bms_overlap(rel1->relids, sjinfo->min_righthand) &&
+            if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
                 bms_overlap(rel2->relids, sjinfo->min_righthand))
-            {
-                /* seems OK */
-                Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
-            }
-            else
-                is_valid_inner = false;
+                continue;       /* assume valid previous violation of RHS */
+
+            /*
+             * The proposed join could still be legal, but only if we're
+             * allowed to associate it into the RHS of this SJ.  That means
+             * this SJ must be a LEFT join (not SEMI or ANTI, and certainly
+             * not FULL) and the proposed join must not overlap the LHS.
+             */
+            if (sjinfo->jointype != JOIN_LEFT ||
+                bms_overlap(joinrelids, sjinfo->min_lefthand))
+                return false;   /* invalid join path */
+
+            /*
+             * To be valid, the proposed join must be a LEFT join; otherwise
+             * it can't associate into this SJ's RHS.  But we may not yet have
+             * found the SpecialJoinInfo matching the proposed join, so we
+             * can't test that yet.  Remember the requirement for later.
+             */
+            must_be_leftjoin = true;
         }
     }
 
     /*
-     * Fail if violated some SJ's RHS and didn't match to another SJ. However,
-     * "matching" to a semijoin we are implementing by unique-ification
-     * doesn't count (think: it's really an inner join).
+     * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
+     * proposed join can't associate into an SJ's RHS.
+     *
+     * Also, fail if the proposed join's predicate isn't strict; we're
+     * essentially checking to see if we can apply outer-join identity 3, and
+     * that's a requirement.  (This check may be redundant with checks in
+     * make_outerjoininfo, but I'm not quite sure, and it's cheap to test.)
      */
-    if (!is_valid_inner &&
-        (match_sjinfo == NULL || unique_ified))
+    if (must_be_leftjoin &&
+        (match_sjinfo == NULL ||
+         match_sjinfo->jointype != JOIN_LEFT ||
+         !match_sjinfo->lhs_strict))
         return false;           /* invalid join path */
 
     /*
      * We also have to check for constraints imposed by LATERAL references.
-     * The proposed rels could each contain lateral references to the other,
-     * in which case the join is impossible.  If there are lateral references
-     * in just one direction, then the join has to be done with a nestloop
-     * with the lateral referencer on the inside.  If the join matches an SJ
-     * that cannot be implemented by such a nestloop, the join is impossible.
      */
-    lateral_fwd = lateral_rev = false;
-    foreach(l, root->lateral_info_list)
+    if (root->hasLateralRTEs)
     {
-        LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+        bool        lateral_fwd;
+        bool        lateral_rev;
+        Relids      join_lateral_rels;
 
-        if (bms_is_subset(ljinfo->lateral_rhs, rel2->relids) &&
-            bms_overlap(ljinfo->lateral_lhs, rel1->relids))
+        /*
+         * The proposed rels could each contain lateral references to the
+         * other, in which case the join is impossible.  If there are lateral
+         * references in just one direction, then the join has to be done with
+         * a nestloop with the lateral referencer on the inside.  If the join
+         * matches an SJ that cannot be implemented by such a nestloop, the
+         * join is impossible.
+         *
+         * Also, if the lateral reference is only indirect, we should reject
+         * the join; whatever rel(s) the reference chain goes through must be
+         * joined to first.
+         *
+         * Another case that might keep us from building a valid plan is the
+         * implementation restriction described by have_dangerous_phv().
+         */
+        lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
+        lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
+        if (lateral_fwd && lateral_rev)
+            return false;       /* have lateral refs in both directions */
+        if (lateral_fwd)
         {
             /* has to be implemented as nestloop with rel1 on left */
-            if (lateral_rev)
-                return false;   /* have lateral refs in both directions */
-            lateral_fwd = true;
-            if (!bms_is_subset(ljinfo->lateral_lhs, rel1->relids))
-                return false;   /* rel1 can't compute the required parameter */
             if (match_sjinfo &&
-                (reversed || match_sjinfo->jointype == JOIN_FULL))
+                (reversed ||
+                 unique_ified ||
+                 match_sjinfo->jointype == JOIN_FULL))
                 return false;   /* not implementable as nestloop */
+            /* check there is a direct reference from rel2 to rel1 */
+            if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
+                return false;   /* only indirect refs, so reject */
+            /* check we won't have a dangerous PHV */
+            if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
+                return false;   /* might be unable to handle required PHV */
         }
-        if (bms_is_subset(ljinfo->lateral_rhs, rel1->relids) &&
-            bms_overlap(ljinfo->lateral_lhs, rel2->relids))
+        else if (lateral_rev)
         {
             /* has to be implemented as nestloop with rel2 on left */
-            if (lateral_fwd)
-                return false;   /* have lateral refs in both directions */
-            lateral_rev = true;
-            if (!bms_is_subset(ljinfo->lateral_lhs, rel2->relids))
-                return false;   /* rel2 can't compute the required parameter */
             if (match_sjinfo &&
-                (!reversed || match_sjinfo->jointype == JOIN_FULL))
+                (!reversed ||
+                 unique_ified ||
+                 match_sjinfo->jointype == JOIN_FULL))
                 return false;   /* not implementable as nestloop */
+            /* check there is a direct reference from rel1 to rel2 */
+            if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
+                return false;   /* only indirect refs, so reject */
+            /* check we won't have a dangerous PHV */
+            if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
+                return false;   /* might be unable to handle required PHV */
+        }
+
+        /*
+         * LATERAL references could also cause problems later on if we accept
+         * this join: if the join's minimum parameterization includes any rels
+         * that would have to be on the inside of an outer join with this join
+         * rel, then it's never going to be possible to build the complete
+         * query using this join.  We should reject this join not only because
+         * it'll save work, but because if we don't, the clauseless-join
+         * heuristics might think that legality of this join means that some
+         * other join rel need not be formed, and that could lead to failure
+         * to find any plan at all.  We have to consider not only rels that
+         * are directly on the inner side of an OJ with the joinrel, but also
+         * ones that are indirectly so, so search to find all such rels.
+         */
+        join_lateral_rels = min_join_parameterization(root, joinrelids,
+                                                      rel1, rel2);
+        if (join_lateral_rels)
+        {
+            Relids      join_plus_rhs = bms_copy(joinrelids);
+            bool        more;
+
+            do
+            {
+                more = false;
+                foreach(l, root->join_info_list)
+                {
+                    SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
+
+                    if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
+                        !bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
+                    {
+                        join_plus_rhs = bms_add_members(join_plus_rhs,
+                                                        sjinfo->min_righthand);
+                        more = true;
+                    }
+                    /* full joins constrain both sides symmetrically */
+                    if (sjinfo->jointype == JOIN_FULL &&
+                        bms_overlap(sjinfo->min_righthand, join_plus_rhs) &&
+                        !bms_is_subset(sjinfo->min_lefthand, join_plus_rhs))
+                    {
+                        join_plus_rhs = bms_add_members(join_plus_rhs,
+                                                        sjinfo->min_lefthand);
+                        more = true;
+                    }
+                }
+            } while (more);
+            if (bms_overlap(join_plus_rhs, join_lateral_rels))
+                return false;   /* will not be able to join to some RHS rel */
         }
     }
 
```
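The outer-join identity that the new `lhs_strict` test guards (identity 3, cited in the added comment, and spelled out in the comment the patch removes) can be written in SQL form; an illustration with hypothetical tables `a`, `b`, `c`:

```sql
-- When the b/c join predicate is strict (it rejects NULL-extended
-- b rows), these two orderings are equivalent, which is what lets
-- the planner associate a join into a left join's right-hand side:
SELECT * FROM (a LEFT JOIN b ON a.x = b.x) LEFT JOIN c ON b.y = c.y;
SELECT * FROM a LEFT JOIN (b LEFT JOIN c ON b.y = c.y) ON a.x = b.x;
```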
```diff
@@ -1052,7 +1126,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
  * has_join_restriction
  *      Detect whether the specified relation has join-order restrictions,
  *      due to being inside an outer join or an IN (sub-SELECT),
- *      or participating in any LATERAL references.
+ *      or participating in any LATERAL references or multi-rel PHVs.
  *
  * Essentially, this tests whether have_join_order_restriction() could
  * succeed with this rel and some other one.  It's OK if we sometimes
@@ -1064,12 +1138,15 @@ has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
 {
     ListCell   *l;
 
-    foreach(l, root->lateral_info_list)
+    if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL)
+        return true;
+
+    foreach(l, root->placeholder_list)
     {
-        LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+        PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
 
-        if (bms_is_subset(ljinfo->lateral_rhs, rel->relids) ||
-            bms_overlap(ljinfo->lateral_lhs, rel->relids))
+        if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
+            !bms_equal(rel->relids, phinfo->ph_eval_at))
             return true;
     }
 
```
```diff
@@ -3016,7 +3016,7 @@ error hint:
    ->  Seq Scan on t2
 (5 rows)
 
--- inherite table test
+-- inheritance tables test
 SET constraint_exclusion TO off;
 EXPLAIN (COSTS false) SELECT * FROM p1 WHERE id >= 50 AND id <= 51 AND p1.ctid = '(1,1)';
 QUERY PLAN
@@ -7983,7 +7983,7 @@ duplication hint:
 error hint:
 
 CONTEXT: SQL statement "/*+ SeqScan(t1) */ SELECT * FROM t1"
-PL/pgSQL function testfunc() line 3 at EXECUTE statement
+PL/pgSQL function testfunc() line 3 at EXECUTE
 testfunc
 ----------
 
```
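The CONTEXT lines in this expected-output hunk come from a PL/pgSQL function that runs a hinted statement through EXECUTE; a sketch of such a function (reconstructed from the CONTEXT output, so the exact body used by the regression test is an assumption):

```sql
CREATE FUNCTION testfunc() RETURNS void LANGUAGE plpgsql AS $$
BEGIN
    EXECUTE '/*+ SeqScan(t1) */ SELECT * FROM t1';
END;
$$;

SELECT testfunc();  -- the hint inside EXECUTE is picked up by pg_hint_plan
```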
```diff
@@ -1,4 +1,4 @@
-/* pg_hint_plan/pg_hint_plan--1.1.2.sql */
+/* pg_hint_plan/pg_hint_plan--1.1.3.sql */
 
 -- complain if script is sourced in psql, rather than via CREATE EXTENSION
 \echo Use "CREATE EXTENSION pg_hint_plan" to load this file. \quit
@@ -3785,6 +3785,8 @@ rebuild_scan_path(HintState *hstate, PlannerInfo *root, int level,
     {
         set_plain_rel_pathlist(root, rel, rte);
     }
+
+    set_cheapest(rel);
 }
 
 /*
@@ -1,6 +1,6 @@
 # pg_hint_plan extension
 
 comment = ''
-default_version = '1.1.2'
+default_version = '1.1.3'
 relocatable = false
 schema = hint_plan
```
```diff
@@ -1,2148 +1,28 @@
 /*-------------------------------------------------------------------------
  *
  * pg_stat_statements.c
- *      Track statement execution times across a whole database cluster.
+ *
+ * Part of pg_stat_statements.c in PostgreSQL 9.5.
  *
- * Execution costs are totalled for each distinct source query, and kept in
- * a shared hashtable.  (We track only as many distinct queries as will fit
- * in the designated amount of shared memory.)
+ * Copyright (c) 2008-2015, PostgreSQL Global Development Group
  *
- * As of Postgres 9.2, this module normalizes query entries.  Normalization
- * is a process whereby similar queries, typically differing only in their
- * constants (though the exact rules are somewhat more subtle than that) are
- * recognized as equivalent, and are tracked as a single entry.  This is
- * particularly useful for non-prepared queries.
- *
- * Normalization is implemented by fingerprinting queries, selectively
- * serializing those fields of each query tree's nodes that are judged to be
- * essential to the query.  This is referred to as a query jumble.  This is
- * distinct from a regular serialization in that various extraneous
- * information is ignored as irrelevant or not essential to the query, such
- * as the collations of Vars and, most notably, the values of constants.
- *
- * This jumble is acquired at the end of parse analysis of each query, and
- * a 32-bit hash of it is stored into the query's Query.queryId field.
- * The server then copies this value around, making it available in plan
- * tree(s) generated from the query.  The executor can then use this value
- * to blame query costs on the proper queryId.
- *
- * To facilitate presenting entries to users, we create "representative" query
- * strings in which constants are replaced with '?' characters, to make it
- * clearer what a normalized entry can represent.  To save on shared memory,
- * and to avoid having to truncate oversized query strings, we store these
- * strings in a temporary external query-texts file.  Offsets into this
- * file are kept in shared memory.
- *
- * Note about locking issues: to create or delete an entry in the shared
- * hashtable, one must hold pgss->lock exclusively.  Modifying any field
- * in an entry except the counters requires the same.  To look up an entry,
- * one must hold the lock shared.  To read or update the counters within
- * an entry, one must hold the lock shared or exclusive (so the entry doesn't
- * disappear!) and also take the entry's mutex spinlock.
- * The shared state variable pgss->extent (the next free spot in the external
- * query-text file) should be accessed only while holding either the
- * pgss->mutex spinlock, or exclusive lock on pgss->lock.  We use the mutex to
- * allow reserving file space while holding only shared lock on pgss->lock.
- * Rewriting the entire external query-text file, eg for garbage collection,
- * requires holding pgss->lock exclusively; this allows individual entries
- * in the file to be read or written while holding only shared lock.
- *
- *
- * Copyright (c) 2008-2014, PostgreSQL Global Development Group
- *
- * IDENTIFICATION
- *    contrib/pg_stat_statements/pg_stat_statements.c
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include <sys/stat.h>
-
-#ifdef NOT_USED
-#include <unistd.h>
-#endif
-
-#include "access/hash.h"
-#ifdef NOT_USED
-#include "executor/instrument.h"
-#include "funcapi.h"
-#include "mb/pg_wchar.h"
-#include "miscadmin.h"
-#include "parser/analyze.h"
-#include "parser/parsetree.h"
-#endif
-#include "parser/scanner.h"
-#ifdef NOT_USED
-#include "pgstat.h"
-#include "storage/fd.h"
-#include "storage/ipc.h"
-#include "storage/spin.h"
-#include "tcop/utility.h"
-#include "utils/builtins.h"
-#include "utils/memutils.h"
-
-PG_MODULE_MAGIC;
-
-/* Location of permanent stats file (valid when database is shut down) */
-#define PGSS_DUMP_FILE  PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
-
-/*
- * Location of external query text file.  We don't keep it in the core
- * system's stats_temp_directory.  The core system can safely use that GUC
- * setting, because the statistics collector temp file paths are set only once
- * as part of changing the GUC, but pg_stat_statements has no way of avoiding
- * race conditions.  Besides, we only expect modest, infrequent I/O for query
- * strings, so placing the file on a faster filesystem is not compelling.
- */
-#define PGSS_TEXT_FILE  PG_STAT_TMP_DIR "/pgss_query_texts.stat"
-
-/* Magic number identifying the stats file format */
-static const uint32 PGSS_FILE_HEADER = 0x20140125;
-
-/* PostgreSQL major version number, changes in which invalidate all entries */
-static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
-
-/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
-#define USAGE_EXEC(duration)    (1.0)
-#define USAGE_INIT              (1.0)   /* including initial planning */
-#define ASSUMED_MEDIAN_INIT     (10.0)  /* initial assumed median usage */
-#define ASSUMED_LENGTH_INIT     1024    /* initial assumed mean query length */
-#define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
-#define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
-#define USAGE_DEALLOC_PERCENT   5       /* free this % of entries at once */
-
-#define JUMBLE_SIZE             1024    /* query serialization buffer size */
-
-/*
- * Extension version number, for supporting older extension versions' objects
- */
-typedef enum pgssVersion
-{
-    PGSS_V1_0 = 0,
-    PGSS_V1_1,
-    PGSS_V1_2
-} pgssVersion;
-
-/*
- * Hashtable key that defines the identity of a hashtable entry.  We separate
- * queries by user and by database even if they are otherwise identical.
- */
-typedef struct pgssHashKey
-{
-    Oid         userid;         /* user OID */
-    Oid         dbid;           /* database OID */
-    uint32      queryid;        /* query identifier */
-} pgssHashKey;
-
-/*
- * The actual stats counters kept within pgssEntry.
- */
-typedef struct Counters
-{
-    int64       calls;          /* # of times executed */
-    double      total_time;     /* total execution time, in msec */
-    int64       rows;           /* total # of retrieved or affected rows */
-    int64       shared_blks_hit;        /* # of shared buffer hits */
-    int64       shared_blks_read;       /* # of shared disk blocks read */
-    int64       shared_blks_dirtied;    /* # of shared disk blocks dirtied */
-    int64       shared_blks_written;    /* # of shared disk blocks written */
-    int64       local_blks_hit;         /* # of local buffer hits */
-    int64       local_blks_read;        /* # of local disk blocks read */
-    int64       local_blks_dirtied;     /* # of local disk blocks dirtied */
-    int64       local_blks_written;     /* # of local disk blocks written */
-    int64       temp_blks_read;         /* # of temp blocks read */
-    int64       temp_blks_written;      /* # of temp blocks written */
-    double      blk_read_time;  /* time spent reading, in msec */
-    double      blk_write_time; /* time spent writing, in msec */
-    double      usage;          /* usage factor */
-} Counters;
-
-/*
- * Statistics per statement
- *
- * Note: in event of a failure in garbage collection of the query text file,
- * we reset query_offset to zero and query_len to -1.  This will be seen as
- * an invalid state by qtext_fetch().
- */
-typedef struct pgssEntry
-{
-    pgssHashKey key;            /* hash key of entry - MUST BE FIRST */
-    Counters    counters;       /* the statistics for this query */
-    Size        query_offset;   /* query text offset in external file */
-    int         query_len;      /* # of valid bytes in query string */
-    int         encoding;       /* query text encoding */
-    slock_t     mutex;          /* protects the counters only */
-} pgssEntry;
-
-/*
- * Global shared state
- */
-typedef struct pgssSharedState
-{
-    LWLock     *lock;           /* protects hashtable search/modification */
-    double      cur_median_usage;       /* current median usage in hashtable */
-    Size        mean_query_len; /* current mean entry text length */
-    slock_t     mutex;          /* protects following fields only: */
-    Size        extent;         /* current extent of query file */
-    int         n_writers;      /* number of active writers to query file */
-    int         gc_count;       /* query file garbage collection cycle count */
-} pgssSharedState;
-
-/*
- * Struct for tracking locations/lengths of constants during normalization
- */
-typedef struct pgssLocationLen
-{
-    int         location;       /* start offset in query text */
-    int         length;         /* length in bytes, or -1 to ignore */
-} pgssLocationLen;
-
-/*
- * Working state for computing a query jumble and producing a normalized
- * query string
- */
-typedef struct pgssJumbleState
-{
-    /* Jumble of current query tree */
-    unsigned char *jumble;
-
-    /* Number of bytes used in jumble[] */
-    Size        jumble_len;
-
-    /* Array of locations of constants that should be removed */
-    pgssLocationLen *clocations;
-
-    /* Allocated length of clocations array */
-    int         clocations_buf_size;
-
-    /* Current number of valid entries in clocations array */
-    int         clocations_count;
-} pgssJumbleState;
-
-/*---- Local variables ----*/
-
-/* Current nesting depth of ExecutorRun+ProcessUtility calls */
-static int  nested_level = 0;
-
-/* Saved hook values in case of unload */
-static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
-static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
-static ExecutorStart_hook_type prev_ExecutorStart = NULL;
-static ExecutorRun_hook_type prev_ExecutorRun = NULL;
-static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
-static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
-static ProcessUtility_hook_type prev_ProcessUtility = NULL;
-
-/* Links to shared memory state */
-static pgssSharedState *pgss = NULL;
-static HTAB *pgss_hash = NULL;
-
-/*---- GUC variables ----*/
-
-typedef enum
-{
-    PGSS_TRACK_NONE,            /* track no statements */
-    PGSS_TRACK_TOP,             /* only top level statements */
-    PGSS_TRACK_ALL              /* all statements, including nested ones */
-} PGSSTrackLevel;
-
-static const struct config_enum_entry track_options[] =
-{
-    {"none", PGSS_TRACK_NONE, false},
-    {"top", PGSS_TRACK_TOP, false},
-    {"all", PGSS_TRACK_ALL, false},
-    {NULL, 0, false}
-};
-
-static int  pgss_max;           /* max # statements to track */
-static int  pgss_track;         /* tracking level */
-static bool pgss_track_utility; /* whether to track utility commands */
-static bool pgss_save;          /* whether to save stats across shutdown */
-
-
-#define pgss_enabled() \
-    (pgss_track == PGSS_TRACK_ALL || \
-     (pgss_track == PGSS_TRACK_TOP && nested_level == 0))
-
-#define record_gc_qtexts() \
-    do { \
-        volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; \
-        SpinLockAcquire(&s->mutex); \
-        s->gc_count++; \
-        SpinLockRelease(&s->mutex); \
-    } while(0)
-
-/*---- Function declarations ----*/
-
-void        _PG_init(void);
-void        _PG_fini(void);
-
-PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
-PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
-PG_FUNCTION_INFO_V1(pg_stat_statements);
-
-static void pgss_shmem_startup(void);
-static void pgss_shmem_shutdown(int code, Datum arg);
-static void pgss_post_parse_analyze(ParseState *pstate, Query *query);
-static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
-static void pgss_ExecutorRun(QueryDesc *queryDesc,
-                 ScanDirection direction,
-                 long count);
-static void pgss_ExecutorFinish(QueryDesc *queryDesc);
-static void pgss_ExecutorEnd(QueryDesc *queryDesc);
-static void pgss_ProcessUtility(Node *parsetree, const char *queryString,
-                    ProcessUtilityContext context, ParamListInfo params,
-                    DestReceiver *dest, char *completionTag);
-static uint32 pgss_hash_fn(const void *key, Size keysize);
-static int  pgss_match_fn(const void *key1, const void *key2, Size keysize);
-static uint32 pgss_hash_string(const char *str);
-static void pgss_store(const char *query, uint32 queryId,
-           double total_time, uint64 rows,
-           const BufferUsage *bufusage,
-           pgssJumbleState *jstate);
-static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
-                            pgssVersion api_version,
-                            bool showtext);
-static Size pgss_memsize(void);
-static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
-            int encoding, bool sticky);
-static void entry_dealloc(void);
-static bool qtext_store(const char *query, int query_len,
-            Size *query_offset, int *gc_count);
-static char *qtext_load_file(Size *buffer_size);
-static char *qtext_fetch(Size query_offset, int query_len,
-            char *buffer, Size buffer_size);
-static bool need_gc_qtexts(void);
-static void gc_qtexts(void);
-static void entry_reset(void);
-#endif
-static void AppendJumble(pgssJumbleState *jstate,
-             const unsigned char *item, Size size);
-static void JumbleQuery(pgssJumbleState *jstate, Query *query);
-static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
-static void JumbleExpr(pgssJumbleState *jstate, Node *node);
-static void RecordConstLocation(pgssJumbleState *jstate, int location);
-#ifdef NOT_USED
-static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
-                          int *query_len_p, int encoding);
-#endif
-static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
-static int  comp_location(const void *a, const void *b);
-
-
-#ifdef NOT_USED
-/*
- * Module load callback
- */
-void
-_PG_init(void)
-{
-    /*
-     * In order to create our shared memory area, we have to be loaded via
-     * shared_preload_libraries.  If not, fall out without hooking into any of
-     * the main system.  (We don't throw error here because it seems useful to
-     * allow the pg_stat_statements functions to be created even when the
-     * module isn't active.  The functions must protect themselves against
-     * being called then, however.)
-     */
-    if (!process_shared_preload_libraries_in_progress)
-        return;
-
-    /*
-     * Define (or redefine) custom GUC variables.
-     */
-    DefineCustomIntVariable("pg_stat_statements.max",
-        "Sets the maximum number of statements tracked by pg_stat_statements.",
-                            NULL,
-                            &pgss_max,
-                            5000,
-                            100,
-                            INT_MAX,
-                            PGC_POSTMASTER,
-                            0,
-                            NULL,
-                            NULL,
-                            NULL);
-
-    DefineCustomEnumVariable("pg_stat_statements.track",
-        "Selects which statements are tracked by pg_stat_statements.",
-                             NULL,
-                             &pgss_track,
-                             PGSS_TRACK_TOP,
-                             track_options,
-                             PGC_SUSET,
-                             0,
-                             NULL,
-                             NULL,
-                             NULL);
-
-    DefineCustomBoolVariable("pg_stat_statements.track_utility",
-        "Selects whether utility commands are tracked by pg_stat_statements.",
-                             NULL,
-                             &pgss_track_utility,
-                             true,
-                             PGC_SUSET,
-                             0,
-                             NULL,
-                             NULL,
-                             NULL);
-
-    DefineCustomBoolVariable("pg_stat_statements.save",
-        "Save pg_stat_statements statistics across server shutdowns.",
-                             NULL,
-                             &pgss_save,
-                             true,
-                             PGC_SIGHUP,
-                             0,
-                             NULL,
-                             NULL,
-                             NULL);
-
-    EmitWarningsOnPlaceholders("pg_stat_statements");
-
-    /*
-     * Request additional shared resources.  (These are no-ops if we're not in
-     * the postmaster process.)  We'll allocate or attach to the shared
-     * resources in pgss_shmem_startup().
-     */
-    RequestAddinShmemSpace(pgss_memsize());
-    RequestAddinLWLocks(1);
-
-    /*
-     * Install hooks.
-     */
-    prev_shmem_startup_hook = shmem_startup_hook;
-    shmem_startup_hook = pgss_shmem_startup;
-    prev_post_parse_analyze_hook = post_parse_analyze_hook;
-    post_parse_analyze_hook = pgss_post_parse_analyze;
-    prev_ExecutorStart = ExecutorStart_hook;
-    ExecutorStart_hook = pgss_ExecutorStart;
-    prev_ExecutorRun = ExecutorRun_hook;
-    ExecutorRun_hook = pgss_ExecutorRun;
-    prev_ExecutorFinish = ExecutorFinish_hook;
-    ExecutorFinish_hook = pgss_ExecutorFinish;
-    prev_ExecutorEnd = ExecutorEnd_hook;
-    ExecutorEnd_hook = pgss_ExecutorEnd;
-    prev_ProcessUtility = ProcessUtility_hook;
-    ProcessUtility_hook = pgss_ProcessUtility;
-}
-
-/*
- * Module unload callback
- */
-void
-_PG_fini(void)
-{
-    /* Uninstall hooks. */
-    shmem_startup_hook = prev_shmem_startup_hook;
-    post_parse_analyze_hook = prev_post_parse_analyze_hook;
-    ExecutorStart_hook = prev_ExecutorStart;
-    ExecutorRun_hook = prev_ExecutorRun;
-    ExecutorFinish_hook = prev_ExecutorFinish;
-    ExecutorEnd_hook = prev_ExecutorEnd;
-    ProcessUtility_hook = prev_ProcessUtility;
-}
-
-/*
- * shmem_startup hook: allocate or attach to shared memory,
- * then load any pre-existing statistics from file.
- * Also create and load the query-texts file, which is expected to exist
- * (even if empty) while the module is enabled.
- */
-static void
-pgss_shmem_startup(void)
-{
-    bool        found;
-    HASHCTL     info;
-    FILE       *file = NULL;
-    FILE       *qfile = NULL;
-    uint32      header;
-    int32       num;
-    int32       pgver;
-    int32       i;
-    int         buffer_size;
-    char       *buffer = NULL;
-
-    if (prev_shmem_startup_hook)
-        prev_shmem_startup_hook();
-
-    /* reset in case this is a restart within the postmaster */
-    pgss = NULL;
-    pgss_hash = NULL;
-
-    /*
-     * Create or attach to the shared memory state, including hash table
-     */
-    LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
-
-    pgss = ShmemInitStruct("pg_stat_statements",
-                           sizeof(pgssSharedState),
-                           &found);
-
-    if (!found)
-    {
-        /* First time through ... */
-        pgss->lock = LWLockAssign();
-        pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
-        pgss->mean_query_len = ASSUMED_LENGTH_INIT;
-        SpinLockInit(&pgss->mutex);
-        pgss->extent = 0;
-        pgss->n_writers = 0;
-        pgss->gc_count = 0;
-    }
-
-    memset(&info, 0, sizeof(info));
-    info.keysize = sizeof(pgssHashKey);
-    info.entrysize = sizeof(pgssEntry);
-    info.hash = pgss_hash_fn;
-    info.match = pgss_match_fn;
-    pgss_hash = ShmemInitHash("pg_stat_statements hash",
-                              pgss_max, pgss_max,
-                              &info,
-                              HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
-
-    LWLockRelease(AddinShmemInitLock);
-
-    /*
-     * If we're in the postmaster (or a standalone backend...), set up a shmem
-     * exit hook to dump the statistics to disk.
-     */
-    if (!IsUnderPostmaster)
-        on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
-
-    /*
-     * Done if some other process already completed our initialization.
-     */
-    if (found)
-        return;
-
-    /*
-     * Note: we don't bother with locks here, because there should be no other
-     * processes running when this code is reached.
-     */
-
-    /* Unlink query text file possibly left over from crash */
-    unlink(PGSS_TEXT_FILE);
-
-    /* Allocate new query text temp file */
-    qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
-    if (qfile == NULL)
-        goto write_error;
-
-    /*
-     * If we were told not to load old statistics, we're done.  (Note we do
-     * not try to unlink any old dump file in this case.  This seems a bit
-     * questionable but it's the historical behavior.)
-     */
-    if (!pgss_save)
-    {
-        FreeFile(qfile);
-        return;
-    }
-
-    /*
-     * Attempt to load old statistics from the dump file.
-     */
-    file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
-    if (file == NULL)
-    {
-        if (errno != ENOENT)
-            goto read_error;
-        /* No existing persisted stats file, so we're done */
-        FreeFile(qfile);
-        return;
-    }
-
-    buffer_size = 2048;
-    buffer = (char *) palloc(buffer_size);
-
-    if (fread(&header, sizeof(uint32), 1, file) != 1 ||
-        fread(&pgver, sizeof(uint32), 1, file) != 1 ||
-        fread(&num, sizeof(int32), 1, file) != 1)
-        goto read_error;
-
-    if (header != PGSS_FILE_HEADER ||
-        pgver != PGSS_PG_MAJOR_VERSION)
-        goto data_error;
-
-    for (i = 0; i < num; i++)
-    {
-        pgssEntry   temp;
-        pgssEntry  *entry;
-        Size        query_offset;
-
-        if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
-            goto read_error;
-
-        /* Encoding is the only field we can easily sanity-check */
-        if (!PG_VALID_BE_ENCODING(temp.encoding))
-            goto data_error;
-
-        /* Resize buffer as needed */
-        if (temp.query_len >= buffer_size)
-        {
-            buffer_size = Max(buffer_size * 2, temp.query_len + 1);
-            buffer = repalloc(buffer, buffer_size);
-        }
-
-        if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
-            goto read_error;
-
-        /* Should have a trailing null, but let's make sure */
-        buffer[temp.query_len] = '\0';
-
-        /* Skip loading "sticky" entries */
-        if (temp.counters.calls == 0)
-            continue;
-
-        /* Store the query text */
-        query_offset = pgss->extent;
-        if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
-            goto write_error;
-        pgss->extent += temp.query_len + 1;
-
-        /* make the hashtable entry (discards old entries if too many) */
-        entry = entry_alloc(&temp.key, query_offset, temp.query_len,
-                            temp.encoding,
-                            false);
-
-        /* copy in the actual stats */
-        entry->counters = temp.counters;
-    }
-
-    pfree(buffer);
-    FreeFile(file);
-    FreeFile(qfile);
-
-    /*
-     * Remove the persisted stats file so it's not included in
-     * backups/replication slaves, etc.  A new file will be written on next
-     * shutdown.
-     *
-     * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
-     * because we remove that file on startup; it acts inversely to
-     * PGSS_DUMP_FILE, in that it is only supposed to be around when the
-     * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
-     * when the server is not running.  Leaving the file creates no danger of
-     * a newly restored database having a spurious record of execution costs,
-     * which is what we're really concerned about here.
-     */
-    unlink(PGSS_DUMP_FILE);
-
-    return;
-
-read_error:
-    ereport(LOG,
-            (errcode_for_file_access(),
-             errmsg("could not read pg_stat_statement file \"%s\": %m",
-                    PGSS_DUMP_FILE)));
-    goto fail;
-data_error:
-    ereport(LOG,
-            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-             errmsg("ignoring invalid data in pg_stat_statement file \"%s\"",
-                    PGSS_DUMP_FILE)));
-    goto fail;
-write_error:
-    ereport(LOG,
-            (errcode_for_file_access(),
-             errmsg("could not write pg_stat_statement file \"%s\": %m",
-                    PGSS_TEXT_FILE)));
-fail:
-    if (buffer)
-        pfree(buffer);
-    if (file)
-        FreeFile(file);
-    if (qfile)
-        FreeFile(qfile);
-    /* If possible, throw away the bogus file; ignore any error */
-    unlink(PGSS_DUMP_FILE);
-
-    /*
-     * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
-     * server is running with pg_stat_statements enabled
-     */
-}
-
-/*
- * shmem_shutdown hook: Dump statistics into file.
- *
- * Note: we don't bother with acquiring lock, because there should be no
- * other processes running when this is called.
- */
-static void
-pgss_shmem_shutdown(int code, Datum arg)
-{
-    FILE       *file;
-    char       *qbuffer = NULL;
-    Size        qbuffer_size = 0;
-    HASH_SEQ_STATUS hash_seq;
-    int32       num_entries;
-    pgssEntry  *entry;
-
-    /* Don't try to dump during a crash. */
-    if (code)
-        return;
-
-    /* Safety check ... shouldn't get here unless shmem is set up. */
-    if (!pgss || !pgss_hash)
-        return;
-
-    /* Don't dump if told not to. */
-    if (!pgss_save)
-        return;
-
-    file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
-    if (file == NULL)
-        goto error;
-
-    if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
-        goto error;
-    if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
-        goto error;
-    num_entries = hash_get_num_entries(pgss_hash);
-    if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
-        goto error;
-
-    qbuffer = qtext_load_file(&qbuffer_size);
-    if (qbuffer == NULL)
-        goto error;
-
-    /*
-     * When serializing to disk, we store query texts immediately after their
-     * entry data.  Any orphaned query texts are thereby excluded.
-     */
-    hash_seq_init(&hash_seq, pgss_hash);
-    while ((entry = hash_seq_search(&hash_seq)) != NULL)
-    {
-        int         len = entry->query_len;
-        char       *qstr = qtext_fetch(entry->query_offset, len,
-                                       qbuffer, qbuffer_size);
-
-        if (qstr == NULL)
-            continue;           /* Ignore any entries with bogus texts */
-
-        if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
-            fwrite(qstr, 1, len + 1, file) != len + 1)
-        {
-            /* note: we assume hash_seq_term won't change errno */
-            hash_seq_term(&hash_seq);
-            goto error;
-        }
-    }
-
-    free(qbuffer);
-    qbuffer = NULL;
-
-    if (FreeFile(file))
-    {
-        file = NULL;
-        goto error;
-    }
-
-    /*
-     * Rename file into place, so we atomically replace any old one.
-     */
-    if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
-        ereport(LOG,
-                (errcode_for_file_access(),
-                 errmsg("could not rename pg_stat_statement file \"%s\": %m",
-                        PGSS_DUMP_FILE ".tmp")));
-
-    /* Unlink query-texts file; it's not needed while shutdown */
-    unlink(PGSS_TEXT_FILE);
-
-    return;
-
-error:
-    ereport(LOG,
-            (errcode_for_file_access(),
-             errmsg("could not write pg_stat_statement file \"%s\": %m",
-                    PGSS_DUMP_FILE ".tmp")));
-    if (qbuffer)
-        free(qbuffer);
-    if (file)
-        FreeFile(file);
-    unlink(PGSS_DUMP_FILE ".tmp");
-    unlink(PGSS_TEXT_FILE);
-}
-
771 | -/* | |
772 | - * Post-parse-analysis hook: mark query with a queryId | |
773 | - */ | |
774 | -static void | |
775 | -pgss_post_parse_analyze(ParseState *pstate, Query *query) | |
776 | -{ | |
777 | - pgssJumbleState jstate; | |
778 | - | |
779 | - if (prev_post_parse_analyze_hook) | |
780 | - prev_post_parse_analyze_hook(pstate, query); | |
781 | - | |
782 | - /* Assert we didn't do this already */ | |
783 | - Assert(query->queryId == 0); | |
784 | - | |
785 | - /* Safety check... */ | |
786 | - if (!pgss || !pgss_hash) | |
787 | - return; | |
788 | - | |
789 | - /* | |
790 | - * Utility statements get queryId zero. We do this even in cases where | |
791 | - * the statement contains an optimizable statement for which a queryId | |
792 | - * could be derived (such as EXPLAIN or DECLARE CURSOR). For such cases, | |
793 | - * runtime control will first go through ProcessUtility and then the | |
794 | - * executor, and we don't want the executor hooks to do anything, since we | |
795 | - * are already measuring the statement's costs at the utility level. | |
796 | - */ | |
797 | - if (query->utilityStmt) | |
798 | - { | |
799 | - query->queryId = 0; | |
800 | - return; | |
801 | - } | |
802 | - | |
803 | - /* Set up workspace for query jumbling */ | |
804 | - jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE); | |
805 | - jstate.jumble_len = 0; | |
806 | - jstate.clocations_buf_size = 32; | |
807 | - jstate.clocations = (pgssLocationLen *) | |
808 | - palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); | |
809 | - jstate.clocations_count = 0; | |
810 | - | |
811 | - /* Compute query ID and mark the Query node with it */ | |
812 | - JumbleQuery(&jstate, query); | |
813 | - query->queryId = hash_any(jstate.jumble, jstate.jumble_len); | |
814 | - | |
815 | - /* | |
816 | - * If we are unlucky enough to get a hash of zero, use 1 instead, to | |
817 | - * prevent confusion with the utility-statement case. | |
818 | - */ | |
819 | - if (query->queryId == 0) | |
820 | - query->queryId = 1; | |
821 | - | |
822 | - /* | |
823 | - * If we were able to identify any ignorable constants, we immediately | |
824 | - * create a hash table entry for the query, so that we can record the | |
825 | - * normalized form of the query string. If there were no such constants, | |
826 | - * the normalized string would be the same as the query text anyway, so | |
827 | - * there's no need for an early entry. | |
828 | - */ | |
829 | - if (jstate.clocations_count > 0) | |
830 | - pgss_store(pstate->p_sourcetext, | |
831 | - query->queryId, | |
832 | - 0, | |
833 | - 0, | |
834 | - NULL, | |
835 | - &jstate); | |
836 | -} | |
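
The call to `prev_post_parse_analyze_hook` at the top of this function is PostgreSQL's standard hook-chaining convention. The module's `_PG_init`, where the chain is set up, lies outside this excerpt, so the following is a hedged sketch of the usual installation pattern (the name `my_post_parse_analyze` is illustrative):

```c
/* Hedged sketch of typical hook installation in a module's _PG_init();
 * the real _PG_init of this file is outside this excerpt. */
#include "postgres.h"
#include "fmgr.h"
#include "parser/analyze.h"

PG_MODULE_MAGIC;

static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;

static void my_post_parse_analyze(ParseState *pstate, Query *query);

void
_PG_init(void)
{
    /* remember whoever was hooked before us, then chain to them */
    prev_post_parse_analyze_hook = post_parse_analyze_hook;
    post_parse_analyze_hook = my_post_parse_analyze;
}

static void
my_post_parse_analyze(ParseState *pstate, Query *query)
{
    if (prev_post_parse_analyze_hook)
        prev_post_parse_analyze_hook(pstate, query);
    /* ... module-specific work on the analyzed Query ... */
}
```
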
837 | - | |
838 | -/* | |
839 | - * ExecutorStart hook: start up tracking if needed | |
840 | - */ | |
841 | -static void | |
842 | -pgss_ExecutorStart(QueryDesc *queryDesc, int eflags) | |
843 | -{ | |
844 | - if (prev_ExecutorStart) | |
845 | - prev_ExecutorStart(queryDesc, eflags); | |
846 | - else | |
847 | - standard_ExecutorStart(queryDesc, eflags); | |
848 | - | |
849 | - /* | |
850 | - * If query has queryId zero, don't track it. This prevents double | |
851 | - * counting of optimizable statements that are directly contained in | |
852 | - * utility statements. | |
853 | - */ | |
854 | - if (pgss_enabled() && queryDesc->plannedstmt->queryId != 0) | |
855 | - { | |
856 | - /* | |
857 | - * Set up to track total elapsed time in ExecutorRun. Make sure the | |
858 | - * space is allocated in the per-query context so it will go away at | |
859 | - * ExecutorEnd. | |
860 | - */ | |
861 | - if (queryDesc->totaltime == NULL) | |
862 | - { | |
863 | - MemoryContext oldcxt; | |
864 | - | |
865 | - oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); | |
866 | - queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL); | |
867 | - MemoryContextSwitchTo(oldcxt); | |
868 | - } | |
869 | - } | |
870 | -} | |
871 | - | |
872 | -/* | |
873 | - * ExecutorRun hook: all we need do is track nesting depth | |
874 | - */ | |
875 | -static void | |
876 | -pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count) | |
877 | -{ | |
878 | - nested_level++; | |
879 | - PG_TRY(); | |
880 | - { | |
881 | - if (prev_ExecutorRun) | |
882 | - prev_ExecutorRun(queryDesc, direction, count); | |
883 | - else | |
884 | - standard_ExecutorRun(queryDesc, direction, count); | |
885 | - nested_level--; | |
886 | - } | |
887 | - PG_CATCH(); | |
888 | - { | |
889 | - nested_level--; | |
890 | - PG_RE_THROW(); | |
891 | - } | |
892 | - PG_END_TRY(); | |
893 | -} | |
894 | - | |
895 | -/* | |
896 | - * ExecutorFinish hook: all we need do is track nesting depth | |
897 | - */ | |
898 | -static void | |
899 | -pgss_ExecutorFinish(QueryDesc *queryDesc) | |
900 | -{ | |
901 | - nested_level++; | |
902 | - PG_TRY(); | |
903 | - { | |
904 | - if (prev_ExecutorFinish) | |
905 | - prev_ExecutorFinish(queryDesc); | |
906 | - else | |
907 | - standard_ExecutorFinish(queryDesc); | |
908 | - nested_level--; | |
909 | - } | |
910 | - PG_CATCH(); | |
911 | - { | |
912 | - nested_level--; | |
913 | - PG_RE_THROW(); | |
914 | - } | |
915 | - PG_END_TRY(); | |
916 | -} | |
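
Both executor hooks above use PG_TRY solely to keep `nested_level` consistent when the wrapped call throws an error; `pgss_enabled()`, defined outside this excerpt, consults this depth so that only top-level statements are tracked when configured that way. The idiom in isolation, as a sketch:

```c
#include "postgres.h"

/* Sketch of the depth-tracking idiom used by the executor hooks above:
 * the only job of PG_TRY here is to undo the increment if fn() throws. */
static int nested_level = 0;

static void
call_with_depth_tracking(void (*fn) (void))
{
    nested_level++;
    PG_TRY();
    {
        fn();
        nested_level--;
    }
    PG_CATCH();
    {
        nested_level--;
        PG_RE_THROW();
    }
    PG_END_TRY();
}
```
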
917 | - | |
918 | -/* | |
919 | - * ExecutorEnd hook: store results if needed | |
920 | - */ | |
921 | -static void | |
922 | -pgss_ExecutorEnd(QueryDesc *queryDesc) | |
923 | -{ | |
924 | - uint32 queryId = queryDesc->plannedstmt->queryId; | |
925 | - | |
926 | - if (queryId != 0 && queryDesc->totaltime && pgss_enabled()) | |
927 | - { | |
928 | - /* | |
929 | - * Make sure stats accumulation is done. (Note: it's okay if several | |
930 | - * levels of hook all do this.) | |
931 | - */ | |
932 | - InstrEndLoop(queryDesc->totaltime); | |
933 | - | |
934 | - pgss_store(queryDesc->sourceText, | |
935 | - queryId, | |
936 | - queryDesc->totaltime->total * 1000.0, /* convert to msec */ | |
937 | - queryDesc->estate->es_processed, | |
938 | - &queryDesc->totaltime->bufusage, | |
939 | - NULL); | |
940 | - } | |
941 | - | |
942 | - if (prev_ExecutorEnd) | |
943 | - prev_ExecutorEnd(queryDesc); | |
944 | - else | |
945 | - standard_ExecutorEnd(queryDesc); | |
946 | -} | |
947 | - | |
948 | -/* | |
949 | - * ProcessUtility hook | |
950 | - */ | |
951 | -static void | |
952 | -pgss_ProcessUtility(Node *parsetree, const char *queryString, | |
953 | - ProcessUtilityContext context, ParamListInfo params, | |
954 | - DestReceiver *dest, char *completionTag) | |
955 | -{ | |
956 | - /* | |
957 | - * If it's an EXECUTE statement, we don't track it and don't increment the | |
958 | - * nesting level. This allows the cycles to be charged to the underlying | |
959 | - * PREPARE instead (by the Executor hooks), which is much more useful. | |
960 | - * | |
961 | - * We also don't track execution of PREPARE. If we did, we would get one | |
962 | - * hash table entry for the PREPARE (with hash calculated from the query | |
963 | - * string), and then a different one with the same query string (but hash | |
964 | - * calculated from the query tree) would be used to accumulate costs of | |
965 | - * ensuing EXECUTEs. This would be confusing, and inconsistent with other | |
966 | - * cases where planning time is not included at all. | |
967 | - * | |
968 | - * Likewise, we don't track execution of DEALLOCATE. | |
969 | - */ | |
970 | - if (pgss_track_utility && pgss_enabled() && | |
971 | - !IsA(parsetree, ExecuteStmt) && | |
972 | - !IsA(parsetree, PrepareStmt) && | |
973 | - !IsA(parsetree, DeallocateStmt)) | |
974 | - { | |
975 | - instr_time start; | |
976 | - instr_time duration; | |
977 | - uint64 rows; | |
978 | - BufferUsage bufusage_start, | |
979 | - bufusage; | |
980 | - uint32 queryId; | |
981 | - | |
982 | - bufusage_start = pgBufferUsage; | |
983 | - INSTR_TIME_SET_CURRENT(start); | |
984 | - | |
985 | - nested_level++; | |
986 | - PG_TRY(); | |
987 | - { | |
988 | - if (prev_ProcessUtility) | |
989 | - prev_ProcessUtility(parsetree, queryString, | |
990 | - context, params, | |
991 | - dest, completionTag); | |
992 | - else | |
993 | - standard_ProcessUtility(parsetree, queryString, | |
994 | - context, params, | |
995 | - dest, completionTag); | |
996 | - nested_level--; | |
997 | - } | |
998 | - PG_CATCH(); | |
999 | - { | |
1000 | - nested_level--; | |
1001 | - PG_RE_THROW(); | |
1002 | - } | |
1003 | - PG_END_TRY(); | |
1004 | - | |
1005 | - INSTR_TIME_SET_CURRENT(duration); | |
1006 | - INSTR_TIME_SUBTRACT(duration, start); | |
1007 | - | |
1008 | - /* parse command tag to retrieve the number of affected rows. */ | |
1009 | - if (completionTag && | |
1010 | - strncmp(completionTag, "COPY ", 5) == 0) | |
1011 | - { | |
1012 | -#ifdef HAVE_STRTOULL | |
1013 | - rows = strtoull(completionTag + 5, NULL, 10); | |
1014 | -#else | |
1015 | - rows = strtoul(completionTag + 5, NULL, 10); | |
1016 | -#endif | |
1017 | - } | |
1018 | - else | |
1019 | - rows = 0; | |
1020 | - | |
1021 | - /* calculate the differences of the buffer usage counters. */ | |
1022 | - bufusage.shared_blks_hit = | |
1023 | - pgBufferUsage.shared_blks_hit - bufusage_start.shared_blks_hit; | |
1024 | - bufusage.shared_blks_read = | |
1025 | - pgBufferUsage.shared_blks_read - bufusage_start.shared_blks_read; | |
1026 | - bufusage.shared_blks_dirtied = | |
1027 | - pgBufferUsage.shared_blks_dirtied - bufusage_start.shared_blks_dirtied; | |
1028 | - bufusage.shared_blks_written = | |
1029 | - pgBufferUsage.shared_blks_written - bufusage_start.shared_blks_written; | |
1030 | - bufusage.local_blks_hit = | |
1031 | - pgBufferUsage.local_blks_hit - bufusage_start.local_blks_hit; | |
1032 | - bufusage.local_blks_read = | |
1033 | - pgBufferUsage.local_blks_read - bufusage_start.local_blks_read; | |
1034 | - bufusage.local_blks_dirtied = | |
1035 | - pgBufferUsage.local_blks_dirtied - bufusage_start.local_blks_dirtied; | |
1036 | - bufusage.local_blks_written = | |
1037 | - pgBufferUsage.local_blks_written - bufusage_start.local_blks_written; | |
1038 | - bufusage.temp_blks_read = | |
1039 | - pgBufferUsage.temp_blks_read - bufusage_start.temp_blks_read; | |
1040 | - bufusage.temp_blks_written = | |
1041 | - pgBufferUsage.temp_blks_written - bufusage_start.temp_blks_written; | |
1042 | - bufusage.blk_read_time = pgBufferUsage.blk_read_time; | |
1043 | - INSTR_TIME_SUBTRACT(bufusage.blk_read_time, bufusage_start.blk_read_time); | |
1044 | - bufusage.blk_write_time = pgBufferUsage.blk_write_time; | |
1045 | - INSTR_TIME_SUBTRACT(bufusage.blk_write_time, bufusage_start.blk_write_time); | |
1046 | - | |
1047 | - /* For utility statements, we just hash the query string directly */ | |
1048 | - queryId = pgss_hash_string(queryString); | |
1049 | - | |
1050 | - pgss_store(queryString, | |
1051 | - queryId, | |
1052 | - INSTR_TIME_GET_MILLISEC(duration), | |
1053 | - rows, | |
1054 | - &bufusage, | |
1055 | - NULL); | |
1056 | - } | |
1057 | - else | |
1058 | - { | |
1059 | - if (prev_ProcessUtility) | |
1060 | - prev_ProcessUtility(parsetree, queryString, | |
1061 | - context, params, | |
1062 | - dest, completionTag); | |
1063 | - else | |
1064 | - standard_ProcessUtility(parsetree, queryString, | |
1065 | - context, params, | |
1066 | - dest, completionTag); | |
1067 | - } | |
1068 | -} | |
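
The utility branch above times the statement with PostgreSQL's `instr_time` macros rather than calling a clock function directly. A minimal, self-contained sketch of that accounting (the callback name is illustrative):

```c
#include "postgres.h"
#include "portability/instr_time.h"

/* Minimal sketch of the instr_time accounting used above; returns the
 * elapsed wall-clock time of work() in milliseconds. */
static double
time_some_work(void (*work) (void))
{
    instr_time start;
    instr_time duration;

    INSTR_TIME_SET_CURRENT(start);
    work();
    INSTR_TIME_SET_CURRENT(duration);
    INSTR_TIME_SUBTRACT(duration, start);   /* duration -= start */

    return INSTR_TIME_GET_MILLISEC(duration);
}
```
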
1069 | - | |
1070 | -/* | |
1071 | - * Calculate hash value for a key | |
1072 | - */ | |
1073 | -static uint32 | |
1074 | -pgss_hash_fn(const void *key, Size keysize) | |
1075 | -{ | |
1076 | - const pgssHashKey *k = (const pgssHashKey *) key; | |
1077 | - | |
1078 | - return hash_uint32((uint32) k->userid) ^ | |
1079 | - hash_uint32((uint32) k->dbid) ^ | |
1080 | - hash_uint32((uint32) k->queryid); | |
1081 | -} | |
1082 | - | |
1083 | -/* | |
1084 | - * Compare two keys - zero means match | |
1085 | - */ | |
1086 | -static int | |
1087 | -pgss_match_fn(const void *key1, const void *key2, Size keysize) | |
1088 | -{ | |
1089 | - const pgssHashKey *k1 = (const pgssHashKey *) key1; | |
1090 | - const pgssHashKey *k2 = (const pgssHashKey *) key2; | |
1091 | - | |
1092 | - if (k1->userid == k2->userid && | |
1093 | - k1->dbid == k2->dbid && | |
1094 | - k1->queryid == k2->queryid) | |
1095 | - return 0; | |
1096 | - else | |
1097 | - return 1; | |
1098 | -} | |
1099 | - | |
1100 | -/* | |
1101 | - * Given an arbitrarily long query string, produce a hash for the purposes of | |
1102 | - * identifying the query, without normalizing constants. Used when hashing | |
1103 | - * utility statements. | |
1104 | - */ | |
1105 | -static uint32 | |
1106 | -pgss_hash_string(const char *str) | |
1107 | -{ | |
1108 | - return hash_any((const unsigned char *) str, strlen(str)); | |
1109 | -} | |
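
`pgss_hash_fn` and `pgss_match_fn` are never called directly; they are handed to the hashtable machinery when the shared table is created in `pgss_shmem_startup`, which falls outside this excerpt. Roughly, assuming the `pgssHashKey`/`pgssEntry` definitions from earlier in the file, the wiring looks like this sketch:

```c
#include "postgres.h"
#include "storage/shmem.h"
#include "utils/hsearch.h"

/* Hedged sketch: how the custom hash/match callbacks above plug into the
 * shared hashtable; the real call site is pgss_shmem_startup(). */
static HTAB *
create_pgss_hash(long max_entries)
{
    HASHCTL info;

    memset(&info, 0, sizeof(info));
    info.keysize = sizeof(pgssHashKey);
    info.entrysize = sizeof(pgssEntry);
    info.hash = pgss_hash_fn;    /* our hasher over (userid, dbid, queryid) */
    info.match = pgss_match_fn;  /* our equality test for the same key */

    return ShmemInitHash("pg_stat_statements hash",
                         max_entries, max_entries,
                         &info,
                         HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
}
```
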
1110 | - | |
1111 | -/* | |
1112 | - * Store some statistics for a statement. | |
1113 | - * | |
1114 | - * If jstate is not NULL then we're trying to create an entry for which | |
1115 | - * we have no statistics as yet; we just want to record the normalized | |
1116 | - * query string. total_time, rows, bufusage are ignored in this case. | |
1117 | - */ | |
1118 | -static void | |
1119 | -pgss_store(const char *query, uint32 queryId, | |
1120 | - double total_time, uint64 rows, | |
1121 | - const BufferUsage *bufusage, | |
1122 | - pgssJumbleState *jstate) | |
1123 | -{ | |
1124 | - pgssHashKey key; | |
1125 | - pgssEntry *entry; | |
1126 | - char *norm_query = NULL; | |
1127 | - int encoding = GetDatabaseEncoding(); | |
1128 | - int query_len; | |
1129 | - | |
1130 | - Assert(query != NULL); | |
1131 | - | |
1132 | - /* Safety check... */ | |
1133 | - if (!pgss || !pgss_hash) | |
1134 | - return; | |
1135 | - | |
1136 | - query_len = strlen(query); | |
1137 | - | |
1138 | - /* Set up key for hashtable search */ | |
1139 | - key.userid = GetUserId(); | |
1140 | - key.dbid = MyDatabaseId; | |
1141 | - key.queryid = queryId; | |
1142 | - | |
1143 | - /* Lookup the hash table entry with shared lock. */ | |
1144 | - LWLockAcquire(pgss->lock, LW_SHARED); | |
1145 | - | |
1146 | - entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL); | |
1147 | - | |
1148 | - /* Create new entry, if not present */ | |
1149 | - if (!entry) | |
1150 | - { | |
1151 | - Size query_offset; | |
1152 | - int gc_count; | |
1153 | - bool stored; | |
1154 | - bool do_gc; | |
1155 | - | |
1156 | - /* | |
1157 | - * Create a new, normalized query string if caller asked. We don't | |
1158 | - * need to hold the lock while doing this work. (Note: in any case, | |
1159 | - * it's possible that someone else creates a duplicate hashtable entry | |
1160 | - * in the interval where we don't hold the lock below. That case is | |
1161 | - * handled by entry_alloc.) | |
1162 | - */ | |
1163 | - if (jstate) | |
1164 | - { | |
1165 | - LWLockRelease(pgss->lock); | |
1166 | - norm_query = generate_normalized_query(jstate, query, | |
1167 | - &query_len, | |
1168 | - encoding); | |
1169 | - LWLockAcquire(pgss->lock, LW_SHARED); | |
1170 | - } | |
1171 | - | |
1172 | - /* Append new query text to file with only shared lock held */ | |
1173 | - stored = qtext_store(norm_query ? norm_query : query, query_len, | |
1174 | - &query_offset, &gc_count); | |
1175 | - | |
1176 | - /* | |
1177 | - * Determine whether we need to garbage collect external query texts | |
1178 | - * while the shared lock is still held. This micro-optimization | |
1179 | - * avoids taking the time to decide this while holding exclusive lock. | |
1180 | - */ | |
1181 | - do_gc = need_gc_qtexts(); | |
1182 | - | |
1183 | - /* Need exclusive lock to make a new hashtable entry - promote */ | |
1184 | - LWLockRelease(pgss->lock); | |
1185 | - LWLockAcquire(pgss->lock, LW_EXCLUSIVE); | |
1186 | - | |
1187 | - /* | |
1188 | - * A garbage collection may have occurred while we weren't holding the | |
1189 | - * lock. In the unlikely event that this happens, the query text we | |
1190 | - * stored above will have been garbage collected, so write it again. | |
1191 | - * This should be infrequent enough that doing it while holding | |
1192 | - * exclusive lock isn't a performance problem. | |
1193 | - */ | |
1194 | - if (!stored || pgss->gc_count != gc_count) | |
1195 | - stored = qtext_store(norm_query ? norm_query : query, query_len, | |
1196 | - &query_offset, NULL); | |
1197 | - | |
1198 | - /* If we failed to write to the text file, give up */ | |
1199 | - if (!stored) | |
1200 | - goto done; | |
1201 | - | |
1202 | - /* OK to create a new hashtable entry */ | |
1203 | - entry = entry_alloc(&key, query_offset, query_len, encoding, | |
1204 | - jstate != NULL); | |
1205 | - | |
1206 | - /* If needed, perform garbage collection while exclusive lock held */ | |
1207 | - if (do_gc) | |
1208 | - gc_qtexts(); | |
1209 | - } | |
1210 | - | |
1211 | - /* Increment the counts, except when jstate is not NULL */ | |
1212 | - if (!jstate) | |
1213 | - { | |
1214 | - /* | |
1215 | - * Grab the spinlock while updating the counters (see comment about | |
1216 | - * locking rules at the head of the file) | |
1217 | - */ | |
1218 | - volatile pgssEntry *e = (volatile pgssEntry *) entry; | |
1219 | - | |
1220 | - SpinLockAcquire(&e->mutex); | |
1221 | - | |
1222 | - /* "Unstick" entry if it was previously sticky */ | |
1223 | - if (e->counters.calls == 0) | |
1224 | - e->counters.usage = USAGE_INIT; | |
1225 | - | |
1226 | - e->counters.calls += 1; | |
1227 | - e->counters.total_time += total_time; | |
1228 | - e->counters.rows += rows; | |
1229 | - e->counters.shared_blks_hit += bufusage->shared_blks_hit; | |
1230 | - e->counters.shared_blks_read += bufusage->shared_blks_read; | |
1231 | - e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied; | |
1232 | - e->counters.shared_blks_written += bufusage->shared_blks_written; | |
1233 | - e->counters.local_blks_hit += bufusage->local_blks_hit; | |
1234 | - e->counters.local_blks_read += bufusage->local_blks_read; | |
1235 | - e->counters.local_blks_dirtied += bufusage->local_blks_dirtied; | |
1236 | - e->counters.local_blks_written += bufusage->local_blks_written; | |
1237 | - e->counters.temp_blks_read += bufusage->temp_blks_read; | |
1238 | - e->counters.temp_blks_written += bufusage->temp_blks_written; | |
1239 | - e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time); | |
1240 | - e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time); | |
1241 | - e->counters.usage += USAGE_EXEC(total_time); | |
1242 | - | |
1243 | - SpinLockRelease(&e->mutex); | |
1244 | - } | |
1245 | - | |
1246 | -done: | |
1247 | - LWLockRelease(pgss->lock); | |
1248 | - | |
1249 | - /* We postpone this clean-up until we're out of the lock */ | |
1250 | - if (norm_query) | |
1251 | - pfree(norm_query); | |
1252 | -} | |
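
The locking dance in `pgss_store` is worth spelling out: LWLocks cannot be upgraded in place, so "promotion" means releasing the shared lock and taking an exclusive one, and every fact gathered under the shared lock (here, `stored` and `gc_count`) must be rechecked afterwards. A condensed sketch of that shape (names and types are illustrative):

```c
#include "postgres.h"
#include "storage/lwlock.h"
#include "utils/hsearch.h"

/* Condensed sketch of the release-and-reacquire "promotion" above;
 * pgss_store additionally rechecks gc_count and re-stores the query
 * text for the window in which no lock is held. */
static void *
find_or_make_entry(LWLock *lock, HTAB *htab, const void *key)
{
    bool   found;
    void  *entry;

    LWLockAcquire(lock, LW_SHARED);
    entry = hash_search(htab, key, HASH_FIND, NULL);
    if (entry == NULL)
    {
        LWLockRelease(lock);                /* cannot upgrade in place... */
        LWLockAcquire(lock, LW_EXCLUSIVE);  /* ...so drop and retake */

        /* someone may have inserted meanwhile; HASH_ENTER then simply
         * returns the existing entry, just as entry_alloc tolerates */
        entry = hash_search(htab, key, HASH_ENTER, &found);
    }
    LWLockRelease(lock);
    return entry;
}
```
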
1253 | - | |
1254 | -/* | |
1255 | - * Reset all statement statistics. | |
1256 | - */ | |
1257 | -Datum | |
1258 | -pg_stat_statements_reset(PG_FUNCTION_ARGS) | |
1259 | -{ | |
1260 | - if (!pgss || !pgss_hash) | |
1261 | - ereport(ERROR, | |
1262 | - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), | |
1263 | - errmsg("pg_stat_statements must be loaded via shared_preload_libraries"))); | |
1264 | - entry_reset(); | |
1265 | - PG_RETURN_VOID(); | |
1266 | -} | |
1267 | - | |
1268 | -/* Number of output arguments (columns) for various API versions */ | |
1269 | -#define PG_STAT_STATEMENTS_COLS_V1_0 14 | |
1270 | -#define PG_STAT_STATEMENTS_COLS_V1_1 18 | |
1271 | -#define PG_STAT_STATEMENTS_COLS_V1_2 19 | |
1272 | -#define PG_STAT_STATEMENTS_COLS 19 /* maximum of above */ | |
1273 | - | |
1274 | -/* | |
1275 | - * Retrieve statement statistics. | |
1276 | - * | |
1277 | - * The SQL API of this function has changed multiple times, and will likely | |
1278 | - * do so again in future. To support the case where a newer version of this | |
1279 | - * loadable module is being used with an old SQL declaration of the function, | |
1280 | - * we continue to support the older API versions. For 1.2 and later, the | |
1281 | - * expected API version is identified by embedding it in the C name of the | |
1282 | - * function. Unfortunately we weren't bright enough to do that for 1.1. | |
1283 | - */ | |
1284 | -Datum | |
1285 | -pg_stat_statements_1_2(PG_FUNCTION_ARGS) | |
1286 | -{ | |
1287 | - bool showtext = PG_GETARG_BOOL(0); | |
1288 | - | |
1289 | - pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext); | |
1290 | - | |
1291 | - return (Datum) 0; | |
1292 | -} | |
1293 | - | |
1294 | -/* | |
1295 | - * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1. | |
1296 | - * This can be removed someday, perhaps. | |
1297 | - */ | |
1298 | -Datum | |
1299 | -pg_stat_statements(PG_FUNCTION_ARGS) | |
1300 | -{ | |
1301 | - /* If it's really API 1.1, we'll figure that out below */ | |
1302 | - pg_stat_statements_internal(fcinfo, PGSS_V1_0, true); | |
1303 | - | |
1304 | - return (Datum) 0; | |
1305 | -} | |
1306 | - | |
1307 | -/* Common code for all versions of pg_stat_statements() */ | |
1308 | -static void | |
1309 | -pg_stat_statements_internal(FunctionCallInfo fcinfo, | |
1310 | - pgssVersion api_version, | |
1311 | - bool showtext) | |
1312 | -{ | |
1313 | - ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; | |
1314 | - TupleDesc tupdesc; | |
1315 | - Tuplestorestate *tupstore; | |
1316 | - MemoryContext per_query_ctx; | |
1317 | - MemoryContext oldcontext; | |
1318 | - Oid userid = GetUserId(); | |
1319 | - bool is_superuser = superuser(); | |
1320 | - char *qbuffer = NULL; | |
1321 | - Size qbuffer_size = 0; | |
1322 | - Size extent = 0; | |
1323 | - int gc_count = 0; | |
1324 | - HASH_SEQ_STATUS hash_seq; | |
1325 | - pgssEntry *entry; | |
1326 | - | |
1327 | - /* hash table must exist already */ | |
1328 | - if (!pgss || !pgss_hash) | |
1329 | - ereport(ERROR, | |
1330 | - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), | |
1331 | - errmsg("pg_stat_statements must be loaded via shared_preload_libraries"))); | |
1332 | - | |
1333 | - /* check to see if caller supports us returning a tuplestore */ | |
1334 | - if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) | |
1335 | - ereport(ERROR, | |
1336 | - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), | |
1337 | - errmsg("set-valued function called in context that cannot accept a set"))); | |
1338 | - if (!(rsinfo->allowedModes & SFRM_Materialize)) | |
1339 | - ereport(ERROR, | |
1340 | - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), | |
1341 | - errmsg("materialize mode required, but it is not " \ | |
1342 | - "allowed in this context"))); | |
1343 | - | |
1344 | - /* Switch into long-lived context to construct returned data structures */ | |
1345 | - per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; | |
1346 | - oldcontext = MemoryContextSwitchTo(per_query_ctx); | |
1347 | - | |
1348 | - /* Build a tuple descriptor for our result type */ | |
1349 | - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) | |
1350 | - elog(ERROR, "return type must be a row type"); | |
1351 | - | |
1352 | - /* | |
1353 | - * Check we have the expected number of output arguments. Aside from | |
1354 | - * being a good safety check, we need a kluge here to detect API version | |
1355 | - * 1.1, which was wedged into the code in an ill-considered way. | |
1356 | - */ | |
1357 | - switch (tupdesc->natts) | |
1358 | - { | |
1359 | - case PG_STAT_STATEMENTS_COLS_V1_0: | |
1360 | - if (api_version != PGSS_V1_0) | |
1361 | - elog(ERROR, "incorrect number of output arguments"); | |
1362 | - break; | |
1363 | - case PG_STAT_STATEMENTS_COLS_V1_1: | |
1364 | - /* pg_stat_statements() should have told us 1.0 */ | |
1365 | - if (api_version != PGSS_V1_0) | |
1366 | - elog(ERROR, "incorrect number of output arguments"); | |
1367 | - api_version = PGSS_V1_1; | |
1368 | - break; | |
1369 | - case PG_STAT_STATEMENTS_COLS_V1_2: | |
1370 | - if (api_version != PGSS_V1_2) | |
1371 | - elog(ERROR, "incorrect number of output arguments"); | |
1372 | - break; | |
1373 | - default: | |
1374 | - elog(ERROR, "incorrect number of output arguments"); | |
1375 | - } | |
1376 | - | |
1377 | - tupstore = tuplestore_begin_heap(true, false, work_mem); | |
1378 | - rsinfo->returnMode = SFRM_Materialize; | |
1379 | - rsinfo->setResult = tupstore; | |
1380 | - rsinfo->setDesc = tupdesc; | |
1381 | - | |
1382 | - MemoryContextSwitchTo(oldcontext); | |
1383 | - | |
1384 | - /* | |
1385 | - * We'd like to load the query text file (if needed) while not holding any | |
1386 | - * lock on pgss->lock. In the worst case we'll have to do this again | |
1387 | - * after we have the lock, but it's unlikely enough to make this a win | |
1388 | - * despite occasional duplicated work. We need to reload if anybody | |
1389 | - * writes to the file (either a retail qtext_store(), or a garbage | |
1390 | - * collection) between this point and where we've gotten shared lock. If | |
1391 | - * a qtext_store is actually in progress when we look, we might as well | |
1392 | - * skip the speculative load entirely. | |
1393 | - */ | |
1394 | - if (showtext) | |
1395 | - { | |
1396 | - int n_writers; | |
1397 | - | |
1398 | - /* Take the mutex so we can examine variables */ | |
1399 | - { | |
1400 | - volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; | |
1401 | - | |
1402 | - SpinLockAcquire(&s->mutex); | |
1403 | - extent = s->extent; | |
1404 | - n_writers = s->n_writers; | |
1405 | - gc_count = s->gc_count; | |
1406 | - SpinLockRelease(&s->mutex); | |
1407 | - } | |
1408 | - | |
1409 | - /* No point in loading file now if there are active writers */ | |
1410 | - if (n_writers == 0) | |
1411 | - qbuffer = qtext_load_file(&qbuffer_size); | |
1412 | - } | |
1413 | - | |
1414 | - /* | |
1415 | - * Get shared lock, load or reload the query text file if we must, and | |
1416 | - * iterate over the hashtable entries. | |
1417 | - * | |
1418 | - * With a large hash table, we might be holding the lock rather longer | |
1419 | - * than one could wish. However, this only blocks creation of new hash | |
1420 | - * table entries, and the larger the hash table the less likely that is to | |
1421 | - * be needed. So we can hope this is okay. Perhaps someday we'll decide | |
1422 | - * we need to partition the hash table to limit the time spent holding any | |
1423 | - * one lock. | |
1424 | - */ | |
1425 | - LWLockAcquire(pgss->lock, LW_SHARED); | |
1426 | - | |
1427 | - if (showtext) | |
1428 | - { | |
1429 | - /* | |
1430 | - * Here it is safe to examine extent and gc_count without taking the | |
1431 | - * mutex. Note that although other processes might change | |
1432 | - * pgss->extent just after we look at it, the strings they then write | |
1433 | - * into the file cannot yet be referenced in the hashtable, so we | |
1434 | - * don't care whether we see them or not. | |
1435 | - * | |
1436 | - * If qtext_load_file fails, we just press on; we'll return NULL for | |
1437 | - * every query text. | |
1438 | - */ | |
1439 | - if (qbuffer == NULL || | |
1440 | - pgss->extent != extent || | |
1441 | - pgss->gc_count != gc_count) | |
1442 | - { | |
1443 | - if (qbuffer) | |
1444 | - free(qbuffer); | |
1445 | - qbuffer = qtext_load_file(&qbuffer_size); | |
1446 | - } | |
1447 | - } | |
1448 | - | |
1449 | - hash_seq_init(&hash_seq, pgss_hash); | |
1450 | - while ((entry = hash_seq_search(&hash_seq)) != NULL) | |
1451 | - { | |
1452 | - Datum values[PG_STAT_STATEMENTS_COLS]; | |
1453 | - bool nulls[PG_STAT_STATEMENTS_COLS]; | |
1454 | - int i = 0; | |
1455 | - Counters tmp; | |
1456 | - int64 queryid = entry->key.queryid; | |
1457 | - | |
1458 | - memset(values, 0, sizeof(values)); | |
1459 | - memset(nulls, 0, sizeof(nulls)); | |
1460 | - | |
1461 | - values[i++] = ObjectIdGetDatum(entry->key.userid); | |
1462 | - values[i++] = ObjectIdGetDatum(entry->key.dbid); | |
1463 | - | |
1464 | - if (is_superuser || entry->key.userid == userid) | |
1465 | - { | |
1466 | - if (api_version >= PGSS_V1_2) | |
1467 | - values[i++] = Int64GetDatumFast(queryid); | |
1468 | - | |
1469 | - if (showtext) | |
1470 | - { | |
1471 | - char *qstr = qtext_fetch(entry->query_offset, | |
1472 | - entry->query_len, | |
1473 | - qbuffer, | |
1474 | - qbuffer_size); | |
1475 | - | |
1476 | - if (qstr) | |
1477 | - { | |
1478 | - char *enc; | |
1479 | - | |
1480 | - enc = pg_any_to_server(qstr, | |
1481 | - entry->query_len, | |
1482 | - entry->encoding); | |
1483 | - | |
1484 | - values[i++] = CStringGetTextDatum(enc); | |
1485 | - | |
1486 | - if (enc != qstr) | |
1487 | - pfree(enc); | |
1488 | - } | |
1489 | - else | |
1490 | - { | |
1491 | - /* Just return a null if we fail to find the text */ | |
1492 | - nulls[i++] = true; | |
1493 | - } | |
1494 | - } | |
1495 | - else | |
1496 | - { | |
1497 | - /* Query text not requested */ | |
1498 | - nulls[i++] = true; | |
1499 | - } | |
1500 | - } | |
1501 | - else | |
1502 | - { | |
1503 | - /* Don't show queryid */ | |
1504 | - if (api_version >= PGSS_V1_2) | |
1505 | - nulls[i++] = true; | |
1506 | - | |
1507 | - /* | |
1508 | - * Don't show query text, but hint as to the reason for not doing | |
1509 | - * so if it was requested | |
1510 | - */ | |
1511 | - if (showtext) | |
1512 | - values[i++] = CStringGetTextDatum("<insufficient privilege>"); | |
1513 | - else | |
1514 | - nulls[i++] = true; | |
1515 | - } | |
1516 | - | |
1517 | - /* copy counters to a local variable to keep locking time short */ | |
1518 | - { | |
1519 | - volatile pgssEntry *e = (volatile pgssEntry *) entry; | |
1520 | - | |
1521 | - SpinLockAcquire(&e->mutex); | |
1522 | - tmp = e->counters; | |
1523 | - SpinLockRelease(&e->mutex); | |
1524 | - } | |
1525 | - | |
1526 | - /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */ | |
1527 | - if (tmp.calls == 0) | |
1528 | - continue; | |
1529 | - | |
1530 | - values[i++] = Int64GetDatumFast(tmp.calls); | |
1531 | - values[i++] = Float8GetDatumFast(tmp.total_time); | |
1532 | - values[i++] = Int64GetDatumFast(tmp.rows); | |
1533 | - values[i++] = Int64GetDatumFast(tmp.shared_blks_hit); | |
1534 | - values[i++] = Int64GetDatumFast(tmp.shared_blks_read); | |
1535 | - if (api_version >= PGSS_V1_1) | |
1536 | - values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied); | |
1537 | - values[i++] = Int64GetDatumFast(tmp.shared_blks_written); | |
1538 | - values[i++] = Int64GetDatumFast(tmp.local_blks_hit); | |
1539 | - values[i++] = Int64GetDatumFast(tmp.local_blks_read); | |
1540 | - if (api_version >= PGSS_V1_1) | |
1541 | - values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied); | |
1542 | - values[i++] = Int64GetDatumFast(tmp.local_blks_written); | |
1543 | - values[i++] = Int64GetDatumFast(tmp.temp_blks_read); | |
1544 | - values[i++] = Int64GetDatumFast(tmp.temp_blks_written); | |
1545 | - if (api_version >= PGSS_V1_1) | |
1546 | - { | |
1547 | - values[i++] = Float8GetDatumFast(tmp.blk_read_time); | |
1548 | - values[i++] = Float8GetDatumFast(tmp.blk_write_time); | |
1549 | - } | |
1550 | - | |
1551 | - Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 : | |
1552 | - api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 : | |
1553 | - api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 : | |
1554 | - -1 /* fail if you forget to update this assert */ )); | |
1555 | - | |
1556 | - tuplestore_putvalues(tupstore, tupdesc, values, nulls); | |
1557 | - } | |
1558 | - | |
1559 | - /* clean up and return the tuplestore */ | |
1560 | - LWLockRelease(pgss->lock); | |
1561 | - | |
1562 | - if (qbuffer) | |
1563 | - free(qbuffer); | |
1564 | - | |
1565 | - tuplestore_donestoring(tupstore); | |
1566 | -} | |
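
Stripped of the pgss specifics, `pg_stat_statements_internal` is the standard recipe for a materialize-mode set-returning function. A hedged skeleton (function name illustrative; the caller-capability checks shown above are omitted for brevity):

```c
#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"          /* work_mem */

/* Hedged skeleton of a materialize-mode SRF, following the shape of
 * pg_stat_statements_internal above. */
Datum
my_set_returning_func(PG_FUNCTION_ARGS)
{
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    TupleDesc      tupdesc;
    Tuplestorestate *tupstore;
    MemoryContext  oldcontext;

    if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    /* build the result in the per-query context so it outlives this call */
    oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
    tupstore = tuplestore_begin_heap(true, false, work_mem);
    rsinfo->returnMode = SFRM_Materialize;
    rsinfo->setResult = tupstore;
    rsinfo->setDesc = tupdesc;
    MemoryContextSwitchTo(oldcontext);

    /* ... one tuplestore_putvalues(tupstore, tupdesc, values, nulls)
     * call per result row goes here ... */

    tuplestore_donestoring(tupstore);
    return (Datum) 0;
}
```
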
1567 | - | |
1568 | -/* | |
1569 | - * Estimate shared memory space needed. | |
1570 | - */ | |
1571 | -static Size | |
1572 | -pgss_memsize(void) | |
1573 | -{ | |
1574 | - Size size; | |
1575 | - | |
1576 | - size = MAXALIGN(sizeof(pgssSharedState)); | |
1577 | - size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry))); | |
1578 | - | |
1579 | - return size; | |
1580 | -} | |
1581 | - | |
1582 | -/* | |
1583 | - * Allocate a new hashtable entry. | |
1584 | - * caller must hold an exclusive lock on pgss->lock | |
1585 | - * | |
1586 | - * "query" need not be null-terminated; we rely on query_len instead | |
1587 | - * | |
1588 | - * If "sticky" is true, make the new entry artificially sticky so that it will | |
1589 | - * probably still be there when the query finishes execution. We do this by | |
1590 | - * giving it a median usage value rather than the normal value. (Strictly | |
1591 | - * speaking, query strings are normalized on a best effort basis, though it | |
1592 | - * would be difficult to demonstrate this even under artificial conditions.) | |
1593 | - * | |
1594 | - * Note: despite needing exclusive lock, it's not an error for the target | |
1595 | - * entry to already exist. This is because pgss_store releases and | |
1596 | - * reacquires lock after failing to find a match; so someone else could | |
1597 | - * have made the entry while we waited to get exclusive lock. | |
1598 | - */ | |
1599 | -static pgssEntry * | |
1600 | -entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, | |
1601 | - bool sticky) | |
1602 | -{ | |
1603 | - pgssEntry *entry; | |
1604 | - bool found; | |
1605 | - | |
1606 | - /* Make space if needed */ | |
1607 | - while (hash_get_num_entries(pgss_hash) >= pgss_max) | |
1608 | - entry_dealloc(); | |
1609 | - | |
1610 | - /* Find or create an entry with desired hash code */ | |
1611 | - entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found); | |
1612 | - | |
1613 | - if (!found) | |
1614 | - { | |
1615 | - /* New entry, initialize it */ | |
1616 | - | |
1617 | - /* reset the statistics */ | |
1618 | - memset(&entry->counters, 0, sizeof(Counters)); | |
1619 | - /* set the appropriate initial usage count */ | |
1620 | - entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT; | |
1621 | - /* re-initialize the mutex each time ... we assume no one using it */ | |
1622 | - SpinLockInit(&entry->mutex); | |
1623 | - /* ... and don't forget the query text metadata */ | |
1624 | - Assert(query_len >= 0); | |
1625 | - entry->query_offset = query_offset; | |
1626 | - entry->query_len = query_len; | |
1627 | - entry->encoding = encoding; | |
1628 | - } | |
1629 | - | |
1630 | - return entry; | |
1631 | -} | |
1632 | - | |
1633 | -/* | |
1634 | - * qsort comparator for sorting into increasing usage order | |
1635 | - */ | |
1636 | -static int | |
1637 | -entry_cmp(const void *lhs, const void *rhs) | |
1638 | -{ | |
1639 | - double l_usage = (*(pgssEntry *const *) lhs)->counters.usage; | |
1640 | - double r_usage = (*(pgssEntry *const *) rhs)->counters.usage; | |
1641 | - | |
1642 | - if (l_usage < r_usage) | |
1643 | - return -1; | |
1644 | - else if (l_usage > r_usage) | |
1645 | - return +1; | |
1646 | - else | |
1647 | - return 0; | |
1648 | -} | |
1649 | - | |
1650 | -/* | |
1651 | - * Deallocate least used entries. | |
1652 | - * Caller must hold an exclusive lock on pgss->lock. | |
1653 | - */ | |
1654 | -static void | |
1655 | -entry_dealloc(void) | |
1656 | -{ | |
1657 | - HASH_SEQ_STATUS hash_seq; | |
1658 | - pgssEntry **entries; | |
1659 | - pgssEntry *entry; | |
1660 | - int nvictims; | |
1661 | - int i; | |
1662 | - Size totlen = 0; | |
1663 | - | |
1664 | - /* | |
1665 | - * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them. | |
1666 | - * While we're scanning the table, apply the decay factor to the usage | |
1667 | - * values. | |
1668 | - */ | |
1669 | - | |
1670 | - entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *)); | |
1671 | - | |
1672 | - i = 0; | |
1673 | - hash_seq_init(&hash_seq, pgss_hash); | |
1674 | - while ((entry = hash_seq_search(&hash_seq)) != NULL) | |
1675 | - { | |
1676 | - entries[i++] = entry; | |
1677 | - /* "Sticky" entries get a different usage decay rate. */ | |
1678 | - if (entry->counters.calls == 0) | |
1679 | - entry->counters.usage *= STICKY_DECREASE_FACTOR; | |
1680 | - else | |
1681 | - entry->counters.usage *= USAGE_DECREASE_FACTOR; | |
1682 | - /* Accumulate total size, too. */ | |
1683 | - totlen += entry->query_len + 1; | |
1684 | - } | |
1685 | - | |
1686 | - qsort(entries, i, sizeof(pgssEntry *), entry_cmp); | |
1687 | - | |
1688 | - if (i > 0) | |
1689 | - { | |
1690 | - /* Record the (approximate) median usage */ | |
1691 | - pgss->cur_median_usage = entries[i / 2]->counters.usage; | |
1692 | - /* Record the mean query length */ | |
1693 | - pgss->mean_query_len = totlen / i; | |
1694 | - } | |
1695 | - | |
1696 | - nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100); | |
1697 | - nvictims = Min(nvictims, i); | |
1698 | - | |
1699 | - for (i = 0; i < nvictims; i++) | |
1700 | - { | |
1701 | - hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL); | |
1702 | - } | |
1703 | - | |
1704 | - pfree(entries); | |
1705 | -} | |
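
To make the eviction sizing concrete: `USAGE_DEALLOC_PERCENT` is defined earlier in the file (5 in stock pg_stat_statements; treated as an assumption here), so with 1000 live entries the loop removes Max(10, 50) = 50 of the lowest-usage entries. A standalone illustration:

```c
#include <stdio.h>

/* Standalone illustration of the victim count above, assuming
 * USAGE_DEALLOC_PERCENT is 5 (its definition is outside this excerpt). */
#define USAGE_DEALLOC_PERCENT 5
#define Max(a,b) ((a) > (b) ? (a) : (b))
#define Min(a,b) ((a) < (b) ? (a) : (b))

int
main(void)
{
    int i = 1000;       /* live hashtable entries */
    int nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);

    nvictims = Min(nvictims, i);
    printf("%d\n", nvictims);   /* prints 50: the least-used 5 percent */
    return 0;
}
```
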
1706 | - | |
1707 | -/* | |
1708 | - * Given a null-terminated string, allocate a new entry in the external query | |
1709 | - * text file and store the string there. | |
1710 | - * | |
1711 | - * Although we could compute the string length via strlen(), callers already | |
1712 | - * have it handy, so we require them to pass it too. | |
1713 | - * | |
1714 | - * If successful, returns true, and stores the new entry's offset in the file | |
1715 | - * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the | |
1716 | - * number of garbage collections that have occurred so far. | |
1717 | - * | |
1718 | - * On failure, returns false. | |
1719 | - * | |
1720 | - * At least a shared lock on pgss->lock must be held by the caller, so as | |
1721 | - * to prevent a concurrent garbage collection. Share-lock-holding callers | |
1722 | - * should pass a gc_count pointer to obtain the number of garbage collections, | |
1723 | - * so that they can recheck the count after obtaining exclusive lock to | |
1724 | - * detect whether a garbage collection occurred (and removed this entry). | |
1725 | - */ | |
1726 | -static bool | |
1727 | -qtext_store(const char *query, int query_len, | |
1728 | - Size *query_offset, int *gc_count) | |
1729 | -{ | |
1730 | - Size off; | |
1731 | - int fd; | |
1732 | - | |
1733 | - /* | |
1734 | - * We use a spinlock to protect extent/n_writers/gc_count, so that | |
1735 | - * multiple processes may execute this function concurrently. | |
1736 | - */ | |
1737 | - { | |
1738 | - volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; | |
1739 | - | |
1740 | - SpinLockAcquire(&s->mutex); | |
1741 | - off = s->extent; | |
1742 | - s->extent += query_len + 1; | |
1743 | - s->n_writers++; | |
1744 | - if (gc_count) | |
1745 | - *gc_count = s->gc_count; | |
1746 | - SpinLockRelease(&s->mutex); | |
1747 | - } | |
1748 | - | |
1749 | - *query_offset = off; | |
1750 | - | |
1751 | - /* Now write the data into the successfully-reserved part of the file */ | |
1752 | - fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY, | |
1753 | - S_IRUSR | S_IWUSR); | |
1754 | - if (fd < 0) | |
1755 | - goto error; | |
1756 | - | |
1757 | - if (lseek(fd, off, SEEK_SET) != off) | |
1758 | - goto error; | |
1759 | - | |
1760 | - if (write(fd, query, query_len + 1) != query_len + 1) | |
1761 | - goto error; | |
1762 | - | |
1763 | - CloseTransientFile(fd); | |
1764 | - | |
1765 | - /* Mark our write complete */ | |
1766 | - { | |
1767 | - volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; | |
1768 | - | |
1769 | - SpinLockAcquire(&s->mutex); | |
1770 | - s->n_writers--; | |
1771 | - SpinLockRelease(&s->mutex); | |
1772 | - } | |
1773 | - | |
1774 | - return true; | |
1775 | - | |
1776 | -error: | |
1777 | - ereport(LOG, | |
1778 | - (errcode_for_file_access(), | |
1779 | - errmsg("could not write pg_stat_statement file \"%s\": %m", | |
1780 | - PGSS_TEXT_FILE))); | |
1781 | - | |
1782 | - if (fd >= 0) | |
1783 | - CloseTransientFile(fd); | |
1784 | - | |
1785 | - /* Mark our write complete */ | |
1786 | - { | |
1787 | - volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; | |
1788 | - | |
1789 | - SpinLockAcquire(&s->mutex); | |
1790 | - s->n_writers--; | |
1791 | - SpinLockRelease(&s->mutex); | |
1792 | - } | |
1793 | - | |
1794 | - return false; | |
1795 | -} | |
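
The key trick in `qtext_store` is that only the offset reservation needs the spinlock; the actual `write()` happens unlocked because each writer owns a disjoint byte range. The reservation step in isolation, assuming a struct shaped like the relevant fields of `pgssSharedState`:

```c
#include "postgres.h"
#include "storage/spin.h"

/* Hedged sketch of the spinlock-protected reservation above; the mutex
 * is assumed to have been SpinLockInit'd at startup. */
typedef struct SharedExtent
{
    slock_t mutex;
    Size    extent;     /* next free byte in the external text file */
    int     n_writers;  /* writers currently appending */
} SharedExtent;

static Size
reserve_space(volatile SharedExtent *s, Size len)
{
    Size off;

    SpinLockAcquire(&s->mutex);
    off = s->extent;    /* claim a private range... */
    s->extent += len;   /* ...and advance the shared pointer */
    s->n_writers++;     /* readers skip speculative file loads while > 0 */
    SpinLockRelease(&s->mutex);

    return off;         /* [off, off+len) may now be written unlocked */
}
```
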
1796 | - | |
1797 | -/* | |
1798 | - * Read the external query text file into a malloc'd buffer. | |
1799 | - * | |
1800 | - * Returns NULL (without throwing an error) if unable to read, eg | |
1801 | - * file not there or insufficient memory. | |
1802 | - * | |
1803 | - * On success, the buffer size is also returned into *buffer_size. | |
1804 | - * | |
1805 | - * This can be called without any lock on pgss->lock, but in that case | |
1806 | - * the caller is responsible for verifying that the result is sane. | |
1807 | - */ | |
1808 | -static char * | |
1809 | -qtext_load_file(Size *buffer_size) | |
1810 | -{ | |
1811 | - char *buf; | |
1812 | - int fd; | |
1813 | - struct stat stat; | |
1814 | - | |
1815 | - fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY, 0); | |
1816 | - if (fd < 0) | |
1817 | - { | |
1818 | - if (errno != ENOENT) | |
1819 | - ereport(LOG, | |
1820 | - (errcode_for_file_access(), | |
1821 | - errmsg("could not read pg_stat_statement file \"%s\": %m", | |
1822 | - PGSS_TEXT_FILE))); | |
1823 | - return NULL; | |
1824 | - } | |
1825 | - | |
1826 | - /* Get file length */ | |
1827 | - if (fstat(fd, &stat)) | |
1828 | - { | |
1829 | - ereport(LOG, | |
1830 | - (errcode_for_file_access(), | |
1831 | - errmsg("could not stat pg_stat_statement file \"%s\": %m", | |
1832 | - PGSS_TEXT_FILE))); | |
1833 | - CloseTransientFile(fd); | |
1834 | - return NULL; | |
1835 | - } | |
1836 | - | |
1837 | - /* Allocate buffer; beware that off_t might be wider than size_t */ | |
1838 | - if (stat.st_size <= MaxAllocSize) | |
1839 | - buf = (char *) malloc(stat.st_size); | |
1840 | - else | |
1841 | - buf = NULL; | |
1842 | - if (buf == NULL) | |
1843 | - { | |
1844 | - ereport(LOG, | |
1845 | - (errcode(ERRCODE_OUT_OF_MEMORY), | |
1846 | - errmsg("out of memory"))); | |
1847 | - CloseTransientFile(fd); | |
1848 | - return NULL; | |
1849 | - } | |
1850 | - | |
1851 | - /* | |
1852 | - * OK, slurp in the file. If we get a short read and errno doesn't get | |
1853 | - * set, the reason is probably that garbage collection truncated the file | |
1854 | - * since we did the fstat(), so we don't log a complaint --- but we don't | |
1855 | - * return the data, either, since it's most likely corrupt due to | |
1856 | - * concurrent writes from garbage collection. | |
1857 | - */ | |
1858 | - errno = 0; | |
1859 | - if (read(fd, buf, stat.st_size) != stat.st_size) | |
1860 | - { | |
1861 | - if (errno) | |
1862 | - ereport(LOG, | |
1863 | - (errcode_for_file_access(), | |
1864 | - errmsg("could not read pg_stat_statement file \"%s\": %m", | |
1865 | - PGSS_TEXT_FILE))); | |
1866 | - free(buf); | |
1867 | - CloseTransientFile(fd); | |
1868 | - return NULL; | |
1869 | - } | |
1870 | - | |
1871 | - CloseTransientFile(fd); | |
1872 | - | |
1873 | - *buffer_size = stat.st_size; | |
1874 | - return buf; | |
1875 | -} | |
1876 | - | |
1877 | -/* | |
1878 | - * Locate a query text in the file image previously read by qtext_load_file(). | |
1879 | - * | |
1880 | - * We validate the given offset/length, and return NULL if bogus. Otherwise, | |
1881 | - * the result points to a null-terminated string within the buffer. | |
1882 | - */ | |
1883 | -static char * | |
1884 | -qtext_fetch(Size query_offset, int query_len, | |
1885 | - char *buffer, Size buffer_size) | |
1886 | -{ | |
1887 | - /* File read failed? */ | |
1888 | - if (buffer == NULL) | |
1889 | - return NULL; | |
1890 | - /* Bogus offset/length? */ | |
1891 | - if (query_len < 0 || | |
1892 | - query_offset + query_len >= buffer_size) | |
1893 | - return NULL; | |
1894 | - /* As a further sanity check, make sure there's a trailing null */ | |
1895 | - if (buffer[query_offset + query_len] != '\0') | |
1896 | - return NULL; | |
1897 | - /* Looks OK */ | |
1898 | - return buffer + query_offset; | |
1899 | -} | |
1900 | - | |
1901 | -/* | |
1902 | - * Do we need to garbage-collect the external query text file? | |
1903 | - * | |
1904 | - * Caller should hold at least a shared lock on pgss->lock. | |
1905 | - */ | |
1906 | -static bool | |
1907 | -need_gc_qtexts(void) | |
1908 | -{ | |
1909 | - Size extent; | |
1910 | - | |
1911 | - /* Read shared extent pointer */ | |
1912 | - { | |
1913 | - volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; | |
1914 | - | |
1915 | - SpinLockAcquire(&s->mutex); | |
1916 | - extent = s->extent; | |
1917 | - SpinLockRelease(&s->mutex); | |
1918 | - } | |
1919 | - | |
1920 | - /* Don't proceed if file does not exceed 512 bytes per possible entry */ | |
1921 | - if (extent < 512 * pgss_max) | |
1922 | - return false; | |
1923 | - | |
1924 | - /* | |
1925 | - * Don't proceed if file is less than about 50% bloat. Nothing can or | |
1926 | - * should be done in the event of unusually large query texts accounting | |
1927 | - * for file's large size. We go to the trouble of maintaining the mean | |
1928 | - * query length in order to prevent garbage collection from thrashing | |
1929 | - * uselessly. | |
1930 | - */ | |
1931 | - if (extent < pgss->mean_query_len * pgss_max * 2) | |
1932 | - return false; | |
1933 | - | |
1934 | - return true; | |
1935 | -} | |
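
Concretely, with illustrative numbers (pgss_max = 5000 possible entries, mean_query_len = 1024 bytes), the two tests above mean garbage collection fires only once the file passes both ~2.5 MB (512 bytes per possible entry) and ~10 MB (twice the live text the hashtable could reference):

```c
#include <stdio.h>

/* Worked example of the two need_gc_qtexts() thresholds, with
 * illustrative numbers: pgss_max = 5000, mean_query_len = 1024. */
int
main(void)
{
    size_t pgss_max = 5000;
    size_t mean_query_len = 1024;

    size_t floor_bytes = 512 * pgss_max;                /* 2,560,000 */
    size_t bloat_bytes = mean_query_len * pgss_max * 2; /* 10,240,000 */

    /* GC fires only once extent exceeds BOTH values: here, once the
     * file passes ~10 MB, i.e. roughly twice the ~5 MB of live text
     * that the hashtable could actually reference. */
    printf("%zu %zu\n", floor_bytes, bloat_bytes);
    return 0;
}
```
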
1936 | - | |
1937 | -/* | |
1938 | - * Garbage-collect orphaned query texts in external file. | |
1939 | - * | |
1940 | - * This won't be called often in the typical case, since it's likely that | |
1941 | - * there won't be too much churn, and besides, a similar compaction process | |
1942 | - * occurs when serializing to disk at shutdown or as part of resetting. | |
1943 | - * Despite this, it seems prudent to plan for the edge case where the file | |
1944 | - * becomes unreasonably large, with no other method of compaction likely to | |
1945 | - * occur in the foreseeable future. | |
1946 | - * | |
1947 | - * The caller must hold an exclusive lock on pgss->lock. | |
1948 | - */ | |
1949 | -static void | |
1950 | -gc_qtexts(void) | |
1951 | -{ | |
1952 | - char *qbuffer; | |
1953 | - Size qbuffer_size; | |
1954 | - FILE *qfile; | |
1955 | - HASH_SEQ_STATUS hash_seq; | |
1956 | - pgssEntry *entry; | |
1957 | - Size extent; | |
1958 | - int nentries; | |
1959 | - | |
1960 | - /* | |
1961 | - * When called from pgss_store, some other session might have proceeded | |
1962 | - * with garbage collection in the no-lock-held interim of lock strength | |
1963 | - * escalation. Check once more that this is actually necessary. | |
1964 | - */ | |
1965 | - if (!need_gc_qtexts()) | |
1966 | - return; | |
1967 | - | |
1968 | - /* | |
1969 | - * Load the old texts file. If we fail (out of memory, for instance) just | |
1970 | - * skip the garbage collection. | |
1971 | - */ | |
1972 | - qbuffer = qtext_load_file(&qbuffer_size); | |
1973 | - if (qbuffer == NULL) | |
1974 | - return; | |
1975 | - | |
1976 | - /* | |
1977 | - * We overwrite the query texts file in place, so as to reduce the risk of | |
1978 | - * an out-of-disk-space failure. Since the file is guaranteed not to get | |
1979 | - * larger, this should always work on traditional filesystems; though we | |
1980 | - * could still lose on copy-on-write filesystems. | |
1981 | - */ | |
1982 | - qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W); | |
1983 | - if (qfile == NULL) | |
1984 | - { | |
1985 | - ereport(LOG, | |
1986 | - (errcode_for_file_access(), | |
1987 | - errmsg("could not write pg_stat_statement file \"%s\": %m", | |
1988 | - PGSS_TEXT_FILE))); | |
1989 | - goto gc_fail; | |
1990 | - } | |
1991 | - | |
1992 | - extent = 0; | |
1993 | - nentries = 0; | |
1994 | - | |
1995 | - hash_seq_init(&hash_seq, pgss_hash); | |
1996 | - while ((entry = hash_seq_search(&hash_seq)) != NULL) | |
1997 | - { | |
1998 | - int query_len = entry->query_len; | |
1999 | - char *qry = qtext_fetch(entry->query_offset, | |
2000 | - query_len, | |
2001 | - qbuffer, | |
2002 | - qbuffer_size); | |
2003 | - | |
2004 | - if (qry == NULL) | |
2005 | - { | |
2006 | - /* Trouble ... drop the text */ | |
2007 | - entry->query_offset = 0; | |
2008 | - entry->query_len = -1; | |
2009 | - continue; | |
2010 | - } | |
2011 | - | |
2012 | - if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1) | |
2013 | - { | |
2014 | - ereport(LOG, | |
2015 | - (errcode_for_file_access(), | |
2016 | - errmsg("could not write pg_stat_statement file \"%s\": %m", | |
2017 | - PGSS_TEXT_FILE))); | |
2018 | - hash_seq_term(&hash_seq); | |
2019 | - goto gc_fail; | |
2020 | - } | |
2021 | - | |
2022 | - entry->query_offset = extent; | |
2023 | - extent += query_len + 1; | |
2024 | - nentries++; | |
2025 | - } | |
2026 | - | |
2027 | - /* | |
2028 | - * Truncate away any now-unused space. If this fails for some odd reason, | |
2029 | - * we log it, but there's no need to fail. | |
2030 | - */ | |
2031 | - if (ftruncate(fileno(qfile), extent) != 0) | |
2032 | - ereport(LOG, | |
2033 | - (errcode_for_file_access(), | |
2034 | - errmsg("could not truncate pg_stat_statement file \"%s\": %m", | |
2035 | - PGSS_TEXT_FILE))); | |
2036 | - | |
2037 | - if (FreeFile(qfile)) | |
2038 | - { | |
2039 | - ereport(LOG, | |
2040 | - (errcode_for_file_access(), | |
2041 | - errmsg("could not write pg_stat_statement file \"%s\": %m", | |
2042 | - PGSS_TEXT_FILE))); | |
2043 | - qfile = NULL; | |
2044 | - goto gc_fail; | |
2045 | - } | |
2046 | - | |
2047 | - elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu", | |
2048 | - pgss->extent, extent); | |
2049 | - | |
2050 | - /* Reset the shared extent pointer */ | |
2051 | - pgss->extent = extent; | |
2052 | - | |
2053 | - /* | |
2054 | - * Also update the mean query length, to be sure that need_gc_qtexts() | |
2055 | - * won't still think we have a problem. | |
2056 | - */ | |
2057 | - if (nentries > 0) | |
2058 | - pgss->mean_query_len = extent / nentries; | |
2059 | - else | |
2060 | - pgss->mean_query_len = ASSUMED_LENGTH_INIT; | |
2061 | - | |
2062 | - free(qbuffer); | |
2063 | - | |
2064 | - /* | |
2065 | - * OK, count a garbage collection cycle. (Note: even though we have | |
2066 | - * exclusive lock on pgss->lock, we must take pgss->mutex for this, since | |
2067 | - * other processes may examine gc_count while holding only the mutex. | |
2068 | - * Also, we have to advance the count *after* we've rewritten the file, | |
2069 | - * else other processes might not realize they read a stale file.) | |
2070 | - */ | |
2071 | - record_gc_qtexts(); | |
2072 | - | |
2073 | - return; | |
2074 | - | |
2075 | -gc_fail: | |
2076 | - /* clean up resources */ | |
2077 | - if (qfile) | |
2078 | - FreeFile(qfile); | |
2079 | - if (qbuffer) | |
2080 | - free(qbuffer); | |
2081 | - | |
2082 | - /* | |
2083 | - * Since the contents of the external file are now uncertain, mark all | |
2084 | - * hashtable entries as having invalid texts. | |
2085 | - */ | |
2086 | - hash_seq_init(&hash_seq, pgss_hash); | |
2087 | - while ((entry = hash_seq_search(&hash_seq)) != NULL) | |
2088 | - { | |
2089 | - entry->query_offset = 0; | |
2090 | - entry->query_len = -1; | |
2091 | - } | |
2092 | - | |
2093 | - /* Seems like a good idea to bump the GC count even though we failed */ | |
2094 | - record_gc_qtexts(); | |
2095 | -} | |
2096 | - | |
2097 | -/* | |
2098 | - * Release all entries. | |
9 | + *------------------------------------------------------------------------- | |
2099 | 10 | */ |
2100 | -static void | |
2101 | -entry_reset(void) | |
2102 | -{ | |
2103 | - HASH_SEQ_STATUS hash_seq; | |
2104 | - pgssEntry *entry; | |
2105 | - FILE *qfile; | |
2106 | - | |
2107 | - LWLockAcquire(pgss->lock, LW_EXCLUSIVE); | |
2108 | - | |
2109 | - hash_seq_init(&hash_seq, pgss_hash); | |
2110 | - while ((entry = hash_seq_search(&hash_seq)) != NULL) | |
2111 | - { | |
2112 | - hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL); | |
2113 | - } | |
2114 | - | |
2115 | - /* | |
2116 | - * Write new empty query file, perhaps even creating a new one to recover | |
2117 | - * if the file was missing. | |
2118 | - */ | |
2119 | - qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W); | |
2120 | - if (qfile == NULL) | |
2121 | - { | |
2122 | - ereport(LOG, | |
2123 | - (errcode_for_file_access(), | |
2124 | - errmsg("could not create pg_stat_statement file \"%s\": %m", | |
2125 | - PGSS_TEXT_FILE))); | |
2126 | - goto done; | |
2127 | - } | |
2128 | - | |
2129 | - /* If ftruncate fails, log it, but it's not a fatal problem */ | |
2130 | - if (ftruncate(fileno(qfile), 0) != 0) | |
2131 | - ereport(LOG, | |
2132 | - (errcode_for_file_access(), | |
2133 | - errmsg("could not truncate pg_stat_statement file \"%s\": %m", | |
2134 | - PGSS_TEXT_FILE))); | |
11 | +#include "postgres.h" | |
2135 | 12 | |
2136 | - FreeFile(qfile); | |
13 | +#include <sys/stat.h> | |
2137 | 14 | |
2138 | -done: | |
2139 | - pgss->extent = 0; | |
2140 | - /* This counts as a query text garbage collection for our purposes */ | |
2141 | - record_gc_qtexts(); | |
15 | +#include "access/hash.h" | |
16 | +#include "parser/scanner.h" | |
2142 | 17 | |
2143 | - LWLockRelease(pgss->lock); | |
2144 | -} | |
2145 | -#endif | |
18 | +static void AppendJumble(pgssJumbleState *jstate, | |
19 | + const unsigned char *item, Size size); | |
20 | +static void JumbleQuery(pgssJumbleState *jstate, Query *query); | |
21 | +static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable); | |
22 | +static void JumbleExpr(pgssJumbleState *jstate, Node *node); | |
23 | +static void RecordConstLocation(pgssJumbleState *jstate, int location); | |
24 | +static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query); | |
25 | +static int comp_location(const void *a, const void *b); | |
2146 | 26 | |
2147 | 27 | /* |
2148 | 28 | * AppendJumble: Append a value that is substantive in a given query to |
@@ -2209,8 +89,10 @@ JumbleQuery(pgssJumbleState *jstate, Query *query) | ||
2209 | 89 | JumbleRangeTable(jstate, query->rtable); |
2210 | 90 | JumbleExpr(jstate, (Node *) query->jointree); |
2211 | 91 | JumbleExpr(jstate, (Node *) query->targetList); |
92 | + JumbleExpr(jstate, (Node *) query->onConflict); | |
2212 | 93 | JumbleExpr(jstate, (Node *) query->returningList); |
2213 | 94 | JumbleExpr(jstate, (Node *) query->groupClause); |
95 | + JumbleExpr(jstate, (Node *) query->groupingSets); | |
2214 | 96 | JumbleExpr(jstate, query->havingQual); |
2215 | 97 | JumbleExpr(jstate, (Node *) query->windowClause); |
2216 | 98 | JumbleExpr(jstate, (Node *) query->distinctClause); |
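
This hunk teaches the fingerprint about the Query fields new in PostgreSQL 9.5 (`onConflict` for INSERT ... ON CONFLICT and `groupingSets` for GROUPING SETS). For reference, the `APP_JUMB` macro used throughout these hunks is a thin wrapper over `AppendJumble`, defined earlier in the file (reproduced here as a sketch, since its definition falls outside this excerpt):

```c
/* Feed the raw bytes of a scalar field into the running query jumble. */
#define APP_JUMB(item) \
    AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
```
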
@@ -2239,6 +121,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable) | ||
2239 | 121 | { |
2240 | 122 | case RTE_RELATION: |
2241 | 123 | APP_JUMB(rte->relid); |
124 | + JumbleExpr(jstate, (Node *) rte->tablesample); | |
2242 | 125 | break; |
2243 | 126 | case RTE_SUBQUERY: |
2244 | 127 | JumbleQuery(jstate, rte->subquery); |
@@ -2341,6 +224,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) | ||
2341 | 224 | JumbleExpr(jstate, (Node *) expr->aggfilter); |
2342 | 225 | } |
2343 | 226 | break; |
227 | + case T_GroupingFunc: | |
228 | + { | |
229 | + GroupingFunc *grpnode = (GroupingFunc *) node; | |
230 | + | |
231 | + JumbleExpr(jstate, (Node *) grpnode->refs); | |
232 | + } | |
233 | + break; | |
2344 | 234 | case T_WindowFunc: |
2345 | 235 | { |
2346 | 236 | WindowFunc *expr = (WindowFunc *) node; |
@@ -2576,6 +466,15 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) | ||
2576 | 466 | APP_JUMB(ce->cursor_param); |
2577 | 467 | } |
2578 | 468 | break; |
469 | + case T_InferenceElem: | |
470 | + { | |
471 | + InferenceElem *ie = (InferenceElem *) node; | |
472 | + | |
473 | + APP_JUMB(ie->infercollid); | |
474 | + APP_JUMB(ie->inferopclass); | |
475 | + JumbleExpr(jstate, ie->expr); | |
476 | + } | |
477 | + break; | |
2579 | 478 | case T_TargetEntry: |
2580 | 479 | { |
2581 | 480 | TargetEntry *tle = (TargetEntry *) node; |
@@ -2612,12 +511,32 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) | ||
2612 | 511 | JumbleExpr(jstate, from->quals); |
2613 | 512 | } |
2614 | 513 | break; |
514 | + case T_OnConflictExpr: | |
515 | + { | |
516 | + OnConflictExpr *conf = (OnConflictExpr *) node; | |
517 | + | |
518 | + APP_JUMB(conf->action); | |
519 | + JumbleExpr(jstate, (Node *) conf->arbiterElems); | |
520 | + JumbleExpr(jstate, conf->arbiterWhere); | |
521 | + JumbleExpr(jstate, (Node *) conf->onConflictSet); | |
522 | + JumbleExpr(jstate, conf->onConflictWhere); | |
523 | + APP_JUMB(conf->constraint); | |
524 | + APP_JUMB(conf->exclRelIndex); | |
525 | + JumbleExpr(jstate, (Node *) conf->exclRelTlist); | |
526 | + } | |
527 | + break; | |
2615 | 528 | case T_List: |
2616 | 529 | foreach(temp, (List *) node) |
2617 | 530 | { |
2618 | 531 | JumbleExpr(jstate, (Node *) lfirst(temp)); |
2619 | 532 | } |
2620 | 533 | break; |
534 | + case T_IntList: | |
535 | + foreach(temp, (List *) node) | |
536 | + { | |
537 | + APP_JUMB(lfirst_int(temp)); | |
538 | + } | |
539 | + break; | |
2621 | 540 | case T_SortGroupClause: |
2622 | 541 | { |
2623 | 542 | SortGroupClause *sgc = (SortGroupClause *) node; |
@@ -2628,6 +547,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) | ||
2628 | 547 | APP_JUMB(sgc->nulls_first); |
2629 | 548 | } |
2630 | 549 | break; |
550 | + case T_GroupingSet: | |
551 | + { | |
552 | + GroupingSet *gsnode = (GroupingSet *) node; | |
553 | + | |
554 | + JumbleExpr(jstate, (Node *) gsnode->content); | |
555 | + } | |
556 | + break; | |
2631 | 557 | case T_WindowClause: |
2632 | 558 | { |
2633 | 559 | WindowClause *wc = (WindowClause *) node; |
@@ -2666,6 +592,15 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) | ||
2666 | 592 | JumbleExpr(jstate, rtfunc->funcexpr); |
2667 | 593 | } |
2668 | 594 | break; |
595 | + case T_TableSampleClause: | |
596 | + { | |
597 | + TableSampleClause *tsc = (TableSampleClause *) node; | |
598 | + | |
599 | + APP_JUMB(tsc->tsmhandler); | |
600 | + JumbleExpr(jstate, (Node *) tsc->args); | |
601 | + JumbleExpr(jstate, (Node *) tsc->repeatable); | |
602 | + } | |
603 | + break; | |
2669 | 604 | default: |
2670 | 605 | /* Only a warning, since we can stumble along anyway */ |
2671 | 606 | elog(WARNING, "unrecognized node type: %d", |
@@ -2827,6 +762,9 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query) | ||
2827 | 762 | ScanKeywords, |
2828 | 763 | NumScanKeywords); |
2829 | 764 | |
765 | + /* we don't want to re-emit any escape string warnings */ | |
766 | + yyextra.escape_string_warning = false; | |
767 | + | |
2830 | 768 | /* Search for each constant, in sequence */ |
2831 | 769 | for (i = 0; i < jstate->clocations_count; i++) |
2832 | 770 | { |
@@ -338,7 +338,7 @@ EXPLAIN (COSTS false) SELECT * FROM t1 FULL OUTER JOIN t2 ON (t1.id = t2.id); | ||
338 | 338 | /*+NestLoop(t1 t2)*/ |
339 | 339 | EXPLAIN (COSTS false) SELECT * FROM t1 FULL OUTER JOIN t2 ON (t1.id = t2.id); |
340 | 340 | |
341 | --- inherite table test | |
341 | +-- inheritance tables test | |
342 | 342 | SET constraint_exclusion TO off; |
343 | 343 | EXPLAIN (COSTS false) SELECT * FROM p1 WHERE id >= 50 AND id <= 51 AND p1.ctid = '(1,1)'; |
344 | 344 | SET constraint_exclusion TO on; |