first release
| Revision | 8edf1db866961ff432fe9b89ff708e38267b4505 (tree) |
|---|---|
| Date | 2016-01-15 14:12:14 |
| Author | Kyotaro Horiguchi <horiguchi.kyotaro@lab....> |
| Committer | Kyotaro Horiguchi |

Support PostgreSQL 9.5.0.
The PG95 branch was cut a bit too early, so it needs an additional
merge from the master branch to complete PostgreSQL 9.5 support.
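For context, pg_hint_plan reads an optimizer hint from a comment block placed at the head of the query text. A minimal smoke test against a 9.5 server might look like this (a sketch; the table `t1` is hypothetical, while the `/*+ SeqScan(t1) */` hint syntax and `CREATE EXTENSION pg_hint_plan` appear verbatim in the regression output and SQL script below):

```sql
CREATE EXTENSION pg_hint_plan;

CREATE TABLE t1 (id int PRIMARY KEY, val int);

-- The leading hint comment forces a sequential scan on t1,
-- even where an index scan would normally be chosen.
/*+ SeqScan(t1) */
EXPLAIN (COSTS false) SELECT * FROM t1 WHERE id = 1;
```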
```diff
@@ -1,4 +1,4 @@
-Copyright (c) 2012-2014, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
+Copyright (c) 2012-2016, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -2,7 +2,7 @@ core.c and make_join_rel.c are parts of PostgreSQL Database Management System.
 (formerly known as Postgres, then as Postgres95)
 Copyright holders of those files are following organizations:
 
-Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 
 Portions Copyright (c) 1994, The Regents of the University of California
 
```
```diff
@@ -1,11 +1,11 @@
 #
 # pg_hint_plan: Makefile
 #
-# Copyright (c) 2012-2014, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
+# Copyright (c) 2012-2015, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
 #
 
 MODULES = pg_hint_plan
-HINTPLANVER = 1.1.2
+HINTPLANVER = 1.1.3
 
 REGRESS = init base_plan pg_hint_plan ut-init ut-A ut-S ut-J ut-L ut-G ut-R ut-fdw ut-fini
 
@@ -14,7 +14,7 @@ REGRESSION_EXPECTED = expected/init.out expected/base_plan.out expected/pg_hint_
 REGRESS_OPTS = --encoding=UTF8
 
 EXTENSION = pg_hint_plan
-DATA = pg_hint_plan--1.1.2.sql
+DATA = pg_hint_plan--1.1.3.sql
 
 EXTRA_CLEAN = sql/ut-fdw.sql expected/ut-fdw.out
 
@@ -23,8 +23,8 @@ PGXS := $(shell $(PG_CONFIG) --pgxs)
 include $(PGXS)
 
 STARBALL = pg_dbms_stats-$(DBMSSTATSVER).tar.gz
-STARBALL94 = pg_hint_plan94-$(HINTPLANVER).tar.gz
-STARBALLS = $(STARBALL) $(STARBALL94)
+STARBALL95 = pg_hint_plan95-$(HINTPLANVER).tar.gz
+STARBALLS = $(STARBALL) $(STARBALL95)
 
 TARSOURCES = Makefile *.c *.h \
 	pg_hint_plan--*.sql \
@@ -34,7 +34,7 @@ TARSOURCES = Makefile *.c *.h \
 
 installcheck: $(REGRESSION_EXPECTED)
 
-rpms: rpm94
+rpms: rpm95
 
 # pg_hint_plan.c includes core.c and make_join_rel.c
 pg_hint_plan.o: core.c make_join_rel.c # pg_stat_statements.c
@@ -49,7 +49,7 @@ $(STARBALLS): $(TARSOURCES)
 	tar -chzf $@ $(addprefix $(subst .tar.gz,,$@)/, $^)
 	rm $(subst .tar.gz,,$@)
 
-rpm94: $(STARBALL94)
-	MAKE_ROOT=`pwd` rpmbuild -bb SPECS/pg_hint_plan94.spec
+rpm95: $(STARBALL95)
+	MAKE_ROOT=`pwd` rpmbuild -bb SPECS/pg_hint_plan95.spec
 
 
```
```diff
@@ -1,84 +0,0 @@
-# SPEC file for pg_hint_plan
-# Copyright(C) 2012-2014 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
-
-%define _pgdir /usr/pgsql-9.4
-%define _bindir %{_pgdir}/bin
-%define _libdir %{_pgdir}/lib
-%define _datadir %{_pgdir}/share
-%if "%(echo ${MAKE_ROOT})" != ""
-  %define _rpmdir %(echo ${MAKE_ROOT})/RPMS
-  %define _sourcedir %(echo ${MAKE_ROOT})
-%endif
-
-## Set general information for pg_hint_plan.
-Summary: Optimizer hint for PostgreSQL 9.4
-Name: pg_hint_plan94
-Version: 1.1.2
-Release: 1%{?dist}
-License: BSD
-Group: Applications/Databases
-Source0: %{name}-%{version}.tar.gz
-#URL: http://example.com/pg_hint_plan/
-BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-%(%{__id_u} -n)
-Vendor: NIPPON TELEGRAPH AND TELEPHONE CORPORATION
-
-## We use postgresql-devel package
-BuildRequires: postgresql94-devel
-Requires: postgresql94-libs
-
-## Description for "pg_hint_plan"
-%description
-pg_hint_plan provides capability to force arbitrary plan to PostgreSQL' planner
-to optimize queries by hand directly.
-
-If you have query plan better than which PostgreSQL chooses, you can force your
-plan by adding special comment block with optimizer hint before the query you
-want to optimize. You can control scan method, join method, join order, and
-planner-related GUC parameters during planning.
-
-Note that this package is available for only PostgreSQL 9.4.
-
-## pre work for build pg_hint_plan
-%prep
-PATH=/usr/pgsql-9.4/bin:$PATH
-if [ "${MAKE_ROOT}" != "" ]; then
-    pushd ${MAKE_ROOT}
-    make clean %{name}-%{version}.tar.gz
-    popd
-fi
-if [ ! -d %{_rpmdir} ]; then mkdir -p %{_rpmdir}; fi
-%setup -q
-
-## Set variables for build environment
-%build
-PATH=/usr/pgsql-9.4/bin:$PATH
-make USE_PGXS=1 %{?_smp_mflags}
-
-## Set variables for install
-%install
-rm -rf %{buildroot}
-install -d %{buildroot}%{_libdir}
-install pg_hint_plan.so %{buildroot}%{_libdir}/pg_hint_plan.so
-install -d %{buildroot}%{_datadir}/extension
-install -m 644 pg_hint_plan--1.1.2.sql %{buildroot}%{_datadir}/extension/pg_hint_plan--1.1.2.sql
-install -m 644 pg_hint_plan.control %{buildroot}%{_datadir}/extension/pg_hint_plan.control
-
-%clean
-rm -rf %{buildroot}
-
-%files
-%defattr(0755,root,root)
-%{_libdir}/pg_hint_plan.so
-%defattr(0644,root,root)
-%{_datadir}/extension/pg_hint_plan--1.1.2.sql
-%{_datadir}/extension/pg_hint_plan.control
-
-# History of pg_hint_plan.
-%changelog
-* Thu Dec 17 2014 Kyotaro Horiguchi
-- Support 9.4. New rev 1.1.2.
-* Mon Sep 02 2013 Takashi Suzuki
-- Initial cut for 1.1.0
-* Mon Sep 24 2012 Shigeru Hanada <shigeru.hanada@gmail.com>
-- Initial cut for 1.0.0
-
```
```diff
@@ -20,7 +20,7 @@
  * mark_dummy_rel()
  * restriction_is_constant_false()
  *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *-------------------------------------------------------------------------
@@ -215,9 +215,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
         add_path(rel, (Path *)
                  create_append_path(rel, subpaths, required_outer));
     }
-
-    /* Select cheapest paths */
-    set_cheapest(rel);
 }
 
 /*
@@ -720,7 +717,7 @@ join_search_one_level(PlannerInfo *root, int level)
          */
         if (joinrels[level] == NIL &&
             root->join_info_list == NIL &&
-            root->lateral_info_list == NIL)
+            !root->hasLateralRTEs)
             elog(ERROR, "failed to build any %d-way joins", level);
     }
 }
@@ -819,9 +816,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
     SpecialJoinInfo *match_sjinfo;
     bool        reversed;
    bool        unique_ified;
-    bool        is_valid_inner;
-    bool        lateral_fwd;
-    bool        lateral_rev;
+    bool        must_be_leftjoin;
    ListCell   *l;
 
     /*
@@ -834,12 +829,12 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
     /*
      * If we have any special joins, the proposed join might be illegal; and
      * in any case we have to determine its join type.  Scan the join info
-     * list for conflicts.
+     * list for matches and conflicts.
      */
     match_sjinfo = NULL;
     reversed = false;
     unique_ified = false;
-    is_valid_inner = true;
+    must_be_leftjoin = false;
 
     foreach(l, root->join_info_list)
     {
@@ -890,7 +885,8 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
          * If one input contains min_lefthand and the other contains
          * min_righthand, then we can perform the SJ at this join.
          *
-         * Barf if we get matches to more than one SJ (is that possible?)
+         * Reject if we get matches to more than one SJ; that implies we're
+         * considering something that's not really valid.
          */
         if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
             bms_is_subset(sjinfo->min_righthand, rel2->relids))
@@ -955,90 +951,168 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
         }
         else
         {
-            /*----------
-             * Otherwise, the proposed join overlaps the RHS but isn't
-             * a valid implementation of this SJ.  It might still be
-             * a legal join, however.  If both inputs overlap the RHS,
-             * assume that it's OK.  Since the inputs presumably got past
-             * this function's checks previously, they can't overlap the
-             * LHS and their violations of the RHS boundary must represent
-             * SJs that have been determined to commute with this one.
-             * We have to allow this to work correctly in cases like
-             *      (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
-             * when the c/d join has been determined to commute with the join
-             * to a, and hence d is not part of min_righthand for the upper
-             * join.  It should be legal to join b to c/d but this will appear
-             * as a violation of the upper join's RHS.
-             * Furthermore, if one input overlaps the RHS and the other does
-             * not, we should still allow the join if it is a valid
-             * implementation of some other SJ.  We have to allow this to
-             * support the associative identity
-             *      (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
-             * since joining B directly to C violates the lower SJ's RHS.
-             * We assume that make_outerjoininfo() set things up correctly
-             * so that we'll only match to some SJ if the join is valid.
-             * Set flag here to check at bottom of loop.
-             *----------
+            /*
+             * Otherwise, the proposed join overlaps the RHS but isn't a valid
+             * implementation of this SJ.  But don't panic quite yet: the RHS
+             * violation might have occurred previously, in one or both input
+             * relations, in which case we must have previously decided that
+             * it was OK to commute some other SJ with this one.  If we need
+             * to perform this join to finish building up the RHS, rejecting
+             * it could lead to not finding any plan at all.  (This can occur
+             * because of the heuristics elsewhere in this file that postpone
+             * clauseless joins: we might not consider doing a clauseless join
+             * within the RHS until after we've performed other, validly
+             * commutable SJs with one or both sides of the clauseless join.)
+             * This consideration boils down to the rule that if both inputs
+             * overlap the RHS, we can allow the join --- they are either
+             * fully within the RHS, or represent previously-allowed joins to
+             * rels outside it.
              */
-            if (sjinfo->jointype != JOIN_SEMI &&
-                bms_overlap(rel1->relids, sjinfo->min_righthand) &&
+            if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
                 bms_overlap(rel2->relids, sjinfo->min_righthand))
-            {
-                /* seems OK */
-                Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
-            }
-            else
-                is_valid_inner = false;
+                continue;       /* assume valid previous violation of RHS */
+
+            /*
+             * The proposed join could still be legal, but only if we're
+             * allowed to associate it into the RHS of this SJ.  That means
+             * this SJ must be a LEFT join (not SEMI or ANTI, and certainly
+             * not FULL) and the proposed join must not overlap the LHS.
+             */
+            if (sjinfo->jointype != JOIN_LEFT ||
+                bms_overlap(joinrelids, sjinfo->min_lefthand))
+                return false;   /* invalid join path */
+
+            /*
+             * To be valid, the proposed join must be a LEFT join; otherwise
+             * it can't associate into this SJ's RHS.  But we may not yet have
+             * found the SpecialJoinInfo matching the proposed join, so we
+             * can't test that yet.  Remember the requirement for later.
+             */
+            must_be_leftjoin = true;
         }
     }
 
     /*
-     * Fail if violated some SJ's RHS and didn't match to another SJ. However,
-     * "matching" to a semijoin we are implementing by unique-ification
-     * doesn't count (think: it's really an inner join).
+     * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
+     * proposed join can't associate into an SJ's RHS.
+     *
+     * Also, fail if the proposed join's predicate isn't strict; we're
+     * essentially checking to see if we can apply outer-join identity 3, and
+     * that's a requirement.  (This check may be redundant with checks in
+     * make_outerjoininfo, but I'm not quite sure, and it's cheap to test.)
      */
-    if (!is_valid_inner &&
-        (match_sjinfo == NULL || unique_ified))
+    if (must_be_leftjoin &&
+        (match_sjinfo == NULL ||
+         match_sjinfo->jointype != JOIN_LEFT ||
+         !match_sjinfo->lhs_strict))
         return false;           /* invalid join path */
 
     /*
      * We also have to check for constraints imposed by LATERAL references.
-     * The proposed rels could each contain lateral references to the other,
-     * in which case the join is impossible.  If there are lateral references
-     * in just one direction, then the join has to be done with a nestloop
-     * with the lateral referencer on the inside.  If the join matches an SJ
-     * that cannot be implemented by such a nestloop, the join is impossible.
      */
-    lateral_fwd = lateral_rev = false;
-    foreach(l, root->lateral_info_list)
+    if (root->hasLateralRTEs)
     {
-        LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+        bool        lateral_fwd;
+        bool        lateral_rev;
+        Relids      join_lateral_rels;
 
-        if (bms_is_subset(ljinfo->lateral_rhs, rel2->relids) &&
-            bms_overlap(ljinfo->lateral_lhs, rel1->relids))
+        /*
+         * The proposed rels could each contain lateral references to the
+         * other, in which case the join is impossible.  If there are lateral
+         * references in just one direction, then the join has to be done with
+         * a nestloop with the lateral referencer on the inside.  If the join
+         * matches an SJ that cannot be implemented by such a nestloop, the
+         * join is impossible.
+         *
+         * Also, if the lateral reference is only indirect, we should reject
+         * the join; whatever rel(s) the reference chain goes through must be
+         * joined to first.
+         *
+         * Another case that might keep us from building a valid plan is the
+         * implementation restriction described by have_dangerous_phv().
+         */
+        lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
+        lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
+        if (lateral_fwd && lateral_rev)
+            return false;       /* have lateral refs in both directions */
+        if (lateral_fwd)
         {
             /* has to be implemented as nestloop with rel1 on left */
-            if (lateral_rev)
-                return false;   /* have lateral refs in both directions */
-            lateral_fwd = true;
-            if (!bms_is_subset(ljinfo->lateral_lhs, rel1->relids))
-                return false;   /* rel1 can't compute the required parameter */
             if (match_sjinfo &&
-                (reversed || match_sjinfo->jointype == JOIN_FULL))
+                (reversed ||
+                 unique_ified ||
+                 match_sjinfo->jointype == JOIN_FULL))
                 return false;   /* not implementable as nestloop */
+            /* check there is a direct reference from rel2 to rel1 */
+            if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
+                return false;   /* only indirect refs, so reject */
+            /* check we won't have a dangerous PHV */
+            if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
+                return false;   /* might be unable to handle required PHV */
         }
-        if (bms_is_subset(ljinfo->lateral_rhs, rel1->relids) &&
-            bms_overlap(ljinfo->lateral_lhs, rel2->relids))
+        else if (lateral_rev)
         {
             /* has to be implemented as nestloop with rel2 on left */
-            if (lateral_fwd)
-                return false;   /* have lateral refs in both directions */
-            lateral_rev = true;
-            if (!bms_is_subset(ljinfo->lateral_lhs, rel2->relids))
-                return false;   /* rel2 can't compute the required parameter */
             if (match_sjinfo &&
-                (!reversed || match_sjinfo->jointype == JOIN_FULL))
+                (!reversed ||
+                 unique_ified ||
+                 match_sjinfo->jointype == JOIN_FULL))
                 return false;   /* not implementable as nestloop */
+            /* check there is a direct reference from rel1 to rel2 */
+            if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
+                return false;   /* only indirect refs, so reject */
+            /* check we won't have a dangerous PHV */
+            if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
+                return false;   /* might be unable to handle required PHV */
+        }
+
+        /*
+         * LATERAL references could also cause problems later on if we accept
+         * this join: if the join's minimum parameterization includes any rels
+         * that would have to be on the inside of an outer join with this join
+         * rel, then it's never going to be possible to build the complete
+         * query using this join.  We should reject this join not only because
+         * it'll save work, but because if we don't, the clauseless-join
+         * heuristics might think that legality of this join means that some
+         * other join rel need not be formed, and that could lead to failure
+         * to find any plan at all.  We have to consider not only rels that
+         * are directly on the inner side of an OJ with the joinrel, but also
+         * ones that are indirectly so, so search to find all such rels.
+         */
+        join_lateral_rels = min_join_parameterization(root, joinrelids,
+                                                      rel1, rel2);
+        if (join_lateral_rels)
+        {
+            Relids      join_plus_rhs = bms_copy(joinrelids);
+            bool        more;
+
+            do
+            {
+                more = false;
+                foreach(l, root->join_info_list)
+                {
+                    SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
+
+                    if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
+                        !bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
+                    {
+                        join_plus_rhs = bms_add_members(join_plus_rhs,
+                                                        sjinfo->min_righthand);
+                        more = true;
+                    }
+                    /* full joins constrain both sides symmetrically */
+                    if (sjinfo->jointype == JOIN_FULL &&
+                        bms_overlap(sjinfo->min_righthand, join_plus_rhs) &&
+                        !bms_is_subset(sjinfo->min_lefthand, join_plus_rhs))
+                    {
+                        join_plus_rhs = bms_add_members(join_plus_rhs,
+                                                        sjinfo->min_lefthand);
+                        more = true;
+                    }
+                }
+            } while (more);
+            if (bms_overlap(join_plus_rhs, join_lateral_rels))
+                return false;   /* will not be able to join to some RHS rel */
         }
     }
 
```
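The outer-join identity that the new `lhs_strict` test guards (identity 3, cited in the added comment, and spelled out in the comment the patch removes) can be written in SQL form; an illustration with hypothetical tables `a`, `b`, `c`:

```sql
-- When the b/c join predicate is strict (it rejects NULL-extended
-- b rows), these two orderings are equivalent, which is what lets
-- the planner associate a join into a left join's right-hand side:
SELECT * FROM (a LEFT JOIN b ON a.x = b.x) LEFT JOIN c ON b.y = c.y;
SELECT * FROM a LEFT JOIN (b LEFT JOIN c ON b.y = c.y) ON a.x = b.x;
```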
```diff
@@ -1052,7 +1126,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
  * has_join_restriction
  *      Detect whether the specified relation has join-order restrictions,
  *      due to being inside an outer join or an IN (sub-SELECT),
- *      or participating in any LATERAL references.
+ *      or participating in any LATERAL references or multi-rel PHVs.
  *
  * Essentially, this tests whether have_join_order_restriction() could
  * succeed with this rel and some other one.  It's OK if we sometimes
@@ -1064,12 +1138,15 @@ has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
 {
     ListCell   *l;
 
-    foreach(l, root->lateral_info_list)
+    if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL)
+        return true;
+
+    foreach(l, root->placeholder_list)
     {
-        LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+        PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
 
-        if (bms_is_subset(ljinfo->lateral_rhs, rel->relids) ||
-            bms_overlap(ljinfo->lateral_lhs, rel->relids))
+        if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
+            !bms_equal(rel->relids, phinfo->ph_eval_at))
             return true;
     }
 
```
```diff
@@ -3016,7 +3016,7 @@ error hint:
    ->  Seq Scan on t2
 (5 rows)
 
--- inherite table test
+-- inheritance tables test
 SET constraint_exclusion TO off;
 EXPLAIN (COSTS false) SELECT * FROM p1 WHERE id >= 50 AND id <= 51 AND p1.ctid = '(1,1)';
 QUERY PLAN
@@ -7983,7 +7983,7 @@ duplication hint:
 error hint:
 
 CONTEXT: SQL statement "/*+ SeqScan(t1) */ SELECT * FROM t1"
-PL/pgSQL function testfunc() line 3 at EXECUTE statement
+PL/pgSQL function testfunc() line 3 at EXECUTE
 testfunc
 ----------
 
```
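The CONTEXT lines in this expected-output hunk come from a PL/pgSQL function that runs a hinted statement through EXECUTE; a sketch of such a function (reconstructed from the CONTEXT output, so the exact body used by the regression test is an assumption):

```sql
CREATE FUNCTION testfunc() RETURNS void LANGUAGE plpgsql AS $$
BEGIN
    EXECUTE '/*+ SeqScan(t1) */ SELECT * FROM t1';
END;
$$;

SELECT testfunc();  -- the hint inside EXECUTE is picked up by pg_hint_plan
```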
```diff
@@ -1,4 +1,4 @@
-/* pg_hint_plan/pg_hint_plan--1.1.2.sql */
+/* pg_hint_plan/pg_hint_plan--1.1.3.sql */
 
 -- complain if script is sourced in psql, rather than via CREATE EXTENSION
 \echo Use "CREATE EXTENSION pg_hint_plan" to load this file. \quit
@@ -3785,6 +3785,8 @@ rebuild_scan_path(HintState *hstate, PlannerInfo *root, int level,
     {
         set_plain_rel_pathlist(root, rel, rte);
     }
+
+    set_cheapest(rel);
 }
 
 /*
@@ -1,6 +1,6 @@
 # pg_hint_plan extension
 
 comment = ''
-default_version = '1.1.2'
+default_version = '1.1.3'
 relocatable = false
 schema = hint_plan
```
```diff
@@ -1,2148 +1,28 @@
 /*-------------------------------------------------------------------------
  *
  * pg_stat_statements.c
- *      Track statement execution times across a whole database cluster.
+ *
+ * Part of pg_stat_statements.c in PostgreSQL 9.5.
  *
- * Execution costs are totalled for each distinct source query, and kept in
- * a shared hashtable.  (We track only as many distinct queries as will fit
- * in the designated amount of shared memory.)
+ * Copyright (c) 2008-2015, PostgreSQL Global Development Group
  *
- * As of Postgres 9.2, this module normalizes query entries.  Normalization
- * is a process whereby similar queries, typically differing only in their
- * constants (though the exact rules are somewhat more subtle than that) are
- * recognized as equivalent, and are tracked as a single entry.  This is
- * particularly useful for non-prepared queries.
- *
- * Normalization is implemented by fingerprinting queries, selectively
- * serializing those fields of each query tree's nodes that are judged to be
- * essential to the query.  This is referred to as a query jumble.  This is
- * distinct from a regular serialization in that various extraneous
- * information is ignored as irrelevant or not essential to the query, such
- * as the collations of Vars and, most notably, the values of constants.
- *
- * This jumble is acquired at the end of parse analysis of each query, and
- * a 32-bit hash of it is stored into the query's Query.queryId field.
- * The server then copies this value around, making it available in plan
- * tree(s) generated from the query.  The executor can then use this value
- * to blame query costs on the proper queryId.
- *
- * To facilitate presenting entries to users, we create "representative" query
- * strings in which constants are replaced with '?' characters, to make it
- * clearer what a normalized entry can represent.  To save on shared memory,
- * and to avoid having to truncate oversized query strings, we store these
- * strings in a temporary external query-texts file.  Offsets into this
- * file are kept in shared memory.
- *
- * Note about locking issues: to create or delete an entry in the shared
- * hashtable, one must hold pgss->lock exclusively.  Modifying any field
- * in an entry except the counters requires the same.  To look up an entry,
- * one must hold the lock shared.  To read or update the counters within
- * an entry, one must hold the lock shared or exclusive (so the entry doesn't
- * disappear!) and also take the entry's mutex spinlock.
- * The shared state variable pgss->extent (the next free spot in the external
- * query-text file) should be accessed only while holding either the
- * pgss->mutex spinlock, or exclusive lock on pgss->lock.  We use the mutex to
- * allow reserving file space while holding only shared lock on pgss->lock.
- * Rewriting the entire external query-text file, eg for garbage collection,
- * requires holding pgss->lock exclusively; this allows individual entries
- * in the file to be read or written while holding only shared lock.
- *
- *
- * Copyright (c) 2008-2014, PostgreSQL Global Development Group
- *
- * IDENTIFICATION
- *    contrib/pg_stat_statements/pg_stat_statements.c
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include <sys/stat.h>
-
-#ifdef NOT_USED
-#include <unistd.h>
-#endif
-
-#include "access/hash.h"
-#ifdef NOT_USED
-#include "executor/instrument.h"
-#include "funcapi.h"
-#include "mb/pg_wchar.h"
-#include "miscadmin.h"
-#include "parser/analyze.h"
-#include "parser/parsetree.h"
-#endif
-#include "parser/scanner.h"
-#ifdef NOT_USED
-#include "pgstat.h"
-#include "storage/fd.h"
-#include "storage/ipc.h"
-#include "storage/spin.h"
-#include "tcop/utility.h"
-#include "utils/builtins.h"
-#include "utils/memutils.h"
-
-PG_MODULE_MAGIC;
-
-/* Location of permanent stats file (valid when database is shut down) */
-#define PGSS_DUMP_FILE  PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
-
-/*
- * Location of external query text file.  We don't keep it in the core
- * system's stats_temp_directory.  The core system can safely use that GUC
- * setting, because the statistics collector temp file paths are set only once
- * as part of changing the GUC, but pg_stat_statements has no way of avoiding
- * race conditions.  Besides, we only expect modest, infrequent I/O for query
- * strings, so placing the file on a faster filesystem is not compelling.
- */
-#define PGSS_TEXT_FILE  PG_STAT_TMP_DIR "/pgss_query_texts.stat"
-
-/* Magic number identifying the stats file format */
-static const uint32 PGSS_FILE_HEADER = 0x20140125;
-
-/* PostgreSQL major version number, changes in which invalidate all entries */
-static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
-
-/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
-#define USAGE_EXEC(duration)    (1.0)
-#define USAGE_INIT              (1.0)   /* including initial planning */
-#define ASSUMED_MEDIAN_INIT     (10.0)  /* initial assumed median usage */
-#define ASSUMED_LENGTH_INIT     1024    /* initial assumed mean query length */
-#define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
-#define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
-#define USAGE_DEALLOC_PERCENT   5       /* free this % of entries at once */
-
-#define JUMBLE_SIZE             1024    /* query serialization buffer size */
-
-/*
- * Extension version number, for supporting older extension versions' objects
- */
-typedef enum pgssVersion
-{
-    PGSS_V1_0 = 0,
-    PGSS_V1_1,
-    PGSS_V1_2
-} pgssVersion;
-
-/*
- * Hashtable key that defines the identity of a hashtable entry.  We separate
- * queries by user and by database even if they are otherwise identical.
- */
-typedef struct pgssHashKey
-{
-    Oid         userid;         /* user OID */
-    Oid         dbid;           /* database OID */
-    uint32      queryid;        /* query identifier */
-} pgssHashKey;
-
-/*
- * The actual stats counters kept within pgssEntry.
- */
-typedef struct Counters
-{
-    int64       calls;          /* # of times executed */
-    double      total_time;     /* total execution time, in msec */
-    int64       rows;           /* total # of retrieved or affected rows */
-    int64       shared_blks_hit;        /* # of shared buffer hits */
-    int64       shared_blks_read;       /* # of shared disk blocks read */
-    int64       shared_blks_dirtied;    /* # of shared disk blocks dirtied */
-    int64       shared_blks_written;    /* # of shared disk blocks written */
-    int64       local_blks_hit;         /* # of local buffer hits */
-    int64       local_blks_read;        /* # of local disk blocks read */
-    int64       local_blks_dirtied;     /* # of local disk blocks dirtied */
-    int64       local_blks_written;     /* # of local disk blocks written */
-    int64       temp_blks_read;         /* # of temp blocks read */
-    int64       temp_blks_written;      /* # of temp blocks written */
-    double      blk_read_time;  /* time spent reading, in msec */
-    double      blk_write_time; /* time spent writing, in msec */
-    double      usage;          /* usage factor */
-} Counters;
-
-/*
- * Statistics per statement
- *
- * Note: in event of a failure in garbage collection of the query text file,
- * we reset query_offset to zero and query_len to -1.  This will be seen as
- * an invalid state by qtext_fetch().
- */
-typedef struct pgssEntry
-{
-    pgssHashKey key;            /* hash key of entry - MUST BE FIRST */
-    Counters    counters;       /* the statistics for this query */
-    Size        query_offset;   /* query text offset in external file */
-    int         query_len;      /* # of valid bytes in query string */
-    int         encoding;       /* query text encoding */
-    slock_t     mutex;          /* protects the counters only */
-} pgssEntry;
-
-/*
- * Global shared state
- */
-typedef struct pgssSharedState
-{
-    LWLock     *lock;           /* protects hashtable search/modification */
-    double      cur_median_usage;       /* current median usage in hashtable */
-    Size        mean_query_len; /* current mean entry text length */
-    slock_t     mutex;          /* protects following fields only: */
-    Size        extent;         /* current extent of query file */
-    int         n_writers;      /* number of active writers to query file */
-    int         gc_count;       /* query file garbage collection cycle count */
-} pgssSharedState;
-
-/*
- * Struct for tracking locations/lengths of constants during normalization
- */
-typedef struct pgssLocationLen
-{
-    int         location;       /* start offset in query text */
-    int         length;         /* length in bytes, or -1 to ignore */
-} pgssLocationLen;
-
-/*
- * Working state for computing a query jumble and producing a normalized
- * query string
- */
-typedef struct pgssJumbleState
-{
-    /* Jumble of current query tree */
-    unsigned char *jumble;
-
-    /* Number of bytes used in jumble[] */
-    Size        jumble_len;
-
-    /* Array of locations of constants that should be removed */
-    pgssLocationLen *clocations;
-
-    /* Allocated length of clocations array */
-    int         clocations_buf_size;
-
-    /* Current number of valid entries in clocations array */
-    int         clocations_count;
-} pgssJumbleState;
-
-/*---- Local variables ----*/
-
-/* Current nesting depth of ExecutorRun+ProcessUtility calls */
-static int  nested_level = 0;
-
-/* Saved hook values in case of unload */
-static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
-static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
-static ExecutorStart_hook_type prev_ExecutorStart = NULL;
-static ExecutorRun_hook_type prev_ExecutorRun = NULL;
-static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
-static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
-static ProcessUtility_hook_type prev_ProcessUtility = NULL;
-
-/* Links to shared memory state */
-static pgssSharedState *pgss = NULL;
-static HTAB *pgss_hash = NULL;
-
-/*---- GUC variables ----*/
-
-typedef enum
-{
-    PGSS_TRACK_NONE,            /* track no statements */
-    PGSS_TRACK_TOP,             /* only top level statements */
-    PGSS_TRACK_ALL              /* all statements, including nested ones */
-} PGSSTrackLevel;
-
-static const struct config_enum_entry track_options[] =
-{
-    {"none", PGSS_TRACK_NONE, false},
-    {"top", PGSS_TRACK_TOP, false},
-    {"all", PGSS_TRACK_ALL, false},
-    {NULL, 0, false}
-};
-
-static int  pgss_max;           /* max # statements to track */
-static int  pgss_track;         /* tracking level */
-static bool pgss_track_utility; /* whether to track utility commands */
-static bool pgss_save;          /* whether to save stats across shutdown */
-
-
-#define pgss_enabled() \
-    (pgss_track == PGSS_TRACK_ALL || \
-     (pgss_track == PGSS_TRACK_TOP && nested_level == 0))
-
-#define record_gc_qtexts() \
-    do { \
-        volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; \
-        SpinLockAcquire(&s->mutex); \
-        s->gc_count++; \
-        SpinLockRelease(&s->mutex); \
-    } while(0)
-
-/*---- Function declarations ----*/
-
-void        _PG_init(void);
-void        _PG_fini(void);
-
-PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
-PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
-PG_FUNCTION_INFO_V1(pg_stat_statements);
-
-static void pgss_shmem_startup(void);
-static void pgss_shmem_shutdown(int code, Datum arg);
-static void pgss_post_parse_analyze(ParseState *pstate, Query *query);
-static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
-static void pgss_ExecutorRun(QueryDesc *queryDesc,
-                 ScanDirection direction,
-                 long count);
-static void pgss_ExecutorFinish(QueryDesc *queryDesc);
-static void pgss_ExecutorEnd(QueryDesc *queryDesc);
-static void pgss_ProcessUtility(Node *parsetree, const char *queryString,
-                    ProcessUtilityContext context, ParamListInfo params,
-                    DestReceiver *dest, char *completionTag);
-static uint32 pgss_hash_fn(const void *key, Size keysize);
-static int  pgss_match_fn(const void *key1, const void *key2, Size keysize);
-static uint32 pgss_hash_string(const char *str);
-static void pgss_store(const char *query, uint32 queryId,
-           double total_time, uint64 rows,
-           const BufferUsage *bufusage,
-           pgssJumbleState *jstate);
-static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
-                            pgssVersion api_version,
-                            bool showtext);
-static Size pgss_memsize(void);
-static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
-            int encoding, bool sticky);
-static void entry_dealloc(void);
-static bool qtext_store(const char *query, int query_len,
-            Size *query_offset, int *gc_count);
-static char *qtext_load_file(Size *buffer_size);
-static char *qtext_fetch(Size query_offset, int query_len,
-            char *buffer, Size buffer_size);
-static bool need_gc_qtexts(void);
-static void gc_qtexts(void);
-static void entry_reset(void);
-#endif
-static void AppendJumble(pgssJumbleState *jstate,
-             const unsigned char *item, Size size);
-static void JumbleQuery(pgssJumbleState *jstate, Query *query);
-static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
-static void JumbleExpr(pgssJumbleState *jstate, Node *node);
-static void RecordConstLocation(pgssJumbleState *jstate, int location);
-#ifdef NOT_USED
-static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
-                          int *query_len_p, int encoding);
-#endif
-static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
-static int  comp_location(const void *a, const void *b);
-
-
-#ifdef NOT_USED
-/*
- * Module load callback
- */
-void
-_PG_init(void)
-{
-    /*
-     * In order to create our shared memory area, we have to be loaded via
-     * shared_preload_libraries.  If not, fall out without hooking into any of
-     * the main system.  (We don't throw error here because it seems useful to
-     * allow the pg_stat_statements functions to be created even when the
-     * module isn't active.  The functions must protect themselves against
-     * being called then, however.)
-     */
-    if (!process_shared_preload_libraries_in_progress)
-        return;
-
-    /*
-     * Define (or redefine) custom GUC variables.
-     */
-    DefineCustomIntVariable("pg_stat_statements.max",
-        "Sets the maximum number of statements tracked by pg_stat_statements.",
-                            NULL,
-                            &pgss_max,
-                            5000,
-                            100,
-                            INT_MAX,
-                            PGC_POSTMASTER,
-                            0,
-                            NULL,
-                            NULL,
-                            NULL);
-
-    DefineCustomEnumVariable("pg_stat_statements.track",
-        "Selects which statements are tracked by pg_stat_statements.",
-                             NULL,
-                             &pgss_track,
-                             PGSS_TRACK_TOP,
-                             track_options,
-                             PGC_SUSET,
-                             0,
-                             NULL,
-                             NULL,
-                             NULL);
-
-    DefineCustomBoolVariable("pg_stat_statements.track_utility",
-        "Selects whether utility commands are tracked by pg_stat_statements.",
-                             NULL,
-                             &pgss_track_utility,
-                             true,
-                             PGC_SUSET,
-                             0,
-                             NULL,
-                             NULL,
-                             NULL);
-
-    DefineCustomBoolVariable("pg_stat_statements.save",
-        "Save pg_stat_statements statistics across server shutdowns.",
-                             NULL,
-                             &pgss_save,
-                             true,
-                             PGC_SIGHUP,
-                             0,
-                             NULL,
-                             NULL,
-                             NULL);
-
-    EmitWarningsOnPlaceholders("pg_stat_statements");
-
-    /*
-     * Request additional shared resources.  (These are no-ops if we're not in
-     * the postmaster process.)  We'll allocate or attach to the shared
-     * resources in pgss_shmem_startup().
-     */
-    RequestAddinShmemSpace(pgss_memsize());
-    RequestAddinLWLocks(1);
-
-    /*
-     * Install hooks.
-     */
-    prev_shmem_startup_hook = shmem_startup_hook;
-    shmem_startup_hook = pgss_shmem_startup;
-    prev_post_parse_analyze_hook = post_parse_analyze_hook;
-    post_parse_analyze_hook = pgss_post_parse_analyze;
-    prev_ExecutorStart = ExecutorStart_hook;
-    ExecutorStart_hook = pgss_ExecutorStart;
-    prev_ExecutorRun = ExecutorRun_hook;
-    ExecutorRun_hook = pgss_ExecutorRun;
-    prev_ExecutorFinish = ExecutorFinish_hook;
-    ExecutorFinish_hook = pgss_ExecutorFinish;
-    prev_ExecutorEnd = ExecutorEnd_hook;
-    ExecutorEnd_hook = pgss_ExecutorEnd;
-    prev_ProcessUtility = ProcessUtility_hook;
-    ProcessUtility_hook = pgss_ProcessUtility;
-}
-
-/*
- * Module unload callback
- */
-void
-_PG_fini(void)
-{
-    /* Uninstall hooks. */
-    shmem_startup_hook = prev_shmem_startup_hook;
-    post_parse_analyze_hook = prev_post_parse_analyze_hook;
-    ExecutorStart_hook = prev_ExecutorStart;
-    ExecutorRun_hook = prev_ExecutorRun;
-    ExecutorFinish_hook = prev_ExecutorFinish;
-    ExecutorEnd_hook = prev_ExecutorEnd;
-    ProcessUtility_hook = prev_ProcessUtility;
-}
-
-/*
- * shmem_startup hook: allocate or attach to shared memory,
- * then load any pre-existing statistics from file.
- * Also create and load the query-texts file, which is expected to exist
- * (even if empty) while the module is enabled.
- */
-static void
-pgss_shmem_startup(void)
-{
-    bool        found;
-    HASHCTL     info;
-    FILE       *file = NULL;
-    FILE       *qfile = NULL;
-    uint32      header;
-    int32       num;
-    int32       pgver;
-    int32       i;
-    int         buffer_size;
-    char       *buffer = NULL;
-
-    if (prev_shmem_startup_hook)
-        prev_shmem_startup_hook();
-
-    /* reset in case this is a restart within the postmaster */
-    pgss = NULL;
-    pgss_hash = NULL;
-
-    /*
-     * Create or attach to the shared memory state, including hash table
-     */
-    LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
-
-    pgss = ShmemInitStruct("pg_stat_statements",
-                           sizeof(pgssSharedState),
-                           &found);
-
-    if (!found)
-    {
-        /* First time through ... */
-        pgss->lock = LWLockAssign();
-        pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
-        pgss->mean_query_len = ASSUMED_LENGTH_INIT;
-        SpinLockInit(&pgss->mutex);
-        pgss->extent = 0;
-        pgss->n_writers = 0;
-        pgss->gc_count = 0;
-    }
-
-    memset(&info, 0, sizeof(info));
-    info.keysize = sizeof(pgssHashKey);
-    info.entrysize = sizeof(pgssEntry);
-    info.hash = pgss_hash_fn;
-    info.match = pgss_match_fn;
-    pgss_hash = ShmemInitHash("pg_stat_statements hash",
-                              pgss_max, pgss_max,
-                              &info,
-                              HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
-
-    LWLockRelease(AddinShmemInitLock);
-
-    /*
-     * If we're in the postmaster (or a standalone backend...), set up a shmem
-     * exit hook to dump the statistics to disk.
-     */
-    if (!IsUnderPostmaster)
-        on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
-
-    /*
-     * Done if some other process already completed our initialization.
-     */
-    if (found)
-        return;
-
-    /*
-     * Note: we don't bother with locks here, because there should be no other
-     * processes running when this code is reached.
-     */
-
-    /* Unlink query text file possibly left over from crash */
-    unlink(PGSS_TEXT_FILE);
-
-    /* Allocate new query text temp file */
-    qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
-    if (qfile == NULL)
-        goto write_error;
-
-    /*
-     * If we were told not to load old statistics, we're done.  (Note we do
-     * not try to unlink any old dump file in this case.  This seems a bit
-     * questionable but it's the historical behavior.)
-     */
-    if (!pgss_save)
-    {
-        FreeFile(qfile);
-        return;
-    }
-
-    /*
-     * Attempt to load old statistics from the dump file.
-     */
-    file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
-    if (file == NULL)
-    {
-        if (errno != ENOENT)
-            goto read_error;
-        /* No existing persisted stats file, so we're done */
-        FreeFile(qfile);
-        return;
-    }
-
-    buffer_size = 2048;
-    buffer = (char *) palloc(buffer_size);
-
-    if (fread(&header, sizeof(uint32), 1, file) != 1 ||
-        fread(&pgver, sizeof(uint32), 1, file) != 1 ||
-        fread(&num, sizeof(int32), 1, file) != 1)
-        goto read_error;
-
-    if (header != PGSS_FILE_HEADER ||
-        pgver != PGSS_PG_MAJOR_VERSION)
-        goto data_error;
-
-    for (i = 0; i < num; i++)
-    {
-        pgssEntry   temp;
-        pgssEntry  *entry;
-        Size        query_offset;
-
-        if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
-            goto read_error;
-
-        /* Encoding is the only field we can easily sanity-check */
-        if (!PG_VALID_BE_ENCODING(temp.encoding))
-            goto data_error;
-
-        /* Resize buffer as needed */
-        if (temp.query_len >= buffer_size)
-        {
-            buffer_size = Max(buffer_size * 2, temp.query_len + 1);
-            buffer = repalloc(buffer, buffer_size);
-        }
-
-        if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
-            goto read_error;
-
-        /* Should have a trailing null, but let's make sure */
-        buffer[temp.query_len] = '\0';
-
-        /* Skip loading "sticky" entries */
-        if (temp.counters.calls == 0)
-            continue;
-
-        /* Store the query text */
-        query_offset = pgss->extent;
-        if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
-            goto write_error;
-        pgss->extent += temp.query_len + 1;
-
-        /* make the hashtable entry (discards old entries if too many) */
-        entry = entry_alloc(&temp.key, query_offset, temp.query_len,
-                            temp.encoding,
-                            false);
-
-        /* copy in the actual stats */
-        entry->counters = temp.counters;
-    }
-
-    pfree(buffer);
-    FreeFile(file);
-    FreeFile(qfile);
-
-    /*
-     * Remove the persisted stats file so it's not included in
-     * backups/replication slaves, etc.  A new file will be written on next
-     * shutdown.
-     *
-     * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
-     * because we remove that file on startup; it acts inversely to
-     * PGSS_DUMP_FILE, in that it is only supposed to be around when the
-     * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
-     * when the server is not running.  Leaving the file creates no danger of
-     * a newly restored database having a spurious record of execution costs,
-     * which is what we're really concerned about here.
-     */
-    unlink(PGSS_DUMP_FILE);
-
-    return;
-
-read_error:
-    ereport(LOG,
-            (errcode_for_file_access(),
-             errmsg("could not read pg_stat_statement file \"%s\": %m",
-                    PGSS_DUMP_FILE)));
-    goto fail;
-data_error:
-    ereport(LOG,
-            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-             errmsg("ignoring invalid data in pg_stat_statement file \"%s\"",
-                    PGSS_DUMP_FILE)));
-    goto fail;
-write_error:
-    ereport(LOG,
-            (errcode_for_file_access(),
-             errmsg("could not write pg_stat_statement file \"%s\": %m",
-                    PGSS_TEXT_FILE)));
-fail:
-    if (buffer)
-        pfree(buffer);
-    if (file)
-        FreeFile(file);
-    if (qfile)
-        FreeFile(qfile);
-    /* If possible, throw away the bogus file; ignore any error */
-    unlink(PGSS_DUMP_FILE);
-
-    /*
-     * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
-     * server is running with pg_stat_statements enabled
-     */
-}
-
-/*
- * shmem_shutdown hook: Dump statistics into file.
- *
- * Note: we don't bother with acquiring lock, because there should be no
- * other processes running when this is called.
- */
-static void
-pgss_shmem_shutdown(int code, Datum arg)
-{
-    FILE       *file;
-    char       *qbuffer = NULL;
-    Size        qbuffer_size = 0;
-    HASH_SEQ_STATUS hash_seq;
-    int32       num_entries;
-    pgssEntry  *entry;
-
-    /* Don't try to dump during a crash. */
-    if (code)
-        return;
-
-    /* Safety check ... shouldn't get here unless shmem is set up. */
-    if (!pgss || !pgss_hash)
-        return;
-
-    /* Don't dump if told not to. */
-    if (!pgss_save)
-        return;
-
-    file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
-    if (file == NULL)
-        goto error;
-
-    if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
-        goto error;
-    if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
-        goto error;
-    num_entries = hash_get_num_entries(pgss_hash);
-    if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
-        goto error;
-
-    qbuffer = qtext_load_file(&qbuffer_size);
-    if (qbuffer == NULL)
-        goto error;
-
-    /*
-     * When serializing to disk, we store query texts immediately after their
-     * entry data.  Any orphaned query texts are thereby excluded.
-     */
-    hash_seq_init(&hash_seq, pgss_hash);
-    while ((entry = hash_seq_search(&hash_seq)) != NULL)
-    {
-        int         len = entry->query_len;
-        char       *qstr = qtext_fetch(entry->query_offset, len,
-                                       qbuffer, qbuffer_size);
-
-        if (qstr == NULL)
-            continue;           /* Ignore any entries with bogus texts */
-
-        if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
-            fwrite(qstr, 1, len + 1, file) != len + 1)
-        {
-            /* note: we assume hash_seq_term won't change errno */
-            hash_seq_term(&hash_seq);
-            goto error;
-        }
-    }
-
-    free(qbuffer);
-    qbuffer = NULL;
-
-    if (FreeFile(file))
-    {
-        file = NULL;
-        goto error;
-    }
-
-    /*
-     * Rename file into place, so we atomically replace any old one.
-     */
-    if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
-        ereport(LOG,
-                (errcode_for_file_access(),
-                 errmsg("could not rename pg_stat_statement file \"%s\": %m",
-                        PGSS_DUMP_FILE ".tmp")));
-
-    /* Unlink query-texts file; it's not needed while shutdown */
-    unlink(PGSS_TEXT_FILE);
-
-    return;
-
-error:
-    ereport(LOG,
-            (errcode_for_file_access(),
-             errmsg("could not write pg_stat_statement file \"%s\": %m",
-                    PGSS_DUMP_FILE ".tmp")));
-    if (qbuffer)
-        free(qbuffer);
-    if (file)
-        FreeFile(file);
-    unlink(PGSS_DUMP_FILE ".tmp");
-    unlink(PGSS_TEXT_FILE);
-}
-
771 | -/* | |
772 | - * Post-parse-analysis hook: mark query with a queryId | |
773 | - */ | |
774 | -static void | |
775 | -pgss_post_parse_analyze(ParseState *pstate, Query *query) | |
776 | -{ | |
777 | - pgssJumbleState jstate; | |
778 | - | |
779 | - if (prev_post_parse_analyze_hook) | |
780 | - prev_post_parse_analyze_hook(pstate, query); | |
781 | - | |
782 | - /* Assert we didn't do this already */ | |
783 | - Assert(query->queryId == 0); | |
784 | - | |
785 | - /* Safety check... */ | |
786 | - if (!pgss || !pgss_hash) | |
787 | - return; | |
788 | - | |
789 | - /* | |
790 | - * Utility statements get queryId zero. We do this even in cases where | |
791 | - * the statement contains an optimizable statement for which a queryId | |
792 | - * could be derived (such as EXPLAIN or DECLARE CURSOR). For such cases, | |
793 | - * runtime control will first go through ProcessUtility and then the | |
794 | - * executor, and we don't want the executor hooks to do anything, since we | |
795 | - * are already measuring the statement's costs at the utility level. | |
796 | - */ | |
797 | - if (query->utilityStmt) | |
798 | - { | |
799 | - query->queryId = 0; | |
800 | - return; | |
801 | - } | |
802 | - | |
803 | - /* Set up workspace for query jumbling */ | |
804 | - jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE); | |
805 | - jstate.jumble_len = 0; | |
806 | - jstate.clocations_buf_size = 32; | |
807 | - jstate.clocations = (pgssLocationLen *) | |
808 | - palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); | |
809 | - jstate.clocations_count = 0; | |
810 | - | |
811 | - /* Compute query ID and mark the Query node with it */ | |
812 | - JumbleQuery(&jstate, query); | |
813 | - query->queryId = hash_any(jstate.jumble, jstate.jumble_len); | |
814 | - | |
815 | - /* | |
816 | - * If we are unlucky enough to get a hash of zero, use 1 instead, to | |
817 | - * prevent confusion with the utility-statement case. | |
818 | - */ | |
819 | - if (query->queryId == 0) | |
820 | - query->queryId = 1; | |
821 | - | |
822 | - /* | |
823 | - * If we were able to identify any ignorable constants, we immediately | |
824 | - * create a hash table entry for the query, so that we can record the | |
825 | - * normalized form of the query string. If there were no such constants, | |
826 | - * the normalized string would be the same as the query text anyway, so | |
827 | - * there's no need for an early entry. | |
828 | - */ | |
829 | - if (jstate.clocations_count > 0) | |
830 | - pgss_store(pstate->p_sourcetext, | |
831 | - query->queryId, | |
832 | - 0, | |
833 | - 0, | |
834 | - NULL, | |
835 | - &jstate); | |
836 | -} | |
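
The call to `prev_post_parse_analyze_hook` at the top of this function is PostgreSQL's standard hook-chaining convention. The module's `_PG_init`, where the chain is set up, lies outside this excerpt, so the following is a hedged sketch of the usual installation pattern (the name `my_post_parse_analyze` is illustrative):

```c
/* Hedged sketch of typical hook installation in a module's _PG_init();
 * the real _PG_init of this file is outside this excerpt. */
#include "postgres.h"
#include "fmgr.h"
#include "parser/analyze.h"

PG_MODULE_MAGIC;

static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;

static void my_post_parse_analyze(ParseState *pstate, Query *query);

void
_PG_init(void)
{
    /* remember whoever was hooked before us, then chain to them */
    prev_post_parse_analyze_hook = post_parse_analyze_hook;
    post_parse_analyze_hook = my_post_parse_analyze;
}

static void
my_post_parse_analyze(ParseState *pstate, Query *query)
{
    if (prev_post_parse_analyze_hook)
        prev_post_parse_analyze_hook(pstate, query);
    /* ... module-specific work on the analyzed Query ... */
}
```
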
837 | - | |
838 | -/* | |
839 | - * ExecutorStart hook: start up tracking if needed | |
840 | - */ | |
841 | -static void | |
842 | -pgss_ExecutorStart(QueryDesc *queryDesc, int eflags) | |
843 | -{ | |
844 | - if (prev_ExecutorStart) | |
845 | - prev_ExecutorStart(queryDesc, eflags); | |
846 | - else | |
847 | - standard_ExecutorStart(queryDesc, eflags); | |
848 | - | |
849 | - /* | |
850 | - * If query has queryId zero, don't track it. This prevents double | |
851 | - * counting of optimizable statements that are directly contained in | |
852 | - * utility statements. | |
853 | - */ | |
854 | - if (pgss_enabled() && queryDesc->plannedstmt->queryId != 0) | |
855 | - { | |
856 | - /* | |
857 | - * Set up to track total elapsed time in ExecutorRun. Make sure the | |
858 | - * space is allocated in the per-query context so it will go away at | |
859 | - * ExecutorEnd. | |
860 | - */ | |
861 | - if (queryDesc->totaltime == NULL) | |
862 | - { | |
863 | - MemoryContext oldcxt; | |
864 | - | |
865 | - oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); | |
866 | - queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL); | |
867 | - MemoryContextSwitchTo(oldcxt); | |
868 | - } | |
869 | - } | |
870 | -} | |
871 | - | |
872 | -/* | |
873 | - * ExecutorRun hook: all we need do is track nesting depth | |
874 | - */ | |
875 | -static void | |
876 | -pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count) | |
877 | -{ | |
878 | - nested_level++; | |
879 | - PG_TRY(); | |
880 | - { | |
881 | - if (prev_ExecutorRun) | |
882 | - prev_ExecutorRun(queryDesc, direction, count); | |
883 | - else | |
884 | - standard_ExecutorRun(queryDesc, direction, count); | |
885 | - nested_level--; | |
886 | - } | |
887 | - PG_CATCH(); | |
888 | - { | |
889 | - nested_level--; | |
890 | - PG_RE_THROW(); | |
891 | - } | |
892 | - PG_END_TRY(); | |
893 | -} | |
894 | - | |
895 | -/* | |
896 | - * ExecutorFinish hook: all we need do is track nesting depth | |
897 | - */ | |
898 | -static void | |
899 | -pgss_ExecutorFinish(QueryDesc *queryDesc) | |
900 | -{ | |
901 | - nested_level++; | |
902 | - PG_TRY(); | |
903 | - { | |
904 | - if (prev_ExecutorFinish) | |
905 | - prev_ExecutorFinish(queryDesc); | |
906 | - else | |
907 | - standard_ExecutorFinish(queryDesc); | |
908 | - nested_level--; | |
909 | - } | |
910 | - PG_CATCH(); | |
911 | - { | |
912 | - nested_level--; | |
913 | - PG_RE_THROW(); | |
914 | - } | |
915 | - PG_END_TRY(); | |
916 | -} | |
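
Both executor hooks above use PG_TRY solely to keep `nested_level` consistent when the wrapped call throws an error; `pgss_enabled()`, defined outside this excerpt, consults this depth so that only top-level statements are tracked when configured that way. The idiom in isolation, as a sketch:

```c
#include "postgres.h"

/* Sketch of the depth-tracking idiom used by the executor hooks above:
 * the only job of PG_TRY here is to undo the increment if fn() throws. */
static int nested_level = 0;

static void
call_with_depth_tracking(void (*fn) (void))
{
    nested_level++;
    PG_TRY();
    {
        fn();
        nested_level--;
    }
    PG_CATCH();
    {
        nested_level--;
        PG_RE_THROW();
    }
    PG_END_TRY();
}
```
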
917 | - | |
918 | -/* | |
919 | - * ExecutorEnd hook: store results if needed | |
920 | - */ | |
921 | -static void | |
922 | -pgss_ExecutorEnd(QueryDesc *queryDesc) | |
923 | -{ | |
924 | - uint32 queryId = queryDesc->plannedstmt->queryId; | |
925 | - | |
926 | - if (queryId != 0 && queryDesc->totaltime && pgss_enabled()) | |
927 | - { | |
928 | - /* | |
929 | - * Make sure stats accumulation is done. (Note: it's okay if several | |
930 | - * levels of hook all do this.) | |
931 | - */ | |
932 | - InstrEndLoop(queryDesc->totaltime); | |
933 | - | |
934 | - pgss_store(queryDesc->sourceText, | |
935 | - queryId, | |
936 | - queryDesc->totaltime->total * 1000.0, /* convert to msec */ | |
937 | - queryDesc->estate->es_processed, | |
938 | - &queryDesc->totaltime->bufusage, | |
939 | - NULL); | |
940 | - } | |
941 | - | |
942 | - if (prev_ExecutorEnd) | |
943 | - prev_ExecutorEnd(queryDesc); | |
944 | - else | |
945 | - standard_ExecutorEnd(queryDesc); | |
946 | -} | |
947 | - | |
948 | -/* | |
949 | - * ProcessUtility hook | |
950 | - */ | |
951 | -static void | |
952 | -pgss_ProcessUtility(Node *parsetree, const char *queryString, | |
953 | - ProcessUtilityContext context, ParamListInfo params, | |
954 | - DestReceiver *dest, char *completionTag) | |
955 | -{ | |
956 | - /* | |
957 | - * If it's an EXECUTE statement, we don't track it and don't increment the | |
958 | - * nesting level. This allows the cycles to be charged to the underlying | |
959 | - * PREPARE instead (by the Executor hooks), which is much more useful. | |
960 | - * | |
961 | - * We also don't track execution of PREPARE. If we did, we would get one | |
962 | - * hash table entry for the PREPARE (with hash calculated from the query | |
963 | - * string), and then a different one with the same query string (but hash | |
964 | - * calculated from the query tree) would be used to accumulate costs of | |
965 | - * ensuing EXECUTEs. This would be confusing, and inconsistent with other | |
966 | - * cases where planning time is not included at all. | |
967 | - * | |
968 | - * Likewise, we don't track execution of DEALLOCATE. | |
969 | - */ | |
970 | - if (pgss_track_utility && pgss_enabled() && | |
971 | - !IsA(parsetree, ExecuteStmt) && | |
972 | - !IsA(parsetree, PrepareStmt) && | |
973 | - !IsA(parsetree, DeallocateStmt)) | |
974 | - { | |
975 | - instr_time start; | |
976 | - instr_time duration; | |
977 | - uint64 rows; | |
978 | - BufferUsage bufusage_start, | |
979 | - bufusage; | |
980 | - uint32 queryId; | |
981 | - | |
982 | - bufusage_start = pgBufferUsage; | |
983 | - INSTR_TIME_SET_CURRENT(start); | |
984 | - | |
985 | - nested_level++; | |
986 | - PG_TRY(); | |
987 | - { | |
988 | - if (prev_ProcessUtility) | |
989 | - prev_ProcessUtility(parsetree, queryString, | |
990 | - context, params, | |
991 | - dest, completionTag); | |
992 | - else | |
993 | - standard_ProcessUtility(parsetree, queryString, | |
994 | - context, params, | |
995 | - dest, completionTag); | |
996 | - nested_level--; | |
997 | - } | |
998 | - PG_CATCH(); | |
999 | - { | |
1000 | - nested_level--; | |
1001 | - PG_RE_THROW(); | |
1002 | - } | |
1003 | - PG_END_TRY(); | |
1004 | - | |
1005 | - INSTR_TIME_SET_CURRENT(duration); | |
1006 | - INSTR_TIME_SUBTRACT(duration, start); | |
1007 | - | |
1008 | - /* parse command tag to retrieve the number of affected rows. */ | |
1009 | - if (completionTag && | |
1010 | - strncmp(completionTag, "COPY ", 5) == 0) | |
1011 | - { | |
1012 | -#ifdef HAVE_STRTOULL | |
1013 | - rows = strtoull(completionTag + 5, NULL, 10); | |
1014 | -#else | |
1015 | - rows = strtoul(completionTag + 5, NULL, 10); | |
1016 | -#endif | |
1017 | - } | |
1018 | - else | |
1019 | - rows = 0; | |
1020 | - | |
1021 | - /* calculate the differences of the buffer usage counters. */ | |
1022 | - bufusage.shared_blks_hit = | |
1023 | - pgBufferUsage.shared_blks_hit - bufusage_start.shared_blks_hit; | |
1024 | - bufusage.shared_blks_read = | |
1025 | - pgBufferUsage.shared_blks_read - bufusage_start.shared_blks_read; | |
1026 | - bufusage.shared_blks_dirtied = | |
1027 | - pgBufferUsage.shared_blks_dirtied - bufusage_start.shared_blks_dirtied; | |
1028 | - bufusage.shared_blks_written = | |
1029 | - pgBufferUsage.shared_blks_written - bufusage_start.shared_blks_written; | |
1030 | - bufusage.local_blks_hit = | |
1031 | - pgBufferUsage.local_blks_hit - bufusage_start.local_blks_hit; | |
1032 | - bufusage.local_blks_read = | |
1033 | - pgBufferUsage.local_blks_read - bufusage_start.local_blks_read; | |
1034 | - bufusage.local_blks_dirtied = | |
1035 | - pgBufferUsage.local_blks_dirtied - bufusage_start.local_blks_dirtied; | |
1036 | - bufusage.local_blks_written = | |
1037 | - pgBufferUsage.local_blks_written - bufusage_start.local_blks_written; | |
1038 | - bufusage.temp_blks_read = | |
1039 | - pgBufferUsage.temp_blks_read - bufusage_start.temp_blks_read; | |
1040 | - bufusage.temp_blks_written = | |
1041 | - pgBufferUsage.temp_blks_written - bufusage_start.temp_blks_written; | |
1042 | - bufusage.blk_read_time = pgBufferUsage.blk_read_time; | |
1043 | - INSTR_TIME_SUBTRACT(bufusage.blk_read_time, bufusage_start.blk_read_time); | |
1044 | - bufusage.blk_write_time = pgBufferUsage.blk_write_time; | |
1045 | - INSTR_TIME_SUBTRACT(bufusage.blk_write_time, bufusage_start.blk_write_time); | |
1046 | - | |
1047 | - /* For utility statements, we just hash the query string directly */ | |
1048 | - queryId = pgss_hash_string(queryString); | |
1049 | - | |
1050 | - pgss_store(queryString, | |
1051 | - queryId, | |
1052 | - INSTR_TIME_GET_MILLISEC(duration), | |
1053 | - rows, | |
1054 | - &bufusage, | |
1055 | - NULL); | |
1056 | - } | |
1057 | - else | |
1058 | - { | |
1059 | - if (prev_ProcessUtility) | |
1060 | - prev_ProcessUtility(parsetree, queryString, | |
1061 | - context, params, | |
1062 | - dest, completionTag); | |
1063 | - else | |
1064 | - standard_ProcessUtility(parsetree, queryString, | |
1065 | - context, params, | |
1066 | - dest, completionTag); | |
1067 | - } | |
1068 | -} | |
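
The utility branch above times the statement with PostgreSQL's `instr_time` macros rather than calling a clock function directly. A minimal, self-contained sketch of that accounting (the callback name is illustrative):

```c
#include "postgres.h"
#include "portability/instr_time.h"

/* Minimal sketch of the instr_time accounting used above; returns the
 * elapsed wall-clock time of work() in milliseconds. */
static double
time_some_work(void (*work) (void))
{
    instr_time start;
    instr_time duration;

    INSTR_TIME_SET_CURRENT(start);
    work();
    INSTR_TIME_SET_CURRENT(duration);
    INSTR_TIME_SUBTRACT(duration, start);   /* duration -= start */

    return INSTR_TIME_GET_MILLISEC(duration);
}
```
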
1069 | - | |
1070 | -/* | |
1071 | - * Calculate hash value for a key | |
1072 | - */ | |
1073 | -static uint32 | |
1074 | -pgss_hash_fn(const void *key, Size keysize) | |
1075 | -{ | |
1076 | - const pgssHashKey *k = (const pgssHashKey *) key; | |
1077 | - | |
1078 | - return hash_uint32((uint32) k->userid) ^ | |
1079 | - hash_uint32((uint32) k->dbid) ^ | |
1080 | - hash_uint32((uint32) k->queryid); | |
1081 | -} | |
1082 | - | |
1083 | -/* | |
1084 | - * Compare two keys - zero means match | |
1085 | - */ | |
1086 | -static int | |
1087 | -pgss_match_fn(const void *key1, const void *key2, Size keysize) | |
1088 | -{ | |
1089 | - const pgssHashKey *k1 = (const pgssHashKey *) key1; | |
1090 | - const pgssHashKey *k2 = (const pgssHashKey *) key2; | |
1091 | - | |
1092 | - if (k1->userid == k2->userid && | |
1093 | - k1->dbid == k2->dbid && | |
1094 | - k1->queryid == k2->queryid) | |
1095 | - return 0; | |
1096 | - else | |
1097 | - return 1; | |
1098 | -} | |
1099 | - | |
1100 | -/* | |
1101 | - * Given an arbitrarily long query string, produce a hash for the purposes of | |
1102 | - * identifying the query, without normalizing constants. Used when hashing | |
1103 | - * utility statements. | |
1104 | - */ | |
1105 | -static uint32 | |
1106 | -pgss_hash_string(const char *str) | |
1107 | -{ | |
1108 | - return hash_any((const unsigned char *) str, strlen(str)); | |
1109 | -} | |
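
`pgss_hash_fn` and `pgss_match_fn` are never called directly; they are handed to the hashtable machinery when the shared table is created in `pgss_shmem_startup`, which falls outside this excerpt. Roughly, assuming the `pgssHashKey`/`pgssEntry` definitions from earlier in the file, the wiring looks like this sketch:

```c
#include "postgres.h"
#include "storage/shmem.h"
#include "utils/hsearch.h"

/* Hedged sketch: how the custom hash/match callbacks above plug into the
 * shared hashtable; the real call site is pgss_shmem_startup(). */
static HTAB *
create_pgss_hash(long max_entries)
{
    HASHCTL info;

    memset(&info, 0, sizeof(info));
    info.keysize = sizeof(pgssHashKey);
    info.entrysize = sizeof(pgssEntry);
    info.hash = pgss_hash_fn;    /* our hasher over (userid, dbid, queryid) */
    info.match = pgss_match_fn;  /* our equality test for the same key */

    return ShmemInitHash("pg_stat_statements hash",
                         max_entries, max_entries,
                         &info,
                         HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
}
```
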
1110 | - | |
1111 | -/* | |
1112 | - * Store some statistics for a statement. | |
1113 | - * | |
1114 | - * If jstate is not NULL then we're trying to create an entry for which | |
1115 | - * we have no statistics as yet; we just want to record the normalized | |
1116 | - * query string. total_time, rows, bufusage are ignored in this case. | |
1117 | - */ | |
1118 | -static void | |
1119 | -pgss_store(const char *query, uint32 queryId, | |
1120 | - double total_time, uint64 rows, | |
1121 | - const BufferUsage *bufusage, | |
1122 | - pgssJumbleState *jstate) | |
1123 | -{ | |
1124 | - pgssHashKey key; | |
1125 | - pgssEntry *entry; | |
1126 | - char *norm_query = NULL; | |
1127 | - int encoding = GetDatabaseEncoding(); | |
1128 | - int query_len; | |
1129 | - | |
1130 | - Assert(query != NULL); | |
1131 | - | |
1132 | - /* Safety check... */ | |
1133 | - if (!pgss || !pgss_hash) | |
1134 | - return; | |
1135 | - | |
1136 | - query_len = strlen(query); | |
1137 | - | |
1138 | - /* Set up key for hashtable search */ | |
1139 | - key.userid = GetUserId(); | |
1140 | - key.dbid = MyDatabaseId; | |
1141 | - key.queryid = queryId; | |
1142 | - | |
1143 | - /* Lookup the hash table entry with shared lock. */ | |
1144 | - LWLockAcquire(pgss->lock, LW_SHARED); | |
1145 | - | |
1146 | - entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL); | |
1147 | - | |
1148 | - /* Create new entry, if not present */ | |
1149 | - if (!entry) | |
1150 | - { | |
1151 | - Size query_offset; | |
1152 | - int gc_count; | |
1153 | - bool stored; | |
1154 | - bool do_gc; | |
1155 | - | |
1156 | - /* | |
1157 | - * Create a new, normalized query string if caller asked. We don't | |
1158 | - * need to hold the lock while doing this work. (Note: in any case, | |
1159 | - * it's possible that someone else creates a duplicate hashtable entry | |
1160 | - * in the interval where we don't hold the lock below. That case is | |
1161 | - * handled by entry_alloc.) | |
1162 | - */ | |
1163 | - if (jstate) | |
1164 | - { | |
1165 | - LWLockRelease(pgss->lock); | |
1166 | - norm_query = generate_normalized_query(jstate, query, | |
1167 | - &query_len, | |
1168 | - encoding); | |
1169 | - LWLockAcquire(pgss->lock, LW_SHARED); | |
1170 | - } | |
1171 | - | |
1172 | - /* Append new query text to file with only shared lock held */ | |
1173 | - stored = qtext_store(norm_query ? norm_query : query, query_len, | |
1174 | - &query_offset, &gc_count); | |
1175 | - | |
1176 | - /* | |
1177 | - * Determine whether we need to garbage collect external query texts | |
1178 | - * while the shared lock is still held. This micro-optimization | |
1179 | - * avoids taking the time to decide this while holding exclusive lock. | |
1180 | - */ | |
1181 | - do_gc = need_gc_qtexts(); | |
1182 | - | |
1183 | - /* Need exclusive lock to make a new hashtable entry - promote */ | |
1184 | - LWLockRelease(pgss->lock); | |
1185 | - LWLockAcquire(pgss->lock, LW_EXCLUSIVE); | |
1186 | - | |
1187 | - /* | |
1188 | - * A garbage collection may have occurred while we weren't holding the | |
1189 | - * lock. In the unlikely event that this happens, the query text we | |
1190 | - * stored above will have been garbage collected, so write it again. | |
1191 | - * This should be infrequent enough that doing it while holding | |
1192 | - * exclusive lock isn't a performance problem. | |
1193 | - */ | |
1194 | - if (!stored || pgss->gc_count != gc_count) | |
1195 | - stored = qtext_store(norm_query ? norm_query : query, query_len, | |
1196 | - &query_offset, NULL); | |
1197 | - | |
1198 | - /* If we failed to write to the text file, give up */ | |
1199 | - if (!stored) | |
1200 | - goto done; | |
1201 | - | |
1202 | - /* OK to create a new hashtable entry */ | |
1203 | - entry = entry_alloc(&key, query_offset, query_len, encoding, | |
1204 | - jstate != NULL); | |
1205 | - | |
1206 | - /* If needed, perform garbage collection while exclusive lock held */ | |
1207 | - if (do_gc) | |
1208 | - gc_qtexts(); | |
1209 | - } | |
1210 | - | |
1211 | - /* Increment the counts, except when jstate is not NULL */ | |
1212 | - if (!jstate) | |
1213 | - { | |
1214 | - /* | |
1215 | - * Grab the spinlock while updating the counters (see comment about | |
1216 | - * locking rules at the head of the file) | |
1217 | - */ | |
1218 | - volatile pgssEntry *e = (volatile pgssEntry *) entry; | |
1219 | - | |
1220 | - SpinLockAcquire(&e->mutex); | |
1221 | - | |
1222 | - /* "Unstick" entry if it was previously sticky */ | |
1223 | - if (e->counters.calls == 0) | |
1224 | - e->counters.usage = USAGE_INIT; | |
1225 | - | |
1226 | - e->counters.calls += 1; | |
1227 | - e->counters.total_time += total_time; | |
1228 | - e->counters.rows += rows; | |
1229 | - e->counters.shared_blks_hit += bufusage->shared_blks_hit; | |
1230 | - e->counters.shared_blks_read += bufusage->shared_blks_read; | |
1231 | - e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied; | |
1232 | - e->counters.shared_blks_written += bufusage->shared_blks_written; | |
1233 | - e->counters.local_blks_hit += bufusage->local_blks_hit; | |
1234 | - e->counters.local_blks_read += bufusage->local_blks_read; | |
1235 | - e->counters.local_blks_dirtied += bufusage->local_blks_dirtied; | |
1236 | - e->counters.local_blks_written += bufusage->local_blks_written; | |
1237 | - e->counters.temp_blks_read += bufusage->temp_blks_read; | |
1238 | - e->counters.temp_blks_written += bufusage->temp_blks_written; | |
1239 | - e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time); | |
1240 | - e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time); | |
1241 | - e->counters.usage += USAGE_EXEC(total_time); | |
1242 | - | |
1243 | - SpinLockRelease(&e->mutex); | |
1244 | - } | |
1245 | - | |
1246 | -done: | |
1247 | - LWLockRelease(pgss->lock); | |
1248 | - | |
1249 | - /* We postpone this clean-up until we're out of the lock */ | |
1250 | - if (norm_query) | |
1251 | - pfree(norm_query); | |
1252 | -} | |
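
The locking dance in `pgss_store` is worth spelling out: LWLocks cannot be upgraded in place, so "promotion" means releasing the shared lock and taking an exclusive one, and every fact gathered under the shared lock (here, `stored` and `gc_count`) must be rechecked afterwards. A condensed sketch of that shape (names and types are illustrative):

```c
#include "postgres.h"
#include "storage/lwlock.h"
#include "utils/hsearch.h"

/* Condensed sketch of the release-and-reacquire "promotion" above;
 * pgss_store additionally rechecks gc_count and re-stores the query
 * text for the window in which no lock is held. */
static void *
find_or_make_entry(LWLock *lock, HTAB *htab, const void *key)
{
    bool   found;
    void  *entry;

    LWLockAcquire(lock, LW_SHARED);
    entry = hash_search(htab, key, HASH_FIND, NULL);
    if (entry == NULL)
    {
        LWLockRelease(lock);                /* cannot upgrade in place... */
        LWLockAcquire(lock, LW_EXCLUSIVE);  /* ...so drop and retake */

        /* someone may have inserted meanwhile; HASH_ENTER then simply
         * returns the existing entry, just as entry_alloc tolerates */
        entry = hash_search(htab, key, HASH_ENTER, &found);
    }
    LWLockRelease(lock);
    return entry;
}
```
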
1253 | - | |
1254 | -/* | |
1255 | - * Reset all statement statistics. | |
1256 | - */ | |
1257 | -Datum | |
1258 | -pg_stat_statements_reset(PG_FUNCTION_ARGS) | |
1259 | -{ | |
1260 | - if (!pgss || !pgss_hash) | |
1261 | - ereport(ERROR, | |
1262 | - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), | |
1263 | - errmsg("pg_stat_statements must be loaded via shared_preload_libraries"))); | |
1264 | - entry_reset(); | |
1265 | - PG_RETURN_VOID(); | |
1266 | -} | |
1267 | - | |
1268 | -/* Number of output arguments (columns) for various API versions */ | |
1269 | -#define PG_STAT_STATEMENTS_COLS_V1_0 14 | |
1270 | -#define PG_STAT_STATEMENTS_COLS_V1_1 18 | |
1271 | -#define PG_STAT_STATEMENTS_COLS_V1_2 19 | |
1272 | -#define PG_STAT_STATEMENTS_COLS 19 /* maximum of above */ | |
1273 | - | |
1274 | -/* | |
1275 | - * Retrieve statement statistics. | |
1276 | - * | |
1277 | - * The SQL API of this function has changed multiple times, and will likely | |
1278 | - * do so again in future. To support the case where a newer version of this | |
1279 | - * loadable module is being used with an old SQL declaration of the function, | |
1280 | - * we continue to support the older API versions. For 1.2 and later, the | |
1281 | - * expected API version is identified by embedding it in the C name of the | |
1282 | - * function. Unfortunately we weren't bright enough to do that for 1.1. | |
1283 | - */ | |
1284 | -Datum | |
1285 | -pg_stat_statements_1_2(PG_FUNCTION_ARGS) | |
1286 | -{ | |
1287 | - bool showtext = PG_GETARG_BOOL(0); | |
1288 | - | |
1289 | - pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext); | |
1290 | - | |
1291 | - return (Datum) 0; | |
1292 | -} | |
1293 | - | |
1294 | -/* | |
1295 | - * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1. | |
1296 | - * This can be removed someday, perhaps. | |
1297 | - */ | |
1298 | -Datum | |
1299 | -pg_stat_statements(PG_FUNCTION_ARGS) | |
1300 | -{ | |
1301 | - /* If it's really API 1.1, we'll figure that out below */ | |
1302 | - pg_stat_statements_internal(fcinfo, PGSS_V1_0, true); | |
1303 | - | |
1304 | - return (Datum) 0; | |
1305 | -} | |
1306 | - | |
1307 | -/* Common code for all versions of pg_stat_statements() */ | |
1308 | -static void | |
1309 | -pg_stat_statements_internal(FunctionCallInfo fcinfo, | |
1310 | - pgssVersion api_version, | |
1311 | - bool showtext) | |
1312 | -{ | |
1313 | - ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; | |
1314 | - TupleDesc tupdesc; | |
1315 | - Tuplestorestate *tupstore; | |
1316 | - MemoryContext per_query_ctx; | |
1317 | - MemoryContext oldcontext; | |
1318 | - Oid userid = GetUserId(); | |
1319 | - bool is_superuser = superuser(); | |
1320 | - char *qbuffer = NULL; | |
1321 | - Size qbuffer_size = 0; | |
1322 | - Size extent = 0; | |
1323 | - int gc_count = 0; | |
1324 | - HASH_SEQ_STATUS hash_seq; | |
1325 | - pgssEntry *entry; | |
1326 | - | |
1327 | - /* hash table must exist already */ | |
1328 | - if (!pgss || !pgss_hash) | |
1329 | - ereport(ERROR, | |
1330 | - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), | |
1331 | - errmsg("pg_stat_statements must be loaded via shared_preload_libraries"))); | |
1332 | - | |
1333 | - /* check to see if caller supports us returning a tuplestore */ | |
1334 | - if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) | |
1335 | - ereport(ERROR, | |
1336 | - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), | |
1337 | - errmsg("set-valued function called in context that cannot accept a set"))); | |
1338 | - if (!(rsinfo->allowedModes & SFRM_Materialize)) | |
1339 | - ereport(ERROR, | |
1340 | - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), | |
1341 | - errmsg("materialize mode required, but it is not " \ | |
1342 | - "allowed in this context"))); | |
1343 | - | |
1344 | - /* Switch into long-lived context to construct returned data structures */ | |
1345 | - per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; | |
1346 | - oldcontext = MemoryContextSwitchTo(per_query_ctx); | |
1347 | - | |
1348 | - /* Build a tuple descriptor for our result type */ | |
1349 | - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) | |
1350 | - elog(ERROR, "return type must be a row type"); | |
1351 | - | |
1352 | - /* | |
1353 | - * Check we have the expected number of output arguments. Aside from | |
1354 | - * being a good safety check, we need a kluge here to detect API version | |
1355 | - * 1.1, which was wedged into the code in an ill-considered way. | |
1356 | - */ | |
1357 | - switch (tupdesc->natts) | |
1358 | - { | |
1359 | - case PG_STAT_STATEMENTS_COLS_V1_0: | |
1360 | - if (api_version != PGSS_V1_0) | |
1361 | - elog(ERROR, "incorrect number of output arguments"); | |
1362 | - break; | |
1363 | - case PG_STAT_STATEMENTS_COLS_V1_1: | |
1364 | - /* pg_stat_statements() should have told us 1.0 */ | |
1365 | - if (api_version != PGSS_V1_0) | |
1366 | - elog(ERROR, "incorrect number of output arguments"); | |
1367 | - api_version = PGSS_V1_1; | |
1368 | - break; | |
1369 | - case PG_STAT_STATEMENTS_COLS_V1_2: | |
1370 | - if (api_version != PGSS_V1_2) | |
1371 | - elog(ERROR, "incorrect number of output arguments"); | |
1372 | - break; | |
1373 | - default: | |
1374 | - elog(ERROR, "incorrect number of output arguments"); | |
1375 | - } | |
1376 | - | |
1377 | - tupstore = tuplestore_begin_heap(true, false, work_mem); | |
1378 | - rsinfo->returnMode = SFRM_Materialize; | |
1379 | - rsinfo->setResult = tupstore; | |
1380 | - rsinfo->setDesc = tupdesc; | |
1381 | - | |
1382 | - MemoryContextSwitchTo(oldcontext); | |
1383 | - | |
1384 | - /* | |
1385 | - * We'd like to load the query text file (if needed) while not holding any | |
1386 | - * lock on pgss->lock. In the worst case we'll have to do this again | |
1387 | - * after we have the lock, but it's unlikely enough to make this a win | |
1388 | - * despite occasional duplicated work. We need to reload if anybody | |
1389 | - * writes to the file (either a retail qtext_store(), or a garbage | |
1390 | - * collection) between this point and where we've gotten shared lock. If | |
1391 | - * a qtext_store is actually in progress when we look, we might as well | |
1392 | - * skip the speculative load entirely. | |
1393 | - */ | |
1394 | - if (showtext) | |
1395 | - { | |
1396 | - int n_writers; | |
1397 | - | |
1398 | - /* Take the mutex so we can examine variables */ | |
1399 | - { | |
1400 | - volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; | |
1401 | - | |
1402 | - SpinLockAcquire(&s->mutex); | |
1403 | - extent = s->extent; | |
1404 | - n_writers = s->n_writers; | |
1405 | - gc_count = s->gc_count; | |
1406 | - SpinLockRelease(&s->mutex); | |
1407 | - } | |
1408 | - | |
1409 | - /* No point in loading file now if there are active writers */ | |
1410 | - if (n_writers == 0) | |
1411 | - qbuffer = qtext_load_file(&qbuffer_size); | |
1412 | - } | |
1413 | - | |
1414 | - /* | |
1415 | - * Get shared lock, load or reload the query text file if we must, and | |
1416 | - * iterate over the hashtable entries. | |
1417 | - * | |
1418 | - * With a large hash table, we might be holding the lock rather longer | |
1419 | - * than one could wish. However, this only blocks creation of new hash | |
1420 | - * table entries, and the larger the hash table the less likely that is to | |
1421 | - * be needed. So we can hope this is okay. Perhaps someday we'll decide | |
1422 | - * we need to partition the hash table to limit the time spent holding any | |
1423 | - * one lock. | |
1424 | - */ | |
1425 | - LWLockAcquire(pgss->lock, LW_SHARED); | |
1426 | - | |
1427 | - if (showtext) | |
1428 | - { | |
1429 | - /* | |
1430 | - * Here it is safe to examine extent and gc_count without taking the | |
1431 | - * mutex. Note that although other processes might change | |
1432 | - * pgss->extent just after we look at it, the strings they then write | |
1433 | - * into the file cannot yet be referenced in the hashtable, so we | |
1434 | - * don't care whether we see them or not. | |
1435 | - * | |
1436 | - * If qtext_load_file fails, we just press on; we'll return NULL for | |
1437 | - * every query text. | |
1438 | - */ | |
1439 | - if (qbuffer == NULL || | |
1440 | - pgss->extent != extent || | |
1441 | - pgss->gc_count != gc_count) | |
1442 | - { | |
1443 | - if (qbuffer) | |
1444 | - free(qbuffer); | |
1445 | - qbuffer = qtext_load_file(&qbuffer_size); | |
1446 | - } | |
1447 | - } | |
1448 | - | |
1449 | - hash_seq_init(&hash_seq, pgss_hash); | |
1450 | - while ((entry = hash_seq_search(&hash_seq)) != NULL) | |
1451 | - { | |
1452 | - Datum values[PG_STAT_STATEMENTS_COLS]; | |
1453 | - bool nulls[PG_STAT_STATEMENTS_COLS]; | |
1454 | - int i = 0; | |
1455 | - Counters tmp; | |
1456 | - int64 queryid = entry->key.queryid; | |
1457 | - | |
1458 | - memset(values, 0, sizeof(values)); | |
1459 | - memset(nulls, 0, sizeof(nulls)); | |
1460 | - | |
1461 | - values[i++] = ObjectIdGetDatum(entry->key.userid); | |
1462 | - values[i++] = ObjectIdGetDatum(entry->key.dbid); | |
1463 | - | |
1464 | - if (is_superuser || entry->key.userid == userid) | |
1465 | - { | |
1466 | - if (api_version >= PGSS_V1_2) | |
1467 | - values[i++] = Int64GetDatumFast(queryid); | |
1468 | - | |
1469 | - if (showtext) | |
1470 | - { | |
1471 | - char *qstr = qtext_fetch(entry->query_offset, | |
1472 | - entry->query_len, | |
1473 | - qbuffer, | |
1474 | - qbuffer_size); | |
1475 | - | |
1476 | - if (qstr) | |
1477 | - { | |
1478 | - char *enc; | |
1479 | - | |
1480 | - enc = pg_any_to_server(qstr, | |
1481 | - entry->query_len, | |
1482 | - entry->encoding); | |
1483 | - | |
1484 | - values[i++] = CStringGetTextDatum(enc); | |
1485 | - | |
1486 | - if (enc != qstr) | |
1487 | - pfree(enc); | |
1488 | - } | |
1489 | - else | |
1490 | - { | |
1491 | - /* Just return a null if we fail to find the text */ | |
1492 | - nulls[i++] = true; | |
1493 | - } | |
1494 | - } | |
1495 | - else | |
1496 | - { | |
1497 | - /* Query text not requested */ | |
1498 | - nulls[i++] = true; | |
1499 | - } | |
1500 | - } | |
1501 | - else | |
1502 | - { | |
1503 | - /* Don't show queryid */ | |
1504 | - if (api_version >= PGSS_V1_2) | |
1505 | - nulls[i++] = true; | |
1506 | - | |
1507 | - /* | |
1508 | - * Don't show query text, but hint as to the reason for not doing | |
1509 | - * so if it was requested | |
1510 | - */ | |
1511 | - if (showtext) | |
1512 | - values[i++] = CStringGetTextDatum("<insufficient privilege>"); | |
1513 | - else | |
1514 | - nulls[i++] = true; | |
1515 | - } | |
1516 | - | |
1517 | - /* copy counters to a local variable to keep locking time short */ | |
1518 | - { | |
1519 | - volatile pgssEntry *e = (volatile pgssEntry *) entry; | |
1520 | - | |
1521 | - SpinLockAcquire(&e->mutex); | |
1522 | - tmp = e->counters; | |
1523 | - SpinLockRelease(&e->mutex); | |
1524 | - } | |
1525 | - | |
1526 | - /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */ | |
1527 | - if (tmp.calls == 0) | |
1528 | - continue; | |
1529 | - | |
1530 | - values[i++] = Int64GetDatumFast(tmp.calls); | |
1531 | - values[i++] = Float8GetDatumFast(tmp.total_time); | |
1532 | - values[i++] = Int64GetDatumFast(tmp.rows); | |
1533 | - values[i++] = Int64GetDatumFast(tmp.shared_blks_hit); | |
1534 | - values[i++] = Int64GetDatumFast(tmp.shared_blks_read); | |
1535 | - if (api_version >= PGSS_V1_1) | |
1536 | - values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied); | |
1537 | - values[i++] = Int64GetDatumFast(tmp.shared_blks_written); | |
1538 | - values[i++] = Int64GetDatumFast(tmp.local_blks_hit); | |
1539 | - values[i++] = Int64GetDatumFast(tmp.local_blks_read); | |
1540 | - if (api_version >= PGSS_V1_1) | |
1541 | - values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied); | |
1542 | - values[i++] = Int64GetDatumFast(tmp.local_blks_written); | |
1543 | - values[i++] = Int64GetDatumFast(tmp.temp_blks_read); | |
1544 | - values[i++] = Int64GetDatumFast(tmp.temp_blks_written); | |
1545 | - if (api_version >= PGSS_V1_1) | |
1546 | - { | |
1547 | - values[i++] = Float8GetDatumFast(tmp.blk_read_time); | |
1548 | - values[i++] = Float8GetDatumFast(tmp.blk_write_time); | |
1549 | - } | |
1550 | - | |
1551 | - Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 : | |
1552 | - api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 : | |
1553 | - api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 : | |
1554 | - -1 /* fail if you forget to update this assert */ )); | |
1555 | - | |
1556 | - tuplestore_putvalues(tupstore, tupdesc, values, nulls); | |
1557 | - } | |
1558 | - | |
1559 | - /* clean up and return the tuplestore */ | |
1560 | - LWLockRelease(pgss->lock); | |
1561 | - | |
1562 | - if (qbuffer) | |
1563 | - free(qbuffer); | |
1564 | - | |
1565 | - tuplestore_donestoring(tupstore); | |
1566 | -} | |
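
Stripped of the pgss specifics, `pg_stat_statements_internal` is the standard recipe for a materialize-mode set-returning function. A hedged skeleton (function name illustrative; the caller-capability checks shown above are omitted for brevity):

```c
#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"          /* work_mem */

/* Hedged skeleton of a materialize-mode SRF, following the shape of
 * pg_stat_statements_internal above. */
Datum
my_set_returning_func(PG_FUNCTION_ARGS)
{
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    TupleDesc      tupdesc;
    Tuplestorestate *tupstore;
    MemoryContext  oldcontext;

    if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    /* build the result in the per-query context so it outlives this call */
    oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
    tupstore = tuplestore_begin_heap(true, false, work_mem);
    rsinfo->returnMode = SFRM_Materialize;
    rsinfo->setResult = tupstore;
    rsinfo->setDesc = tupdesc;
    MemoryContextSwitchTo(oldcontext);

    /* ... one tuplestore_putvalues(tupstore, tupdesc, values, nulls)
     * call per result row goes here ... */

    tuplestore_donestoring(tupstore);
    return (Datum) 0;
}
```
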
1567 | - | |
1568 | -/* | |
1569 | - * Estimate shared memory space needed. | |
1570 | - */ | |
1571 | -static Size | |
1572 | -pgss_memsize(void) | |
1573 | -{ | |
1574 | - Size size; | |
1575 | - | |
1576 | - size = MAXALIGN(sizeof(pgssSharedState)); | |
1577 | - size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry))); | |
1578 | - | |
1579 | - return size; | |
1580 | -} | |
1581 | - | |
1582 | -/* | |
1583 | - * Allocate a new hashtable entry. | |
1584 | - * caller must hold an exclusive lock on pgss->lock | |
1585 | - * | |
1586 | - * "query" need not be null-terminated; we rely on query_len instead | |
1587 | - * | |
1588 | - * If "sticky" is true, make the new entry artificially sticky so that it will | |
1589 | - * probably still be there when the query finishes execution. We do this by | |
1590 | - * giving it a median usage value rather than the normal value. (Strictly | |
1591 | - * speaking, query strings are normalized on a best effort basis, though it | |
1592 | - * would be difficult to demonstrate this even under artificial conditions.) | |
1593 | - * | |
1594 | - * Note: despite needing exclusive lock, it's not an error for the target | |
1595 | - * entry to already exist. This is because pgss_store releases and | |
1596 | - * reacquires lock after failing to find a match; so someone else could | |
1597 | - * have made the entry while we waited to get exclusive lock. | |
1598 | - */ | |
1599 | -static pgssEntry * | |
1600 | -entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, | |
1601 | - bool sticky) | |
1602 | -{ | |
1603 | - pgssEntry *entry; | |
1604 | - bool found; | |
1605 | - | |
1606 | - /* Make space if needed */ | |
1607 | - while (hash_get_num_entries(pgss_hash) >= pgss_max) | |
1608 | - entry_dealloc(); | |
1609 | - | |
1610 | - /* Find or create an entry with desired hash code */ | |
1611 | - entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found); | |
1612 | - | |
1613 | - if (!found) | |
1614 | - { | |
1615 | - /* New entry, initialize it */ | |
1616 | - | |
1617 | - /* reset the statistics */ | |
1618 | - memset(&entry->counters, 0, sizeof(Counters)); | |
1619 | - /* set the appropriate initial usage count */ | |
1620 | - entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT; | |
1621 | - /* re-initialize the mutex each time ... we assume no one using it */ | |
1622 | - SpinLockInit(&entry->mutex); | |
1623 | - /* ... and don't forget the query text metadata */ | |
1624 | - Assert(query_len >= 0); | |
1625 | - entry->query_offset = query_offset; | |
1626 | - entry->query_len = query_len; | |
1627 | - entry->encoding = encoding; | |
1628 | - } | |
1629 | - | |
1630 | - return entry; | |
1631 | -} | |
1632 | - | |
1633 | -/* | |
1634 | - * qsort comparator for sorting into increasing usage order | |
1635 | - */ | |
1636 | -static int | |
1637 | -entry_cmp(const void *lhs, const void *rhs) | |
1638 | -{ | |
1639 | - double l_usage = (*(pgssEntry *const *) lhs)->counters.usage; | |
1640 | - double r_usage = (*(pgssEntry *const *) rhs)->counters.usage; | |
1641 | - | |
1642 | - if (l_usage < r_usage) | |
1643 | - return -1; | |
1644 | - else if (l_usage > r_usage) | |
1645 | - return +1; | |
1646 | - else | |
1647 | - return 0; | |
1648 | -} | |
1649 | - | |
1650 | -/* | |
1651 | - * Deallocate least used entries. | |
1652 | - * Caller must hold an exclusive lock on pgss->lock. | |
1653 | - */ | |
1654 | -static void | |
1655 | -entry_dealloc(void) | |
1656 | -{ | |
1657 | - HASH_SEQ_STATUS hash_seq; | |
1658 | - pgssEntry **entries; | |
1659 | - pgssEntry *entry; | |
1660 | - int nvictims; | |
1661 | - int i; | |
1662 | - Size totlen = 0; | |
1663 | - | |
1664 | - /* | |
1665 | - * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them. | |
1666 | - * While we're scanning the table, apply the decay factor to the usage | |
1667 | - * values. | |
1668 | - */ | |
1669 | - | |
1670 | - entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *)); | |
1671 | - | |
1672 | - i = 0; | |
1673 | - hash_seq_init(&hash_seq, pgss_hash); | |
1674 | - while ((entry = hash_seq_search(&hash_seq)) != NULL) | |
1675 | - { | |
1676 | - entries[i++] = entry; | |
1677 | - /* "Sticky" entries get a different usage decay rate. */ | |
1678 | - if (entry->counters.calls == 0) | |
1679 | - entry->counters.usage *= STICKY_DECREASE_FACTOR; | |
1680 | - else | |
1681 | - entry->counters.usage *= USAGE_DECREASE_FACTOR; | |
1682 | - /* Accumulate total size, too. */ | |
1683 | - totlen += entry->query_len + 1; | |
1684 | - } | |
1685 | - | |
1686 | - qsort(entries, i, sizeof(pgssEntry *), entry_cmp); | |
1687 | - | |
1688 | - if (i > 0) | |
1689 | - { | |
1690 | - /* Record the (approximate) median usage */ | |
1691 | - pgss->cur_median_usage = entries[i / 2]->counters.usage; | |
1692 | - /* Record the mean query length */ | |
1693 | - pgss->mean_query_len = totlen / i; | |
1694 | - } | |
1695 | - | |
1696 | - nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100); | |
1697 | - nvictims = Min(nvictims, i); | |
1698 | - | |
1699 | - for (i = 0; i < nvictims; i++) | |
1700 | - { | |
1701 | - hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL); | |
1702 | - } | |
1703 | - | |
1704 | - pfree(entries); | |
1705 | -} | |
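
To make the eviction sizing concrete: `USAGE_DEALLOC_PERCENT` is defined earlier in the file (5 in stock pg_stat_statements; treated as an assumption here), so with 1000 live entries the loop removes Max(10, 50) = 50 of the lowest-usage entries. A standalone illustration:

```c
#include <stdio.h>

/* Standalone illustration of the victim count above, assuming
 * USAGE_DEALLOC_PERCENT is 5 (its definition is outside this excerpt). */
#define USAGE_DEALLOC_PERCENT 5
#define Max(a,b) ((a) > (b) ? (a) : (b))
#define Min(a,b) ((a) < (b) ? (a) : (b))

int
main(void)
{
    int i = 1000;       /* live hashtable entries */
    int nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);

    nvictims = Min(nvictims, i);
    printf("%d\n", nvictims);   /* prints 50: the least-used 5 percent */
    return 0;
}
```
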
1706 | - | |
1707 | -/* | |
1708 | - * Given a null-terminated string, allocate a new entry in the external query | |
1709 | - * text file and store the string there. | |
1710 | - * | |
1711 | - * Although we could compute the string length via strlen(), callers already | |
1712 | - * have it handy, so we require them to pass it too. | |
1713 | - * | |
1714 | - * If successful, returns true, and stores the new entry's offset in the file | |
1715 | - * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the | |
1716 | - * number of garbage collections that have occurred so far. | |
1717 | - * | |
1718 | - * On failure, returns false. | |
1719 | - * | |
1720 | - * At least a shared lock on pgss->lock must be held by the caller, so as | |
1721 | - * to prevent a concurrent garbage collection. Share-lock-holding callers | |
1722 | - * should pass a gc_count pointer to obtain the number of garbage collections, | |
1723 | - * so that they can recheck the count after obtaining exclusive lock to | |
1724 | - * detect whether a garbage collection occurred (and removed this entry). | |
1725 | - */ | |
1726 | -static bool | |
1727 | -qtext_store(const char *query, int query_len, | |
1728 | - Size *query_offset, int *gc_count) | |
1729 | -{ | |
1730 | - Size off; | |
1731 | - int fd; | |
1732 | - | |
1733 | - /* | |
1734 | - * We use a spinlock to protect extent/n_writers/gc_count, so that | |
1735 | - * multiple processes may execute this function concurrently. | |
1736 | - */ | |
1737 | - { | |
1738 | - volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; | |
1739 | - | |
1740 | - SpinLockAcquire(&s->mutex); | |
1741 | - off = s->extent; | |
1742 | - s->extent += query_len + 1; | |
1743 | - s->n_writers++; | |
1744 | - if (gc_count) | |
1745 | - *gc_count = s->gc_count; | |
1746 | - SpinLockRelease(&s->mutex); | |
1747 | - } | |
1748 | - | |
1749 | - *query_offset = off; | |
1750 | - | |
1751 | - /* Now write the data into the successfully-reserved part of the file */ | |
1752 | - fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY, | |
1753 | - S_IRUSR | S_IWUSR); | |
1754 | - if (fd < 0) | |
1755 | - goto error; | |
1756 | - | |
1757 | - if (lseek(fd, off, SEEK_SET) != off) | |
1758 | - goto error; | |
1759 | - | |
1760 | - if (write(fd, query, query_len + 1) != query_len + 1) | |
1761 | - goto error; | |
1762 | - | |
1763 | - CloseTransientFile(fd); | |
1764 | - | |
1765 | - /* Mark our write complete */ | |
1766 | - { | |
1767 | - volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; | |
1768 | - | |
1769 | - SpinLockAcquire(&s->mutex); | |
1770 | - s->n_writers--; | |
1771 | - SpinLockRelease(&s->mutex); | |
1772 | - } | |
1773 | - | |
1774 | - return true; | |
1775 | - | |
1776 | -error: | |
1777 | - ereport(LOG, | |
1778 | - (errcode_for_file_access(), | |
1779 | - errmsg("could not write pg_stat_statement file \"%s\": %m", | |
1780 | - PGSS_TEXT_FILE))); | |
1781 | - | |
1782 | - if (fd >= 0) | |
1783 | - CloseTransientFile(fd); | |
1784 | - | |
1785 | - /* Mark our write complete */ | |
1786 | - { | |
1787 | - volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; | |
1788 | - | |
1789 | - SpinLockAcquire(&s->mutex); | |
1790 | - s->n_writers--; | |
1791 | - SpinLockRelease(&s->mutex); | |
1792 | - } | |
1793 | - | |
1794 | - return false; | |
1795 | -} | |
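
The key trick in `qtext_store` is that only the offset reservation needs the spinlock; the actual `write()` happens unlocked because each writer owns a disjoint byte range. The reservation step in isolation, assuming a struct shaped like the relevant fields of `pgssSharedState`:

```c
#include "postgres.h"
#include "storage/spin.h"

/* Hedged sketch of the spinlock-protected reservation above; the mutex
 * is assumed to have been SpinLockInit'd at startup. */
typedef struct SharedExtent
{
    slock_t mutex;
    Size    extent;     /* next free byte in the external text file */
    int     n_writers;  /* writers currently appending */
} SharedExtent;

static Size
reserve_space(volatile SharedExtent *s, Size len)
{
    Size off;

    SpinLockAcquire(&s->mutex);
    off = s->extent;    /* claim a private range... */
    s->extent += len;   /* ...and advance the shared pointer */
    s->n_writers++;     /* readers skip speculative file loads while > 0 */
    SpinLockRelease(&s->mutex);

    return off;         /* [off, off+len) may now be written unlocked */
}
```
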
1796 | - | |
1797 | -/* | |
1798 | - * Read the external query text file into a malloc'd buffer. | |
1799 | - * | |
1800 | - * Returns NULL (without throwing an error) if unable to read, eg | |
1801 | - * file not there or insufficient memory. | |
1802 | - * | |
1803 | - * On success, the buffer size is also returned into *buffer_size. | |
1804 | - * | |
1805 | - * This can be called without any lock on pgss->lock, but in that case | |
1806 | - * the caller is responsible for verifying that the result is sane. | |
1807 | - */ | |
1808 | -static char * | |
1809 | -qtext_load_file(Size *buffer_size) | |
1810 | -{ | |
1811 | - char *buf; | |
1812 | - int fd; | |
1813 | - struct stat stat; | |
1814 | - | |
1815 | - fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY, 0); | |
1816 | - if (fd < 0) | |
1817 | - { | |
1818 | - if (errno != ENOENT) | |
1819 | - ereport(LOG, | |
1820 | - (errcode_for_file_access(), | |
1821 | - errmsg("could not read pg_stat_statement file \"%s\": %m", | |
1822 | - PGSS_TEXT_FILE))); | |
1823 | - return NULL; | |
1824 | - } | |
1825 | - | |
1826 | - /* Get file length */ | |
1827 | - if (fstat(fd, &stat)) | |
1828 | - { | |
1829 | - ereport(LOG, | |
1830 | - (errcode_for_file_access(), | |
1831 | - errmsg("could not stat pg_stat_statement file \"%s\": %m", | |
1832 | - PGSS_TEXT_FILE))); | |
1833 | - CloseTransientFile(fd); | |
1834 | - return NULL; | |
1835 | - } | |
1836 | - | |
1837 | - /* Allocate buffer; beware that off_t might be wider than size_t */ | |
1838 | - if (stat.st_size <= MaxAllocSize) | |
1839 | - buf = (char *) malloc(stat.st_size); | |
1840 | - else | |
1841 | - buf = NULL; | |
1842 | - if (buf == NULL) | |
1843 | - { | |
1844 | - ereport(LOG, | |
1845 | - (errcode(ERRCODE_OUT_OF_MEMORY), | |
1846 | - errmsg("out of memory"))); | |
1847 | - CloseTransientFile(fd); | |
1848 | - return NULL; | |
1849 | - } | |
1850 | - | |
1851 | - /* | |
1852 | - * OK, slurp in the file. If we get a short read and errno doesn't get | |
1853 | - * set, the reason is probably that garbage collection truncated the file | |
1854 | - * since we did the fstat(), so we don't log a complaint --- but we don't | |
1855 | - * return the data, either, since it's most likely corrupt due to | |
1856 | - * concurrent writes from garbage collection. | |
1857 | - */ | |
1858 | - errno = 0; | |
1859 | - if (read(fd, buf, stat.st_size) != stat.st_size) | |
1860 | - { | |
1861 | - if (errno) | |
1862 | - ereport(LOG, | |
1863 | - (errcode_for_file_access(), | |
1864 | - errmsg("could not read pg_stat_statement file \"%s\": %m", | |
1865 | - PGSS_TEXT_FILE))); | |
1866 | - free(buf); | |
1867 | - CloseTransientFile(fd); | |
1868 | - return NULL; | |
1869 | - } | |
1870 | - | |
1871 | - CloseTransientFile(fd); | |
1872 | - | |
1873 | - *buffer_size = stat.st_size; | |
1874 | - return buf; | |
1875 | -} | |
1876 | - | |
1877 | -/* | |
1878 | - * Locate a query text in the file image previously read by qtext_load_file(). | |
1879 | - * | |
1880 | - * We validate the given offset/length, and return NULL if bogus. Otherwise, | |
1881 | - * the result points to a null-terminated string within the buffer. | |
1882 | - */ | |
1883 | -static char * | |
1884 | -qtext_fetch(Size query_offset, int query_len, | |
1885 | - char *buffer, Size buffer_size) | |
1886 | -{ | |
1887 | - /* File read failed? */ | |
1888 | - if (buffer == NULL) | |
1889 | - return NULL; | |
1890 | - /* Bogus offset/length? */ | |
1891 | - if (query_len < 0 || | |
1892 | - query_offset + query_len >= buffer_size) | |
1893 | - return NULL; | |
1894 | - /* As a further sanity check, make sure there's a trailing null */ | |
1895 | - if (buffer[query_offset + query_len] != '\0') | |
1896 | - return NULL; | |
1897 | - /* Looks OK */ | |
1898 | - return buffer + query_offset; | |
1899 | -} | |
1900 | - | |
1901 | -/* | |
1902 | - * Do we need to garbage-collect the external query text file? | |
1903 | - * | |
1904 | - * Caller should hold at least a shared lock on pgss->lock. | |
1905 | - */ | |
1906 | -static bool | |
1907 | -need_gc_qtexts(void) | |
1908 | -{ | |
1909 | - Size extent; | |
1910 | - | |
1911 | - /* Read shared extent pointer */ | |
1912 | - { | |
1913 | - volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; | |
1914 | - | |
1915 | - SpinLockAcquire(&s->mutex); | |
1916 | - extent = s->extent; | |
1917 | - SpinLockRelease(&s->mutex); | |
1918 | - } | |
1919 | - | |
1920 | - /* Don't proceed if file does not exceed 512 bytes per possible entry */ | |
1921 | - if (extent < 512 * pgss_max) | |
1922 | - return false; | |
1923 | - | |
1924 | - /* | |
1925 | - * Don't proceed if file is less than about 50% bloat. Nothing can or | |
1926 | - * should be done in the event of unusually large query texts accounting | |
1927 | - * for file's large size. We go to the trouble of maintaining the mean | |
1928 | - * query length in order to prevent garbage collection from thrashing | |
1929 | - * uselessly. | |
1930 | - */ | |
1931 | - if (extent < pgss->mean_query_len * pgss_max * 2) | |
1932 | - return false; | |
1933 | - | |
1934 | - return true; | |
1935 | -} | |
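
Concretely, with illustrative numbers (pgss_max = 5000 possible entries, mean_query_len = 1024 bytes), the two tests above mean garbage collection fires only once the file passes both ~2.5 MB (512 bytes per possible entry) and ~10 MB (twice the live text the hashtable could reference):

```c
#include <stdio.h>

/* Worked example of the two need_gc_qtexts() thresholds, with
 * illustrative numbers: pgss_max = 5000, mean_query_len = 1024. */
int
main(void)
{
    size_t pgss_max = 5000;
    size_t mean_query_len = 1024;

    size_t floor_bytes = 512 * pgss_max;                /* 2,560,000 */
    size_t bloat_bytes = mean_query_len * pgss_max * 2; /* 10,240,000 */

    /* GC fires only once extent exceeds BOTH values: here, once the
     * file passes ~10 MB, i.e. roughly twice the ~5 MB of live text
     * that the hashtable could actually reference. */
    printf("%zu %zu\n", floor_bytes, bloat_bytes);
    return 0;
}
```
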
1936 | - | |
1937 | -/* | |
1938 | - * Garbage-collect orphaned query texts in external file. | |
1939 | - * | |
1940 | - * This won't be called often in the typical case, since it's likely that | |
1941 | - * there won't be too much churn, and besides, a similar compaction process | |
1942 | - * occurs when serializing to disk at shutdown or as part of resetting. | |
1943 | - * Despite this, it seems prudent to plan for the edge case where the file | |
1944 | - * becomes unreasonably large, with no other method of compaction likely to | |
1945 | - * occur in the foreseeable future. | |
1946 | - * | |
1947 | - * The caller must hold an exclusive lock on pgss->lock. | |
1948 | - */ | |
1949 | -static void | |
1950 | -gc_qtexts(void) | |
1951 | -{ | |
1952 | - char *qbuffer; | |
1953 | - Size qbuffer_size; | |
1954 | - FILE *qfile; | |
1955 | - HASH_SEQ_STATUS hash_seq; | |
1956 | - pgssEntry *entry; | |
1957 | - Size extent; | |
1958 | - int nentries; | |
1959 | - | |
1960 | - /* | |
1961 | - * When called from pgss_store, some other session might have proceeded | |
1962 | - * with garbage collection in the no-lock-held interim of lock strength | |
1963 | - * escalation. Check once more that this is actually necessary. | |
1964 | - */ | |
1965 | - if (!need_gc_qtexts()) | |
1966 | - return; | |
1967 | - | |
1968 | - /* | |
1969 | - * Load the old texts file. If we fail (out of memory, for instance) just | |
1970 | - * skip the garbage collection. | |
1971 | - */ | |
1972 | - qbuffer = qtext_load_file(&qbuffer_size); | |
1973 | - if (qbuffer == NULL) | |
1974 | - return; | |
1975 | - | |
1976 | - /* | |
1977 | - * We overwrite the query texts file in place, so as to reduce the risk of | |
1978 | - * an out-of-disk-space failure. Since the file is guaranteed not to get | |
1979 | - * larger, this should always work on traditional filesystems; though we | |
1980 | - * could still lose on copy-on-write filesystems. | |
1981 | - */ | |
1982 | - qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W); | |
1983 | - if (qfile == NULL) | |
1984 | - { | |
1985 | - ereport(LOG, | |
1986 | - (errcode_for_file_access(), | |
1987 | - errmsg("could not write pg_stat_statement file \"%s\": %m", | |
1988 | - PGSS_TEXT_FILE))); | |
1989 | - goto gc_fail; | |
1990 | - } | |
1991 | - | |
1992 | - extent = 0; | |
1993 | - nentries = 0; | |
1994 | - | |
1995 | - hash_seq_init(&hash_seq, pgss_hash); | |
1996 | - while ((entry = hash_seq_search(&hash_seq)) != NULL) | |
1997 | - { | |
1998 | - int query_len = entry->query_len; | |
1999 | - char *qry = qtext_fetch(entry->query_offset, | |
2000 | - query_len, | |
2001 | - qbuffer, | |
2002 | - qbuffer_size); | |
2003 | - | |
2004 | - if (qry == NULL) | |
2005 | - { | |
2006 | - /* Trouble ... drop the text */ | |
2007 | - entry->query_offset = 0; | |
2008 | - entry->query_len = -1; | |
2009 | - continue; | |
2010 | - } | |
2011 | - | |
2012 | - if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1) | |
2013 | - { | |
2014 | - ereport(LOG, | |
2015 | - (errcode_for_file_access(), | |
2016 | - errmsg("could not write pg_stat_statement file \"%s\": %m", | |
2017 | - PGSS_TEXT_FILE))); | |
2018 | - hash_seq_term(&hash_seq); | |
2019 | - goto gc_fail; | |
2020 | - } | |
2021 | - | |
2022 | - entry->query_offset = extent; | |
2023 | - extent += query_len + 1; | |
2024 | - nentries++; | |
2025 | - } | |
2026 | - | |
2027 | - /* | |
2028 | - * Truncate away any now-unused space. If this fails for some odd reason, | |
2029 | - * we log it, but there's no need to fail. | |
2030 | - */ | |
2031 | - if (ftruncate(fileno(qfile), extent) != 0) | |
2032 | - ereport(LOG, | |
2033 | - (errcode_for_file_access(), | |
2034 | - errmsg("could not truncate pg_stat_statement file \"%s\": %m", | |
2035 | - PGSS_TEXT_FILE))); | |
2036 | - | |
2037 | - if (FreeFile(qfile)) | |
2038 | - { | |
2039 | - ereport(LOG, | |
2040 | - (errcode_for_file_access(), | |
2041 | - errmsg("could not write pg_stat_statement file \"%s\": %m", | |
2042 | - PGSS_TEXT_FILE))); | |
2043 | - qfile = NULL; | |
2044 | - goto gc_fail; | |
2045 | - } | |
2046 | - | |
2047 | - elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu", | |
2048 | - pgss->extent, extent); | |
2049 | - | |
2050 | - /* Reset the shared extent pointer */ | |
2051 | - pgss->extent = extent; | |
2052 | - | |
2053 | - /* | |
2054 | - * Also update the mean query length, to be sure that need_gc_qtexts() | |
2055 | - * won't still think we have a problem. | |
2056 | - */ | |
2057 | - if (nentries > 0) | |
2058 | - pgss->mean_query_len = extent / nentries; | |
2059 | - else | |
2060 | - pgss->mean_query_len = ASSUMED_LENGTH_INIT; | |
2061 | - | |
2062 | - free(qbuffer); | |
2063 | - | |
2064 | - /* | |
2065 | - * OK, count a garbage collection cycle. (Note: even though we have | |
2066 | - * exclusive lock on pgss->lock, we must take pgss->mutex for this, since | |
2067 | - * other processes may examine gc_count while holding only the mutex. | |
2068 | - * Also, we have to advance the count *after* we've rewritten the file, | |
2069 | - * else other processes might not realize they read a stale file.) | |
2070 | - */ | |
2071 | - record_gc_qtexts(); | |
2072 | - | |
2073 | - return; | |
2074 | - | |
2075 | -gc_fail: | |
2076 | - /* clean up resources */ | |
2077 | - if (qfile) | |
2078 | - FreeFile(qfile); | |
2079 | - if (qbuffer) | |
2080 | - free(qbuffer); | |
2081 | - | |
2082 | - /* | |
2083 | - * Since the contents of the external file are now uncertain, mark all | |
2084 | - * hashtable entries as having invalid texts. | |
2085 | - */ | |
2086 | - hash_seq_init(&hash_seq, pgss_hash); | |
2087 | - while ((entry = hash_seq_search(&hash_seq)) != NULL) | |
2088 | - { | |
2089 | - entry->query_offset = 0; | |
2090 | - entry->query_len = -1; | |
2091 | - } | |
2092 | - | |
2093 | - /* Seems like a good idea to bump the GC count even though we failed */ | |
2094 | - record_gc_qtexts(); | |
2095 | -} | |
2096 | - | |
2097 | -/* | |
2098 | - * Release all entries. | |
9 | + *------------------------------------------------------------------------- | |
2099 | 10 | */ |
2100 | -static void | |
2101 | -entry_reset(void) | |
2102 | -{ | |
2103 | - HASH_SEQ_STATUS hash_seq; | |
2104 | - pgssEntry *entry; | |
2105 | - FILE *qfile; | |
2106 | - | |
2107 | - LWLockAcquire(pgss->lock, LW_EXCLUSIVE); | |
2108 | - | |
2109 | - hash_seq_init(&hash_seq, pgss_hash); | |
2110 | - while ((entry = hash_seq_search(&hash_seq)) != NULL) | |
2111 | - { | |
2112 | - hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL); | |
2113 | - } | |
2114 | - | |
2115 | - /* | |
2116 | - * Write new empty query file, perhaps even creating a new one to recover | |
2117 | - * if the file was missing. | |
2118 | - */ | |
2119 | - qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W); | |
2120 | - if (qfile == NULL) | |
2121 | - { | |
2122 | - ereport(LOG, | |
2123 | - (errcode_for_file_access(), | |
2124 | - errmsg("could not create pg_stat_statement file \"%s\": %m", | |
2125 | - PGSS_TEXT_FILE))); | |
2126 | - goto done; | |
2127 | - } | |
2128 | - | |
2129 | - /* If ftruncate fails, log it, but it's not a fatal problem */ | |
2130 | - if (ftruncate(fileno(qfile), 0) != 0) | |
2131 | - ereport(LOG, | |
2132 | - (errcode_for_file_access(), | |
2133 | - errmsg("could not truncate pg_stat_statement file \"%s\": %m", | |
2134 | - PGSS_TEXT_FILE))); | |
11 | +#include "postgres.h" | |
2135 | 12 | |
2136 | - FreeFile(qfile); | |
13 | +#include <sys/stat.h> | |
2137 | 14 | |
2138 | -done: | |
2139 | - pgss->extent = 0; | |
2140 | - /* This counts as a query text garbage collection for our purposes */ | |
2141 | - record_gc_qtexts(); | |
15 | +#include "access/hash.h" | |
16 | +#include "parser/scanner.h" | |
2142 | 17 | |
2143 | - LWLockRelease(pgss->lock); | |
2144 | -} | |
2145 | -#endif | |
18 | +static void AppendJumble(pgssJumbleState *jstate, | |
19 | + const unsigned char *item, Size size); | |
20 | +static void JumbleQuery(pgssJumbleState *jstate, Query *query); | |
21 | +static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable); | |
22 | +static void JumbleExpr(pgssJumbleState *jstate, Node *node); | |
23 | +static void RecordConstLocation(pgssJumbleState *jstate, int location); | |
24 | +static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query); | |
25 | +static int comp_location(const void *a, const void *b); | |
2146 | 26 | |
2147 | 27 | /* |
2148 | 28 | * AppendJumble: Append a value that is substantive in a given query to |
@@ -2209,8 +89,10 @@ JumbleQuery(pgssJumbleState *jstate, Query *query) | ||
2209 | 89 | JumbleRangeTable(jstate, query->rtable); |
2210 | 90 | JumbleExpr(jstate, (Node *) query->jointree); |
2211 | 91 | JumbleExpr(jstate, (Node *) query->targetList); |
92 | + JumbleExpr(jstate, (Node *) query->onConflict); | |
2212 | 93 | JumbleExpr(jstate, (Node *) query->returningList); |
2213 | 94 | JumbleExpr(jstate, (Node *) query->groupClause); |
95 | + JumbleExpr(jstate, (Node *) query->groupingSets); | |
2214 | 96 | JumbleExpr(jstate, query->havingQual); |
2215 | 97 | JumbleExpr(jstate, (Node *) query->windowClause); |
2216 | 98 | JumbleExpr(jstate, (Node *) query->distinctClause); |
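
This hunk teaches the fingerprint about the Query fields new in PostgreSQL 9.5 (`onConflict` for INSERT ... ON CONFLICT and `groupingSets` for GROUPING SETS). For reference, the `APP_JUMB` macro used throughout these hunks is a thin wrapper over `AppendJumble`, defined earlier in the file (reproduced here as a sketch, since its definition falls outside this excerpt):

```c
/* Feed the raw bytes of a scalar field into the running query jumble. */
#define APP_JUMB(item) \
    AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
```
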
@@ -2239,6 +121,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable) | ||
2239 | 121 | { |
2240 | 122 | case RTE_RELATION: |
2241 | 123 | APP_JUMB(rte->relid); |
124 | + JumbleExpr(jstate, (Node *) rte->tablesample); | |
2242 | 125 | break; |
2243 | 126 | case RTE_SUBQUERY: |
2244 | 127 | JumbleQuery(jstate, rte->subquery); |
@@ -2341,6 +224,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) | ||
2341 | 224 | JumbleExpr(jstate, (Node *) expr->aggfilter); |
2342 | 225 | } |
2343 | 226 | break; |
227 | + case T_GroupingFunc: | |
228 | + { | |
229 | + GroupingFunc *grpnode = (GroupingFunc *) node; | |
230 | + | |
231 | + JumbleExpr(jstate, (Node *) grpnode->refs); | |
232 | + } | |
233 | + break; | |
2344 | 234 | case T_WindowFunc: |
2345 | 235 | { |
2346 | 236 | WindowFunc *expr = (WindowFunc *) node; |
@@ -2576,6 +466,15 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) | ||
2576 | 466 | APP_JUMB(ce->cursor_param); |
2577 | 467 | } |
2578 | 468 | break; |
469 | + case T_InferenceElem: | |
470 | + { | |
471 | + InferenceElem *ie = (InferenceElem *) node; | |
472 | + | |
473 | + APP_JUMB(ie->infercollid); | |
474 | + APP_JUMB(ie->inferopclass); | |
475 | + JumbleExpr(jstate, ie->expr); | |
476 | + } | |
477 | + break; | |
2579 | 478 | case T_TargetEntry: |
2580 | 479 | { |
2581 | 480 | TargetEntry *tle = (TargetEntry *) node; |
@@ -2612,12 +511,32 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) | ||
2612 | 511 | JumbleExpr(jstate, from->quals); |
2613 | 512 | } |
2614 | 513 | break; |
514 | + case T_OnConflictExpr: | |
515 | + { | |
516 | + OnConflictExpr *conf = (OnConflictExpr *) node; | |
517 | + | |
518 | + APP_JUMB(conf->action); | |
519 | + JumbleExpr(jstate, (Node *) conf->arbiterElems); | |
520 | + JumbleExpr(jstate, conf->arbiterWhere); | |
521 | + JumbleExpr(jstate, (Node *) conf->onConflictSet); | |
522 | + JumbleExpr(jstate, conf->onConflictWhere); | |
523 | + APP_JUMB(conf->constraint); | |
524 | + APP_JUMB(conf->exclRelIndex); | |
525 | + JumbleExpr(jstate, (Node *) conf->exclRelTlist); | |
526 | + } | |
527 | + break; | |
2615 | 528 | case T_List: |
2616 | 529 | foreach(temp, (List *) node) |
2617 | 530 | { |
2618 | 531 | JumbleExpr(jstate, (Node *) lfirst(temp)); |
2619 | 532 | } |
2620 | 533 | break; |
534 | + case T_IntList: | |
535 | + foreach(temp, (List *) node) | |
536 | + { | |
537 | + APP_JUMB(lfirst_int(temp)); | |
538 | + } | |
539 | + break; | |
2621 | 540 | case T_SortGroupClause: |
2622 | 541 | { |
2623 | 542 | SortGroupClause *sgc = (SortGroupClause *) node; |
@@ -2628,6 +547,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) | ||
2628 | 547 | APP_JUMB(sgc->nulls_first); |
2629 | 548 | } |
2630 | 549 | break; |
550 | + case T_GroupingSet: | |
551 | + { | |
552 | + GroupingSet *gsnode = (GroupingSet *) node; | |
553 | + | |
554 | + JumbleExpr(jstate, (Node *) gsnode->content); | |
555 | + } | |
556 | + break; | |
2631 | 557 | case T_WindowClause: |
2632 | 558 | { |
2633 | 559 | WindowClause *wc = (WindowClause *) node; |
@@ -2666,6 +592,15 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) | ||
2666 | 592 | JumbleExpr(jstate, rtfunc->funcexpr); |
2667 | 593 | } |
2668 | 594 | break; |
595 | + case T_TableSampleClause: | |
596 | + { | |
597 | + TableSampleClause *tsc = (TableSampleClause *) node; | |
598 | + | |
599 | + APP_JUMB(tsc->tsmhandler); | |
600 | + JumbleExpr(jstate, (Node *) tsc->args); | |
601 | + JumbleExpr(jstate, (Node *) tsc->repeatable); | |
602 | + } | |
603 | + break; | |
2669 | 604 | default: |
2670 | 605 | /* Only a warning, since we can stumble along anyway */ |
2671 | 606 | elog(WARNING, "unrecognized node type: %d", |
@@ -2827,6 +762,9 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query) | ||
2827 | 762 | ScanKeywords, |
2828 | 763 | NumScanKeywords); |
2829 | 764 | |
765 | + /* we don't want to re-emit any escape string warnings */ | |
766 | + yyextra.escape_string_warning = false; | |
767 | + | |
2830 | 768 | /* Search for each constant, in sequence */ |
2831 | 769 | for (i = 0; i < jstate->clocations_count; i++) |
2832 | 770 | { |
@@ -338,7 +338,7 @@ EXPLAIN (COSTS false) SELECT * FROM t1 FULL OUTER JOIN t2 ON (t1.id = t2.id); | ||
338 | 338 | /*+NestLoop(t1 t2)*/ |
339 | 339 | EXPLAIN (COSTS false) SELECT * FROM t1 FULL OUTER JOIN t2 ON (t1.id = t2.id); |
340 | 340 | |
341 | --- inherite table test | |
341 | +-- inheritance tables test | |
342 | 342 | SET constraint_exclusion TO off; |
343 | 343 | EXPLAIN (COSTS false) SELECT * FROM p1 WHERE id >= 50 AND id <= 51 AND p1.ctid = '(1,1)'; |
344 | 344 | SET constraint_exclusion TO on; |