ultrapossum/module/failover ... (Ultrapossum-cvs 608) - UltraPossum

===================================================================
RCS file: ultrapossum/module/failover/11failover,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5

--- ultrapossum/module/failover/11failover	2004/09/17 02:56:22	1.4
+++ ultrapossum/module/failover/11failover	2004/09/28 14:22:53	1.5
@@ -28,6 +28,7 @@
 # Failover configuration for slurpd
 ULTRAPOSSUM_RECOVERY_CONFIN=$(ultrapossum_getconf ULTRAPOSSUM_RECOVERY_CONFIN $TEMPLATEDIR/recovery.conf.in)
 ULTRAPOSSUM_RECOVERY_CONF=$(ultrapossum_getconf ULTRAPOSSUM_RECOVERY_CONF $CONFDIR/recovery.conf)
+FAILOVER_RECOVERY_PIDFILE=$(ultrapossum_getconf FAILOVER_RECOVERY_PIDFILE "$localstatedir/run/ultrapossum.recovery.pid")
 
 # Mon configuration
 ULTRAPOSSUM_MON_CFIN=$(ultrapossum_getconf ULTRAPOSSUM_MON_CFIN $TEMPLATEDIR/ultrapossum.mon.cf.in)
@@ -86,6 +87,7 @@
 
 if test -f "$HACF"; then
   HEARTBEAT_NICE_FAILBACK=`grep ^nice_failback $HACF | awk -F' ' '{print $2;}'`
+  HEARTBEAT_AUTO_FAILBACK=`grep ^auto_failback $HACF | awk -F' ' '{print $2;}'`
 fi
 
 if test "x$HOST" = "x$BACKUP"; then
@@ -97,19 +99,7 @@
 if test "x$ULTRAPOSSUM_MODULE_FAILOVER" = "xinstalled"; then
 
 wait_master_service() {
-  (for i in 1 2 2 3 3 3 5 5 5 5 10 10 10 10 10 20 20 20
-    do
-      if test "`ldapmaster_status`" = "running"; then
-        exit 0
-      fi
-    if test "`$MODULEDIR/server/pid status $HAPIDFILE`" = "stopped"; then
-      exit 1
-    fi
-    sleep $i
-    echo -n "."
-    done
-    echo "timeout" 1>&2
-    exit 1) || exit 1
+  $MODULEDIR/server/pid wait $SLAPD_PIDFILE
   echo -n " `basename $SLAPD`"
   if test "x$SLURPDSLAVES" != "x"; then
     $MODULEDIR/server/pid wait $SLURPD_PIDFILE
@@ -119,29 +109,24 @@
   echo -n " mon"
 }
 
-wait_nicefailback_service()
+wait_backup_service()
 {
+  # handle initdead case
   (for i in 1 2 2 3 3 3 5 5 5 5 10 10 10 10 10 20 20 20
-    do
-      if test "`$MODULEDIR/server/startup status`" = "running"; then
-        exit 0
-      fi
-    if test "`$MODULEDIR/server/pid status $HAPIDFILE`" = "stopped"; then
-      exit 1
+   do
+    # started as master service
+    if test "`ldapmaster_status`" = "running"; then
+      wait_master_service
+      exit 0
+    # started as slave service
+    elif test "`ldapslave_status`" = "running"; then
+      exit 0
     fi
     sleep $i
     echo -n "."
-    done
-    echo "timeout" 1>&2
-    exit 1) || exit 1
-  if test "`ldapmaster_status`" = "running"; then
-    wait_master_service
-  elif include "$BACKUP" "$SYNCBACKUPS"; then
-    echo -n " `basename $SLAPD`"
-  else   
-    echo -n " "
-    ldapslave_start
-  fi
+   done
+   echo "timeout" 1>&2
+   exit 1 ) || exit 1
 }
 
 ldapmaster_service_start()
@@ -149,16 +134,21 @@
   if test "x$BACKUP" = "x" ; then
     ldapmaster_start
   else
-    if include "$BACKUP" "$SYNCBACKUPS"; then
+    if test "x`$MODULEDIR/server/startup slave_status`" = "xstopped"; then
       ldapslave_start
       echo -n " "
     fi
     /etc/init.d/heartbeat start | grep ERROR 1>&2 && exit 1
+    $MODULEDIR/server/pid wait $HAPIDFILE
     echo -n "heartbeat"
-    if test "x$HOST" = "x$MASTER" && test "x$HEARTBEAT_NICE_FAILBACK" = "x"; then
-      wait_master_service
+    if test "x$HOST" = "x$MASTER"; then
+      if test "x$HEARTBEAT_NICE_FAILBACK" = "x" || test "x$HEARTBEAT_AUTO_FAILBACK" = "xon"; then
+        wait_master_service
+      else
+        wait_backup_service
+      fi
     else
-      wait_nicefailback_service
+      wait_backup_service
     fi
   fi
 }
===================================================================
RCS file: ultrapossum/module/failover/Makefile.am,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- ultrapossum/module/failover/Makefile.am	2004/06/24 08:42:59	1.4
+++ ultrapossum/module/failover/Makefile.am	2004/09/28 14:22:53	1.5
@@ -6,7 +6,6 @@
 
 module_DATA = 11failover
 update_SCRIPTS = update-failover
-
 modulesysconf_DATA = failover.cf
 
 modulelocal_SCRIPTS = \
===================================================================
RCS file: ultrapossum/module/failover/ha.cf.in,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- ultrapossum/module/failover/ha.cf.in	2004/03/13 14:25:54	1.1.1.1
+++ ultrapossum/module/failover/ha.cf.in	2004/09/28 14:22:53	1.2
@@ -1,7 +1,17 @@
 #
 #	There are lots of options in this file.  All you have to have is a set
-#	of nodes listedJ {"node ...}
-#	and one of {serial, udp, or mcast}
+#	of nodes listed {"node ...} one of {serial, bcast, mcast, or ucast},
+#	and a value for "auto_failback".
+#
+#	ATTENTION: As the configuration file is read line by line,
+#		   THE ORDER OF DIRECTIVE MATTERS!
+#
+#	In particular, make sure that the udpport, serial baud rate
+#	etc. are set before the heartbeat media are defined!
+#	debug and log file directives go into effect when they
+#	are encountered.
+#
+#	All will be fine if you keep them ordered as in this example.
 #
 #
 #       Note on logging:
@@ -14,7 +24,7 @@
 #       defined then defaults will be used for debugfile and logfile as
 #       required and messages will be sent there.
 #
-#	File to wirte debug messages to
+#	File to write debug messages to
 #debugfile /var/log/ha-debug
 #
 #
@@ -25,16 +35,34 @@
 #
 #	Facility to use for syslog()/logger 
 #
-#logfacility	local0
+logfacility	local0
+#
 #
+#	A note on specifying "how long" times below...
 #
-#	keepalive: how many seconds between heartbeats
+#	The default time unit is seconds
+#		10 means ten seconds
+#
+#	You can also specify them in milliseconds
+#		1500ms means 1.5 seconds
+#
+#
+#	keepalive: how long between heartbeats?
 #
 #keepalive 2
 #
-#	deadtime: seconds-to-declare-host-dead
+#	deadtime: how long-to-declare-host-dead?
+#
+#		If you set this too low you will get the problematic
+#		split-brain (or cluster partition) problem.
+#		See the FAQ for how to use warntime to tune deadtime.
 #
-#deadtime 10
+#deadtime 30
+#
+#	warntime: how long before issuing "late heartbeat" warning?
+#	See the FAQ for how to use warntime to tune deadtime.
+#
+#warntime 10
 #
 #
 #	Very first dead time (initdead)
@@ -46,24 +74,27 @@
 #
 #initdead 120
 #
-#	hopfudge maximum hop count minus number of nodes in config
-#hopfudge 1
-#	
-#	serial	serialportname ...
-#serial	/dev/ttyS0
 #
+#	What UDP port to use for bcast/ucast communication?
+#
+#udpport	694
 #
 #	Baud rate for serial ports...
 #
 #baud	19200
+#	
+#	serial	serialportname ...
+#serial	/dev/ttyS0	# Linux
+#serial	/dev/cuaa0	# FreeBSD
+#serial	/dev/cua/a	# Solaris
 #
-#	What UDP port to use for communication?
-#
-#udpport	694
 #
-#	What interfaces to heartbeat over?
+#	What interfaces to broadcast heartbeats over?
 #
-#udp	eth0
+#bcast	eth0		# Linux
+#bcast	eth1 eth2	# Linux
+#bcast	le0		# Solaris
+#bcast	le1 le2		# Solaris
 #
 #	Set up a multicast heartbeat medium
 #	mcast [dev] [mcast group] [port] [ttl] [loop]
@@ -71,23 +102,65 @@
 #	[dev]		device to send/rcv heartbeats on
 #	[mcast group]	multicast group to join (class D multicast address
 #			224.0.0.0 - 239.255.255.255)
-#	[port]		udp port to sendto/rcvfrom (no real reason to differ
-#			from the port used for broadcast heartbeats)
+#	[port]		udp port to sendto/rcvfrom (set this value to the
+#			same value as "udpport" above)
 #	[ttl]		the ttl value for outbound heartbeats.  this effects
 #			how far the multicast packet will propagate.  (0-255)
+#			Must be greater than zero.
 #	[loop]		toggles loopback for outbound multicast heartbeats.
 #			if enabled, an outbound packet will be looped back and
 #			received by the interface it was sent on. (0 or 1)
+#			Set this value to zero.
 #		
 #
-#mcast eth0 225.0.0.1 694 1 1
+#mcast eth0 225.0.0.1 694 1 0
+#
+#	Set up a unicast / udp heartbeat medium
+#	ucast [dev] [peer-ip-addr]
+#
+#	[dev]		device to send/rcv heartbeats on
+#	[peer-ip-addr]	IP address of peer to send packets to
+#
+#ucast eth0 192.168.1.2
+#
+#
+#	About boolean values...
+#
+#	Any of the following case-insensitive values will work for true:
+#		true, on, yes, y, 1
+#	Any of the following case-insensitive values will work for false:
+#		false, off, no, n, 0
 #
-#	Watchdog is the watchdog timer.  If our own heart doesn't beat for
-#	a minute, then our machine will reboot.
 #
-#watchdog /dev/watchdog
 #
-#       "Legacy" STONITH support
+#	auto_failback:  determines whether a resource will
+#	automatically fail back to its "primary" node, or remain
+#	on whatever node is serving it until that node fails, or
+#	an administrator intervenes.
+#
+#	The possible values for auto_failback are:
+#		on	- enable automatic failbacks
+#		off	- disable automatic failbacks
+#		legacy	- enable automatic failbacks in systems
+#			where all nodes do not yet support
+#			the auto_failback option.
+#
+#	auto_failback "on" and "off" are backwards compatible with the old
+#		"nice_failback on" setting.
+#
+#	See the FAQ for information on how to convert
+#		from "legacy" to "on" without a flash cut.
+#		(i.e., using a "rolling upgrade" process)
+#
+#	The default value for auto_failback is "legacy", which
+#	will issue a warning at startup.  So, make sure you put
+#	an auto_failback directive in your ha.cf file.
+#	(note: auto_failback can be any boolean or "legacy")
+#
+#auto_failback on
+#
+#
+#       Basic STONITH support
 #       Using this directive assumes that there is one stonith 
 #       device in the cluster.  Parameters to this device are 
 #       read from a configuration file. The format of this line is:
@@ -97,8 +170,6 @@
 #       NOTE: it is up to you to maintain this file on each node in the
 #       cluster!
 #
-#nice_failback	on
-#
 #stonith baytech /etc/ha.d/conf/stonith.baytech
 #
 #       STONITH support
@@ -118,12 +189,99 @@
 #	here, and you make this file publically readable, you're asking
 #	for a denial of service attack ;-)
 #
+#	To get a list of supported stonith devices, run
+#		stonith -L
+#	For detailed information on which stonith devices are supported
+#	and their detailed configuration options, run this command:
+#		stonith -h
 #
 #stonith_host *     baytech 10.0.0.3 mylogin mysecretpassword
 #stonith_host ken3  rps10 /dev/ttyS1 kathy 0 
 #stonith_host kathy rps10 /dev/ttyS1 ken3 0 
+#
+#	Watchdog is the watchdog timer.  If our own heart doesn't beat for
+#	a minute, then our machine will reboot.
+#	NOTE: If you are using the software watchdog, you very likely
+#	wish to load the module with the parameter "nowayout=0" or
+#	compile it without CONFIG_WATCHDOG_NOWAYOUT set. Otherwise even
+#	an orderly shutdown of heartbeat will trigger a reboot, which is
+#	very likely NOT what you want.
+#
+#watchdog /dev/watchdog
 #       
 #	Tell what machines are in the cluster
 #	node	nodename ...	-- must match uname -n
 #node	ken3
 #node	kathy
+#
+#	Less common options...
+#
+#	Treats 10.10.10.254 as a pseudo-cluster-member
+#	Used together with ipfail below...
+#
+#ping 10.10.10.254
+#
+#	Treats 10.10.10.254 and 10.10.10.253 as a pseudo-cluster-member
+#       called group1. If either 10.10.10.254 or 10.10.10.253 is up
+#       then group1 is up
+#	Used together with ipfail below...
+#
+#ping_group group1 10.10.10.254 10.10.10.253
+#
+#	Processes started and stopped with heartbeat.  Restarted unless
+#		they exit with rc=100
+#
+#respawn userid /path/name/to/run
+#respawn hacluster /usr/lib/heartbeat/ipfail
+#
+#	Access control for client api
+#       	default is no access
+#
+#apiauth client-name gid=gidlist uid=uidlist
+#apiauth ipfail gid=haclient uid=hacluster
+
+###########################
+#
+#	Unusual options.
+#
+###########################
+#
+#	hopfudge maximum hop count minus number of nodes in config
+#hopfudge 1
+#
+#	deadping - dead time for ping nodes
+#deadping 30
+#
+#	hbgenmethod - Heartbeat generation number creation method
+#		Normally these are stored on disk and incremented as needed.
+#hbgenmethod time
+#
+#	realtime - enable/disable realtime execution (high priority, etc.)
+#		defaults to on
+#realtime off
+#
+#	debug - set debug level
+#		defaults to zero
+#debug 1
+#
+#	API Authentication - replaces the fifo-permissions-based system of the past
+#
+#
+#	You can put a uid list and/or a gid list.
+#	If you put both, then a process is authorized if it qualifies under either
+#	the uid list, or under the gid list.
+#
+#	The groupname "default" has special meaning.  If it is specified, then
+#	this will be used for authorizing groupless clients, and any client groups
+#	not otherwise specified.
+#
+#apiauth	ipfail uid=hacluster
+#apiauth ccm uid=hacluster
+#apiauth ping gid=haclient uid=alanr,root
+#apiauth default gid=haclient
+
+# message format on the wire; it can be classic or netstring (default is classic)
+#msgfmt  netstring
+
+
+
===================================================================
RCS file: ultrapossum/module/failover/recovery,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- ultrapossum/module/failover/recovery	2004/06/22 10:10:07	1.4
+++ ultrapossum/module/failover/recovery	2004/09/28 14:22:53	1.5
@@ -93,3 +93,4 @@
 	rm -rf $TMPDIR/$TMPRPL
     fi
 fi
+



UltraPossum

[Ultrapossum-cvs 608] ultrapossum/module/failover ...