Masato Taruishi
taru****@users*****
2004年 9月 28日 (火) 23:22:53 JST
=================================================================== RCS file: ultrapossum/module/failover/11failover,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- ultrapossum/module/failover/11failover 2004/09/17 02:56:22 1.4 +++ ultrapossum/module/failover/11failover 2004/09/28 14:22:53 1.5 @@ -28,6 +28,7 @@ # Failover configuration for slurpd ULTRAPOSSUM_RECOVERY_CONFIN=$(ultrapossum_getconf ULTRAPOSSUM_RECOVERY_CONFIN $TEMPLATEDIR/recovery.conf.in) ULTRAPOSSUM_RECOVERY_CONF=$(ultrapossum_getconf ULTRAPOSSUM_RECOVERY_CONF $CONFDIR/recovery.conf) +FAILOVER_RECOVERY_PIDFILE=$(ultrapossum_getconf FAILOVER_RECOVERY_PIDFILE "$localstatedir/run/ultrapossum.recovery.pid") # Mon configuration ULTRAPOSSUM_MON_CFIN=$(ultrapossum_getconf ULTRAPOSSUM_MON_CFIN $TEMPLATEDIR/ultrapossum.mon.cf.in) @@ -86,6 +87,7 @@ if test -f "$HACF"; then HEARTBEAT_NICE_FAILBACK=`grep ^nice_failback $HACF | awk -F' ' '{print $2;}'` + HEARTBEAT_AUTO_FAILBACK=`grep ^auto_failback $HACF | awk -F' ' '{print $2;}'` fi if test "x$HOST" = "x$BACKUP"; then @@ -97,19 +99,7 @@ if test "x$ULTRAPOSSUM_MODULE_FAILOVER" = "xinstalled"; then wait_master_service() { - (for i in 1 2 2 3 3 3 5 5 5 5 10 10 10 10 10 20 20 20 - do - if test "`ldapmaster_status`" = "running"; then - exit 0 - fi - if test "`$MODULEDIR/server/pid status $HAPIDFILE`" = "stopped"; then - exit 1 - fi - sleep $i - echo -n "." - done - echo "timeout" 1>&2 - exit 1) || exit 1 + $MODULEDIR/server/pid wait $SLAPD_PIDFILE echo -n " `basename $SLAPD`" if test "x$SLURPDSLAVES" != "x"; then $MODULEDIR/server/pid wait $SLURPD_PIDFILE @@ -119,29 +109,24 @@ echo -n " mon" } -wait_nicefailback_service() +wait_backup_service() { + # handle initdead case (for i in 1 2 2 3 3 3 5 5 5 5 10 10 10 10 10 20 20 20 - do - if test "`$MODULEDIR/server/startup status`" = "running"; then - exit 0 - fi - if test "`$MODULEDIR/server/pid status $HAPIDFILE`" = "stopped"; then - exit 1 + do + # started as master service + if test "`ldapmaster_status`" = "running"; then + wait_master_service + exit 0 + # started as slave service + elif test "`ldapslave_status`" = "running"; then + exit 0 fi sleep $i echo -n "." - done - echo "timeout" 1>&2 - exit 1) || exit 1 - if test "`ldapmaster_status`" = "running"; then - wait_master_service - elif include "$BACKUP" "$SYNCBACKUPS"; then - echo -n " `basename $SLAPD`" - else - echo -n " " - ldapslave_start - fi + done + echo "timeout" 1>&2 + exit 1 ) || exit 1 } ldapmaster_service_start() @@ -149,16 +134,21 @@ if test "x$BACKUP" = "x" ; then ldapmaster_start else - if include "$BACKUP" "$SYNCBACKUPS"; then + if test "x`$MODULEDIR/server/startup slave_status`" = "xstopped"; then ldapslave_start echo -n " " fi /etc/init.d/heartbeat start | grep ERROR 1>&2 && exit 1 + $MODULEDIR/server/pid wait $HAPIDFILE echo -n "heartbeat" - if test "x$HOST" = "x$MASTER" && test "x$HEARTBEAT_NICE_FAILBACK" = "x"; then - wait_master_service + if test "x$HOST" = "x$MASTER"; then + if test "x$HEARTBEAT_NICE_FAILBACK" = "x" || test "x$HEARTBEAT_AUTO_FAILBACK" = "xon"; then + wait_master_service + else + wait_backup_service + fi else - wait_nicefailback_service + wait_backup_service fi fi } =================================================================== RCS file: ultrapossum/module/failover/Makefile.am,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- ultrapossum/module/failover/Makefile.am 2004/06/24 08:42:59 1.4 +++ ultrapossum/module/failover/Makefile.am 2004/09/28 14:22:53 1.5 @@ -6,7 +6,6 @@ module_DATA = 11failover update_SCRIPTS = update-failover - modulesysconf_DATA = failover.cf modulelocal_SCRIPTS = \ =================================================================== RCS file: ultrapossum/module/failover/ha.cf.in,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 --- ultrapossum/module/failover/ha.cf.in 2004/03/13 14:25:54 1.1.1.1 +++ ultrapossum/module/failover/ha.cf.in 2004/09/28 14:22:53 1.2 @@ -1,7 +1,17 @@ # # There are lots of options in this file. All you have to have is a set -# of nodes listedJ {"node ...} -# and one of {serial, udp, or mcast} +# of nodes listed {"node ...} one of {serial, bcast, mcast, or ucast}, +# and a value for "auto_failback". +# +# ATTENTION: As the configuration file is read line by line, +# THE ORDER OF DIRECTIVE MATTERS! +# +# In particular, make sure that the udpport, serial baud rate +# etc. are set before the heartbeat media are defined! +# debug and log file directives go into effect when they +# are encountered. +# +# All will be fine if you keep them ordered as in this example. # # # Note on logging: @@ -14,7 +24,7 @@ # defined then defaults will be used for debugfile and logfile as # required and messages will be sent there. # -# File to wirte debug messages to +# File to write debug messages to #debugfile /var/log/ha-debug # # @@ -25,16 +35,34 @@ # # Facility to use for syslog()/logger # -#logfacility local0 +logfacility local0 +# # +# A note on specifying "how long" times below... # -# keepalive: how many seconds between heartbeats +# The default time unit is seconds +# 10 means ten seconds +# +# You can also specify them in milliseconds +# 1500ms means 1.5 seconds +# +# +# keepalive: how long between heartbeats? # #keepalive 2 # -# deadtime: seconds-to-declare-host-dead +# deadtime: how long-to-declare-host-dead? +# +# If you set this too low you will get the problematic +# split-brain (or cluster partition) problem. +# See the FAQ for how to use warntime to tune deadtime. # -#deadtime 10 +#deadtime 30 +# +# warntime: how long before issuing "late heartbeat" warning? +# See the FAQ for how to use warntime to tune deadtime. +# +#warntime 10 # # # Very first dead time (initdead) @@ -46,24 +74,27 @@ # #initdead 120 # -# hopfudge maximum hop count minus number of nodes in config -#hopfudge 1 -# -# serial serialportname ... -#serial /dev/ttyS0 # +# What UDP port to use for bcast/ucast communication? +# +#udpport 694 # # Baud rate for serial ports... # #baud 19200 +# +# serial serialportname ... +#serial /dev/ttyS0 # Linux +#serial /dev/cuaa0 # FreeBSD +#serial /dev/cua/a # Solaris # -# What UDP port to use for communication? -# -#udpport 694 # -# What interfaces to heartbeat over? +# What interfaces to broadcast heartbeats over? # -#udp eth0 +#bcast eth0 # Linux +#bcast eth1 eth2 # Linux +#bcast le0 # Solaris +#bcast le1 le2 # Solaris # # Set up a multicast heartbeat medium # mcast [dev] [mcast group] [port] [ttl] [loop] @@ -71,23 +102,65 @@ # [dev] device to send/rcv heartbeats on # [mcast group] multicast group to join (class D multicast address # 224.0.0.0 - 239.255.255.255) -# [port] udp port to sendto/rcvfrom (no real reason to differ -# from the port used for broadcast heartbeats) +# [port] udp port to sendto/rcvfrom (set this value to the +# same value as "udpport" above) # [ttl] the ttl value for outbound heartbeats. this effects # how far the multicast packet will propagate. (0-255) +# Must be greater than zero. # [loop] toggles loopback for outbound multicast heartbeats. # if enabled, an outbound packet will be looped back and # received by the interface it was sent on. (0 or 1) +# Set this value to zero. # # -#mcast eth0 225.0.0.1 694 1 1 +#mcast eth0 225.0.0.1 694 1 0 +# +# Set up a unicast / udp heartbeat medium +# ucast [dev] [peer-ip-addr] +# +# [dev] device to send/rcv heartbeats on +# [peer-ip-addr] IP address of peer to send packets to +# +#ucast eth0 192.168.1.2 +# +# +# About boolean values... +# +# Any of the following case-insensitive values will work for true: +# true, on, yes, y, 1 +# Any of the following case-insensitive values will work for false: +# false, off, no, n, 0 # -# Watchdog is the watchdog timer. If our own heart doesn't beat for -# a minute, then our machine will reboot. # -#watchdog /dev/watchdog # -# "Legacy" STONITH support +# auto_failback: determines whether a resource will +# automatically fail back to its "primary" node, or remain +# on whatever node is serving it until that node fails, or +# an administrator intervenes. +# +# The possible values for auto_failback are: +# on - enable automatic failbacks +# off - disable automatic failbacks +# legacy - enable automatic failbacks in systems +# where all nodes do not yet support +# the auto_failback option. +# +# auto_failback "on" and "off" are backwards compatible with the old +# "nice_failback on" setting. +# +# See the FAQ for information on how to convert +# from "legacy" to "on" without a flash cut. +# (i.e., using a "rolling upgrade" process) +# +# The default value for auto_failback is "legacy", which +# will issue a warning at startup. So, make sure you put +# an auto_failback directive in your ha.cf file. +# (note: auto_failback can be any boolean or "legacy") +# +#auto_failback on +# +# +# Basic STONITH support # Using this directive assumes that there is one stonith # device in the cluster. Parameters to this device are # read from a configuration file. The format of this line is: @@ -97,8 +170,6 @@ # NOTE: it is up to you to maintain this file on each node in the # cluster! # -#nice_failback on -# #stonith baytech /etc/ha.d/conf/stonith.baytech # # STONITH support @@ -118,12 +189,99 @@ # here, and you make this file publically readable, you're asking # for a denial of service attack ;-) # +# To get a list of supported stonith devices, run +# stonith -L +# For detailed information on which stonith devices are supported +# and their detailed configuration options, run this command: +# stonith -h # #stonith_host * baytech 10.0.0.3 mylogin mysecretpassword #stonith_host ken3 rps10 /dev/ttyS1 kathy 0 #stonith_host kathy rps10 /dev/ttyS1 ken3 0 +# +# Watchdog is the watchdog timer. If our own heart doesn't beat for +# a minute, then our machine will reboot. +# NOTE: If you are using the software watchdog, you very likely +# wish to load the module with the parameter "nowayout=0" or +# compile it without CONFIG_WATCHDOG_NOWAYOUT set. Otherwise even +# an orderly shutdown of heartbeat will trigger a reboot, which is +# very likely NOT what you want. +# +#watchdog /dev/watchdog # # Tell what machines are in the cluster # node nodename ... -- must match uname -n #node ken3 #node kathy +# +# Less common options... +# +# Treats 10.10.10.254 as a psuedo-cluster-member +# Used together with ipfail below... +# +#ping 10.10.10.254 +# +# Treats 10.10.10.254 and 10.10.10.253 as a psuedo-cluster-member +# called group1. If either 10.10.10.254 or 10.10.10.253 are up +# then group1 is up +# Used together with ipfail below... +# +#ping_group group1 10.10.10.254 10.10.10.253 +# +# Processes started and stopped with heartbeat. Restarted unless +# they exit with rc=100 +# +#respawn userid /path/name/to/run +#respawn hacluster /usr/lib/heartbeat/ipfail +# +# Access control for client api +# default is no access +# +#apiauth client-name gid=gidlist uid=uidlist +#apiauth ipfail gid=haclient uid=hacluster + +########################### +# +# Unusual options. +# +########################### +# +# hopfudge maximum hop count minus number of nodes in config +#hopfudge 1 +# +# deadping - dead time for ping nodes +#deadping 30 +# +# hbgenmethod - Heartbeat generation number creation method +# Normally these are stored on disk and incremented as needed. +#hbgenmethod time +# +# realtime - enable/disable realtime execution (high priority, etc.) +# defaults to on +#realtime off +# +# debug - set debug level +# defaults to zero +#debug 1 +# +# API Authentication - replaces the fifo-permissions-based system of the past +# +# +# You can put a uid list and/or a gid list. +# If you put both, then a process is authorized if it qualifies under either +# the uid list, or under the gid list. +# +# The groupname "default" has special meaning. If it is specified, then +# this will be used for authorizing groupless clients, and any client groups +# not otherwise specified. +# +#apiauth ipfail uid=hacluster +#apiauth ccm uid=hacluster +#apiauth ping gid=haclient uid=alanr,root +#apiauth default gid=haclient + +# message format in the wire, it can be classic or netstring, default is classic +#msgfmt netstring + + + =================================================================== RCS file: ultrapossum/module/failover/recovery,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- ultrapossum/module/failover/recovery 2004/06/22 10:10:07 1.4 +++ ultrapossum/module/failover/recovery 2004/09/28 14:22:53 1.5 @@ -93,3 +93,4 @@ rm -rf $TMPDIR/$TMPRPL fi fi +