[Linux-ha-jp] Stonithの動きについて

Back to archive index

harada ljpgh****@gmail*****
2018年 9月 3日 (月) 14:46:24 JST


原田と申します。

Pacemakerの監視として、
Stonithも追加しているのですが、
下記の動きが正しいのか分からなくなってしまいました。
ご教示をお願いいたします。

構成
A プライマリ
B   セカンダリ
の2台構成
DRBD8+Pacemaker1.1.17 +corosync2.4

パターン1
① Aをシャットダウン
② Bにフェイルオーバー
③ Aを電源ONにすると、Bが再起動される。(Stonith操作?)
そして、Aにフェイルバックされる
④そして、DRBD が
   StandAlone Primary/Unknown  となってしまう。

パターン2
① Bをシャットダウン
② Aへフェイルオーバー
③ Bを電源ON Aが再起動される(Aはプライマリのまま)

CRM設定は下記となります。
property \
    no-quorum-policy="ignore" \
    stonith-enabled="true" \
    startup-fencing="false"

rsc_defaults \
    resource-stickiness="INFINITY" \
    migration-threshold="1"


group grpTrac \
    prmVIP \
    prmFS \
    prmHTTP \
    prmMail

clone clnPing \
    prmPing

clone clnDiskd1 \
    prmDiskd1

clone clnDiskd2 \
    prmDiskd2

ms msDrbd \
    drbd \
    meta \
        resource-stickiness="1" \
        master-max="1" \
        master-node-max="1" \
        clone-max="2" \
        clone-node-max="1" \
        notify="true"

primitive prmVIP ocf:heartbeat:IPaddr2 \
    params \
        ip="XXX.XXX.XXX.XXX" \
        nic="bond0" \
        cidr_netmask="28" \
    op start interval="0s" timeout="60s" on-fail="restart" \
    op monitor interval="10s" timeout="60s" on-fail="restart" \
    op stop interval="0s" timeout="60s" on-fail="fence"

primitive drbd ocf:linbit:drbd \
    params \
        drbdconf="/etc/drbd.conf" \
        drbd_resource="drbd0" \
    op start interval="0s" timeout="240s" on-fail="restart" \
    op monitor role="Master" interval="10s" timeout="20s" on-fail="restart" \
    op monitor role="Slave" interval="20s" timeout="20s" on-fail="restart" \
    op promote interval="0s" timeout="90s" on-fail="stop" \
    op demote interval="0s" timeout="90s" on-fail="ignore" \
    op stop interval="0s" timeout="100s" on-fail="ignore"

primitive prmFS ocf:heartbeat:Filesystem \
    params \
        fstype="xfs" \
        run_fsck="force" \
        device="/dev/drbd0" \
        directory="/var/www" \
    op start interval="0s" timeout="60s" on-fail="restart" \
    op monitor interval="20s" timeout="40s" on-fail="restart" \
    op stop interval="0s" timeout="60s" on-fail="ignore"

primitive prmHTTP ocf:heartbeat:apache \
    params \
       configfile="/etc/httpd/conf/httpd.conf" \
       statusurl="http://127.0.0.1/server-status" \
    op start interval="0s" timeout="90s" on-fail="restart" \
    op monitor interval="10s" timeout="10s" on-fail="restart" \
    op stop interval="0s" timeout="60s" on-fail="ignore"

primitive prmMail ocf:heartbeat:MailTo \
    params \
        email="harad****@la-j*****" \
        subject="Pacemaker Alert - hogehoge_web" \
    op monitor depth="0" timeout="60s" interval="10s"

primitive prmPing ocf:pacemaker:ping \
    params \
        name="default_ping_set" \
        host_list="XXX.XXX.XXX.XXX" \
        multiplier="100" \
        attempts="2" \
        timeout="2" \
        debug="true" \
    op start interval="0s" timeout="60s" on-fail="restart" \
    op monitor interval="10s" timeout="60s" on-fail="restart" \
    op stop interval="0s" timeout="60s" on-fail="ignore"

primitive prmDiskd1 ocf:pacemaker:diskd \
    params \
        name="diskcheck_status" \
        device="/dev/cl/var_www" \
        options="-e -t 70" \
        interval="10" \
        dampen="2" \
    op start interval="0s" timeout="60s" on-fail="restart" \
    op monitor interval="10s" timeout="60s" on-fail="restart" \
    op stop interval="0s" timeout="60s" on-fail="ignore"

primitive prmDiskd2 ocf:pacemaker:diskd \
    params \
        name="diskcheck_status_internal" \
        device="/dev/cl/lv_meta" \
        options="-e" \
        interval="10" \
        dampen="2" \
op start interval="0s" timeout="60s" on-fail="restart" \
op monitor interval="10s" timeout="60s" on-fail="restart" \
op stop interval="0s" timeout="60s" on-fail="ignore"


primitive stonith1-1 stonith:external/stonith-helper \
        params \
                priority="1" \
                pcmk_monitor_timeout="60s" \
                hostlist="hoge01" \
                dead_check_target="XXX.XXX.XXX.XXX 10.0.0.10 10.0.1.10 XXX.XXX.XXX.XXX" \
                standby_check_command="/usr/sbin/crm_resource -r vip -W | grep -q `hostname`" \
op monitor interval="10s"

primitive stonith1-2 stonith:external/ipmi \
    params \
        priority="2" \
        userid="hogehoge" \
        passwd="hogehoge" \
        ipaddr="XXX.XXX.XXX.XXX" \
        hostname="hoge01" \
        interface="lanplus" \
op start interval="0s" timeout="60s" on-fail="restart" \
op monitor interval="300s" timeout="60s" on-fail="restart" \
op stop interval="0s" timeout="60s" on-fail="ignore"

primitive stonith1-3 stonith:meatware \
        params \
                priority="3" \
                pcmk_monitor_timeout="600s" \
                hostlist="hoge01" \
op monitor interval="3600s"

primitive stonith2-1 stonith:external/stonith-helper \
        params \
                priority="1" \
                pcmk_monitor_timeout="60s" \
                hostlist="hoge02" \
                dead_check_target="XXX.XXX.XXX.XXX 10.0.0.11 10.0.1.11 XXX.XXX.XXX.XXX" \
                standby_check_command="/usr/sbin/crm_resource -r vip -W | grep -q `hostname`" \
op monitor interval="10s"


primitive stonith2-2 stonith:external/ipmi \
    params \
        priority="2" \
        userid="hogehoge" \
        passwd="hogehoge" \
        ipaddr="XXX.XXX.XXX.XXX" \
        hostname="hoge02" \
        interface="lanplus" \
op start interval="0s" timeout="60s" on-fail="restart" \
op monitor interval="300s" timeout="60s" on-fail="restart" \
op stop interval="0s" timeout="60s" on-fail="ignore"


primitive stonith2-3 stonith:meatware \
        params \
                priority="3" \
                pcmk_monitor_timeout="600s" \
                hostlist="hoge02" \
op monitor interval="3600s"

group grpStonith1 \
        stonith1-1 \
        stonith1-2 \
        stonith1-3

group grpStonith2 \
        stonith2-1 \
        stonith2-2 \
        stonith2-3

location location-grpStonith1 grpStonith1 \
        rule 200: #uname eq hoge02 \
        rule -inf: #uname eq hoge01


location location-grpStonith2 grpStonith2 \
        rule 200: #uname eq hoge01 \
        rule -inf: #uname eq hoge02



location rsc_location-msDrbd-1 msDrbd \
    rule 200: #uname eq hoge01 \
    rule 100: #uname eq hoge02 \
    rule -INFINITY: not_defined default_ping_set or default_ping_set lt 100 \
    rule -INFINITY: not_defined diskcheck_status or diskcheck_status eq ERROR \
    rule -INFINITY: not_defined diskcheck_status_internal or diskcheck_status_internal eq ERROR
location rsc_location-grpTrac-2 grpTrac \
    rule 200: #uname eq hoge01 \
    rule 100: #uname eq hoge02 \
    rule -INFINITY: not_defined default_ping_set or default_ping_set lt 100 \
    rule -INFINITY: not_defined diskcheck_status or diskcheck_status eq ERROR \
    rule -INFINITY: not_defined diskcheck_status_internal or diskcheck_status_internal eq ERROR


colocation rsc_colocation-msDrbd-clnPing-1            INFINITY: msDrbd        clnPing
colocation rsc_colocation-msDrbd-clnDiskd1-2          INFINITY: msDrbd        clnDiskd1
colocation rsc_colocation-msDrbd-clnDiskd2-3          INFINITY: msDrbd        clnDiskd2
colocation rsc_colocation-grpTrac-msDrbd-4            INFINITY: grpTrac       msDrbd:Master

order rsc_order-clnPing-msDrbd-1         0:        clnPing       msDrbd        symmetrical=false
order rsc_order-clnDiskd1-msDrbd-2       0:        clnDiskd1     msDrbd        symmetrical=false
order rsc_order-clnDiskd2-msDrbd-3       0:        clnDiskd2     msDrbd        symmetrical=false
order rsc_order-msDrbd-grpTrac-4         INFINITY: msDrbd:promote grpTrac:start


よろしくお願いいたします。
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/linux-ha-japan/attachments/20180903/d520ff8a/attachment-0001.htm 



Linux-ha-japan メーリングリストの案内
Back to archive index