Commit 82b934cf authored by Guillaume Abrioux
Browse files

rolling_update: unmask monitor service after a failure

If for some reason the playbook fails after the service was
stopped, disabled and masked, and before it got restarted, enabled and
unmasked, the playbook leaves the service masked, which can confuse users
and forces them to unmask the unit manually.

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1917680

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
(cherry picked from commit 07029e1b)
parent 653d180e
...@@ -118,150 +118,170 @@ ...@@ -118,150 +118,170 @@
serial: 1 serial: 1
become: True become: True
tasks: tasks:
- name: remove ceph aliases - name: upgrade ceph mon cluster
file: block:
path: /etc/profile.d/ceph-aliases.sh - name: upgrade ceph mon cluster
state: absent block:
when: containerized_deployment | bool - name: remove ceph aliases
file:
- name: set mon_host_count path: /etc/profile.d/ceph-aliases.sh
set_fact: state: absent
mon_host_count: "{{ groups[mon_group_name] | length }}" when: containerized_deployment | bool
- name: fail when less than three monitors
fail:
msg: "Upgrade of cluster with less than three monitors is not supported."
when: mon_host_count | int < 3
- name: select a running monitor
set_fact:
mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"
- import_role: - name: set mon_host_count
name: ceph-defaults set_fact:
- import_role: mon_host_count: "{{ groups[mon_group_name] | length }}"
name: ceph-facts
- block: - name: fail when less than three monitors
- name: get ceph cluster status fail:
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json" msg: "Upgrade of cluster with less than three monitors is not supported."
register: check_cluster_health when: mon_host_count | int < 3
delegate_to: "{{ mon_host }}"
- block: - name: select a running monitor
- name: display ceph health detail set_fact:
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail" mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"
delegate_to: "{{ mon_host }}"
- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts
- block:
- name: get ceph cluster status
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json"
register: check_cluster_health
delegate_to: "{{ mon_host }}"
- block:
- name: display ceph health detail
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail"
delegate_to: "{{ mon_host }}"
- name: fail if cluster isn't in an acceptable state
fail:
msg: "cluster is not in an acceptable state!"
when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR'
when: inventory_hostname == groups[mon_group_name] | first
- name: ensure /var/lib/ceph/bootstrap-rbd-mirror is present
file:
path: /var/lib/ceph/bootstrap-rbd-mirror
owner: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
group: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
mode: '755'
state: directory
delegate_to: "{{ item }}"
with_items: "{{ groups[mon_group_name] }}"
when:
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]
- name: create potentially missing keys (rbd and rbd-mirror)
ceph_key:
name: "client.{{ item.0 }}"
dest: "/var/lib/ceph/{{ item.0 }}/"
caps:
mon: "allow profile {{ item.0 }}"
cluster: "{{ cluster }}"
delegate_to: "{{ item.1 }}"
with_nested:
- ['bootstrap-rbd', 'bootstrap-rbd-mirror']
- "{{ groups[mon_group_name] }}" # so the key goes on all the nodes
environment:
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
when:
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]
- name: fail if cluster isn't in an acceptable state # NOTE: we mask the service so the RPM can't restart it
fail: # after the package gets upgraded
msg: "cluster is not in an acceptable state!" - name: stop ceph mon - shortname
when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR' systemd:
when: inventory_hostname == groups[mon_group_name] | first name: ceph-mon@{{ ansible_facts['hostname'] }}
state: stopped
- name: ensure /var/lib/ceph/bootstrap-rbd-mirror is present enabled: no
file: masked: yes
path: /var/lib/ceph/bootstrap-rbd-mirror ignore_errors: True
owner: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
group: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
mode: '755'
state: directory
delegate_to: "{{ item }}"
with_items: "{{ groups[mon_group_name] }}"
when:
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]
- name: create potentially missing keys (rbd and rbd-mirror)
ceph_key:
name: "client.{{ item.0 }}"
dest: "/var/lib/ceph/{{ item.0 }}/"
caps:
mon: "allow profile {{ item.0 }}"
cluster: "{{ cluster }}"
delegate_to: "{{ item.1 }}"
with_nested:
- ['bootstrap-rbd', 'bootstrap-rbd-mirror']
- "{{ groups[mon_group_name] }}" # so the key goes on all the nodes
environment:
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
when:
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]
# NOTE: we mask the service so the RPM can't restart it # NOTE: we mask the service so the RPM can't restart it
# after the package gets upgraded # after the package gets upgraded
- name: stop ceph mon - shortname - name: stop ceph mon - fqdn
systemd: systemd:
name: ceph-mon@{{ ansible_facts['hostname'] }} name: ceph-mon@{{ ansible_facts['fqdn'] }}
state: stopped state: stopped
enabled: no enabled: no
masked: yes masked: yes
ignore_errors: True ignore_errors: True
# NOTE: we mask the service so the RPM can't restart it # only mask the service for mgr because it must be upgraded
# after the package gets upgraded # after ALL monitors, even when collocated
- name: stop ceph mon - fqdn - name: mask the mgr service
systemd: systemd:
name: ceph-mon@{{ ansible_facts['fqdn'] }} name: ceph-mgr@{{ ansible_facts['hostname'] }}
state: stopped masked: yes
enabled: no when: inventory_hostname in groups[mgr_group_name] | default([])
masked: yes or groups[mgr_group_name] | default([]) | length == 0
ignore_errors: True
# only mask the service for mgr because it must be upgraded - import_role:
# after ALL monitors, even when collocated name: ceph-handler
- name: mask the mgr service - import_role:
systemd: name: ceph-common
name: ceph-mgr@{{ ansible_facts['hostname'] }} when: not containerized_deployment | bool
masked: yes - import_role:
when: inventory_hostname in groups[mgr_group_name] | default([]) name: ceph-container-common
or groups[mgr_group_name] | default([]) | length == 0 when: containerized_deployment | bool
- import_role:
name: ceph-config
- import_role:
name: ceph-mon
- import_role: - name: start ceph mgr
name: ceph-handler systemd:
- import_role: name: ceph-mgr@{{ ansible_facts['hostname'] }}
name: ceph-common state: started
when: not containerized_deployment | bool enabled: yes
- import_role: ignore_errors: True # if no mgr collocated with mons
name: ceph-container-common
when: containerized_deployment | bool - name: non container | waiting for the monitor to join the quorum...
- import_role: command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
name: ceph-config register: ceph_health_raw
- import_role: until:
name: ceph-mon - ceph_health_raw.rc == 0
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
when: not containerized_deployment | bool
- name: start ceph mgr - name: container | waiting for the containerized monitor to join the quorum...
systemd: command: >
name: ceph-mgr@{{ ansible_facts['hostname'] }} {{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
state: started register: ceph_health_raw
enabled: yes until:
ignore_errors: True # if no mgr collocated with mons - ceph_health_raw.rc == 0
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
- name: non container | waiting for the monitor to join the quorum... hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json retries: "{{ health_mon_check_retries }}"
register: ceph_health_raw delay: "{{ health_mon_check_delay }}"
until: when: containerized_deployment | bool
- ceph_health_raw.rc == 0
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
when: not containerized_deployment | bool
- name: container | waiting for the containerized monitor to join the quorum... rescue:
command: > - name: unmask the mon service
{{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json systemd:
register: ceph_health_raw name: ceph-mon@{{ item }}
until: enabled: yes
- ceph_health_raw.rc == 0 masked: no
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or with_items:
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"]) - "{{ ansible_facts['hostname'] }}"
retries: "{{ health_mon_check_retries }}" - "{{ ansible_facts['fqdn'] }}"
delay: "{{ health_mon_check_delay }}"
when: containerized_deployment | bool
- name: unmask the mgr service
systemd:
name: ceph-mgr@{{ ansible_facts['hostname'] }}
masked: no
when: inventory_hostname in groups[mgr_group_name] | default([])
or groups[mgr_group_name] | default([]) | length == 0
- name: reset mon_host - name: reset mon_host
hosts: "{{ mon_group_name|default('mons') }}" hosts: "{{ mon_group_name|default('mons') }}"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment