Commit bedc0ab6 authored by Dimitri Savineau, committed by Dimitri Savineau
Browse files

ceph-osd: use OSD id with systemd ceph-disk

When using containerized deployment we have to create the systemd
service unit based on a template.
The current implementation with ceph-disk is using the device name
as a parameter to the systemd service and for the container name too.

$ systemctl start ceph-osd@sdb
$ docker ps --filter 'name=ceph-osd-*'
CONTAINER ID IMAGE                        NAMES
065530d0a27f ceph/daemon:latest-luminous  ceph-osd-strg0-sdb

This is the only scenario (compared to non containerized or
ceph-volume based deployment) that isn't using the OSD id.

$ systemctl start ceph-osd@0
$ docker ps --filter 'name=ceph-osd-*'
CONTAINER ID IMAGE                        NAMES
d34552ec157e ceph/daemon:latest-luminous  ceph-osd-0

Also if the device mapping doesn't persist across system reboots (i.e. sdb
might be remapped to sde) then the OSD service won't come back after
the reboot.

This patch allows using the OSD id with the ceph-osd systemd service
but requires activating the OSD manually with ceph-disk first in
order to assign the ID to that OSD.

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1670734

Signed-off-by: Dimitri Savineau <dsavinea@redhat.com>
parent df46d10c
......@@ -36,10 +36,6 @@ wait_for_socket_in_docker() {
fi
}
get_dev_name() {
echo $1 | sed -r 's/ceph-osd@([a-z]{1,4})\.service/\1/'
}
get_docker_id_from_dev_name() {
local id
local count
......@@ -53,26 +49,17 @@ get_docker_id_from_dev_name() {
echo "$id"
}
# For containerized deployments, the unit file looks like: ceph-osd@sda.service
# For non-containerized deployments, the unit file looks like: ceph-osd@NNN.service where NNN is OSD ID
for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@([0-9]+|[a-z]+).service"); do
# The unit file looks like: ceph-osd@NNN.service where NNN is OSD ID
for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@[0-9]+.service"); do
# First, restart daemon(s)
systemctl restart "${unit}"
# We need to wait because it may take some time for the socket to actually exist
COUNT=10
# Wait and ensure the socket exists after restarting the daemon
{% if containerized_deployment and osd_scenario != 'lvm' -%}
id=$(get_dev_name "$unit")
container_id=$(get_docker_id_from_dev_name "$id")
wait_for_socket_in_docker "$container_id"
osd_id=$whoami
docker_exec="docker exec $container_id"
{% elif containerized_deployment and osd_scenario == 'lvm' %}
osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]+')
{% if containerized_deployment -%}
container_id=$(get_docker_id_from_dev_name "ceph-osd-${osd_id}")
docker_exec="docker exec $container_id"
{% else %}
osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]+')
{% endif %}
SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
while [ $COUNT -ne 0 ]; do
......
......@@ -11,21 +11,52 @@
when:
- ceph_docker_on_openstack
- name: test if the container image has directory {{ container_bin_path }}
command: "docker run --rm --entrypoint=test {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} -d {{ container_bin_path }}"
changed_when: false
failed_when: false
register: test_container_bin_path
when:
- osd_scenario != 'lvm'
- name: with non lvm scenario
when: osd_scenario != 'lvm'
block:
- name: test if the container image has directory {{ container_bin_path }}
command: "docker run --rm --entrypoint=test {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} -d {{ container_bin_path }}"
changed_when: false
failed_when: false
register: test_container_bin_path
- name: test if the container image has the disk_list function
command: "docker run --rm --entrypoint=stat {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} {{ container_bin_path + '/disk_list.sh' if test_container_bin_path.rc == 0 else 'disk_list.sh' }}"
changed_when: false
failed_when: false
register: disk_list
when:
- osd_scenario != 'lvm'
- name: test if the container image has the disk_list function
command: "docker run --rm --entrypoint=stat {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} {{ container_bin_path + '/disk_list.sh' if test_container_bin_path.rc == 0 else 'disk_list.sh' }}"
changed_when: false
failed_when: false
register: disk_list
- name: test activated ceph-disk osds
shell: |
ls /var/lib/ceph/osd/ | sed 's/.*-//'
register: activated_osds
- name: activate containerized osd(s)
shell: |
DOCKER_ENV=$(docker run --rm --net=host --ulimit nofile=1024:1024 \
--privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z \
-e CLUSTER={{ cluster }} -e OSD_DEVICE={{ item }} \
{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} \
disk_list)
docker run --rm --net=host \
--ulimit nofile=1024:1024 \
--ipc=host --pid=host --privileged=true \
-v /etc/ceph:/etc/ceph:z \
-v /var/lib/ceph/:/var/lib/ceph/:z \
-v /dev:/dev \
-v /etc/localtime:/etc/localtime:ro \
-e DEBUG=verbose \
-e CLUSTER={{ cluster }} \
-e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE_ONLY \
-e OSD_DEVICE={{ item }} \
${DOCKER_ENV} \
{{ docker_env_args }} \
{{ ceph_osd_docker_prepare_env }} \
{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
with_items: "{{ devices }}"
when:
- devices is defined
- devices | length > activated_osds.stdout_lines | length
- name: generate ceph osd docker run script
become: true
......@@ -44,18 +75,21 @@
- name: get osd ids
shell: |
ls /var/lib/ceph/osd/ | sed 's/.*-//'
register: osd_ids_non_container
register: ceph_disk_osd_ids
when: osd_scenario != 'lvm'
- name: set_fact docker_exec_start_osd
set_fact:
docker_exec_start_osd: "{{ 'docker run --rm --ulimit nofile=1024:1024 --privileged=true -v /run/lvm/lvmetad.socket:/run/lvm/lvmetad.socket -v /var/run/udev/:/var/run/udev/:z -v /etc/ceph:/etc/ceph:z -v /dev:/dev --entrypoint=ceph-volume ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else 'ceph-volume' }}"
when: osd_scenario == 'lvm'
- name: collect osd ids
shell: >
{{ docker_exec_start_osd }} lvm list --format json
changed_when: false
failed_when: false
register: ceph_osd_ids
register: ceph_volume_osd_ids
when: osd_scenario == 'lvm'
- name: generate systemd unit file
become: true
......@@ -70,13 +104,41 @@
when:
- containerized_deployment
- name: device to ID migration
when:
- containerized_deployment | bool
- osd_scenario != 'lvm'
block:
- name: check ceph-osd service using device name
shell: |
systemctl list-units | grep -E "loaded * active" | grep -coE "ceph-osd@([a-z]+|nvme[0-9]+n[0-9]+).service"
register: ceph_osd_device_name
changed_when: false
failed_when: false
- name: copy systemd-device-to-id.sh script
template:
src: systemd-device-to-id.sh.j2
dest: /tmp/systemd-device-to-id.sh
owner: root
group: root
mode: 0750
when: ceph_osd_device_name.stdout|int != 0
- name: run the systemd-device-to-id.sh script
command: /usr/bin/env bash /tmp/systemd-device-to-id.sh
when: ceph_osd_device_name.stdout|int != 0
with_items: "{{ groups[osd_group_name] }}"
delegate_to: "{{ item }}"
run_once: true
- name: systemd start osd
systemd:
name: ceph-osd@{{ item | regex_replace('/dev/', '') if osd_scenario != 'lvm' and containerized_deployment else item }}
name: ceph-osd@{{ item }}
state: started
enabled: yes
daemon_reload: yes
with_items: "{{ devices if osd_scenario != 'lvm' and containerized_deployment else ((ceph_osd_ids.stdout | from_json).keys() | list) if osd_scenario == 'lvm' and not containerized_deployment else osd_ids_non_container.stdout_lines }}"
with_items: "{{ ((ceph_volume_osd_ids.stdout | from_json).keys() | list) if osd_scenario == 'lvm' else ceph_disk_osd_ids.stdout_lines }}"
- name: ensure systemd service override directory exists
file:
......
......@@ -12,8 +12,20 @@ DOCKER_ENV=""
#############
# FUNCTIONS #
#############
#######################################
# Resolve an OSD id to the whole-disk device that backs it.
#
# Runs `ceph-disk list` in a throw-away container, picks the line that
# mentions ", osd.<id>," and takes its first column as the data partition.
# The partition suffix is then stripped to obtain the parent device.
#
# Globals:   DATA_PART  (written) data partition reported for the OSD
#            OSD_DEVICE (written) parent device of DATA_PART, empty on failure
# Arguments: $1 - OSD id
# Returns:   0 on success, 1 when no data partition is listed for the OSD
#######################################
function id_to_device () {
  local nvme_like_re='^(/dev/(cciss|nvme).*[0-9])p[0-9]+$'
  local plain_re='^(.*[^0-9])[0-9]+$'

{% if dmcrypt | bool %}
  docker run --rm --net=host --ulimit nofile=1024:1024 --ipc=host --pid=host --privileged=true -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph/:/var/lib/ceph/:z -v /dev:/dev -v /etc/localtime:/etc/localtime:ro -e DEBUG=verbose -e CLUSTER={{ cluster }} {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} osd_ceph_disk_dmcrypt_data_map
{% endif %}
  DATA_PART=$(docker run --rm --ulimit nofile=1024:1024 --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z --entrypoint ceph-disk {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} list | grep ", osd\.${1}," | awk '{ print $1 }')

  # Without a data partition there is nothing to derive; fail loudly instead
  # of tripping over a negative-length substring expansion.
  if [[ -z "${DATA_PART}" ]]; then
    OSD_DEVICE=""
    echo "No ceph-disk data partition found for osd.${1}" >&2
    return 1
  fi

  if [[ "${DATA_PART}" =~ ^/dev/(cciss|nvme) ]]; then
    # cciss/nvme partitions are named <device>p<N>; drop the whole "p<N>"
    # suffix so that multi-digit partition numbers are handled too.
    if [[ "${DATA_PART}" =~ $nvme_like_re ]]; then
      OSD_DEVICE=${BASH_REMATCH[1]}
    else
      # Unexpected naming: keep the historical "drop two characters" rule.
      OSD_DEVICE=${DATA_PART%??}
    fi
  else
    # Other devices are named <device><N>; drop every trailing digit.
    if [[ "${DATA_PART}" =~ $plain_re ]]; then
      OSD_DEVICE=${BASH_REMATCH[1]}
    else
      # Unexpected naming: keep the historical "drop one character" rule.
      OSD_DEVICE=${DATA_PART%?}
    fi
  fi
}
function expose_partitions () {
DOCKER_ENV=$(docker run --rm --net=host --name expose_partitions_${1} --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z -e CLUSTER={{ cluster }} -e OSD_DEVICE=/dev/${1} {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} disk_list)
DOCKER_ENV=$(docker run --rm --net=host --privileged=true -v /dev/:/dev/ -v /etc/ceph:/etc/ceph:z -e CLUSTER={{ cluster }} -e OSD_DEVICE=${1} {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} disk_list)
}
{% else -%}
# NOTE(leseb): maintains backwards compatibility with old ceph-docker Jewel images
......@@ -54,7 +66,8 @@ function expose_partitions {
{% endif -%}
expose_partitions "$1"
id_to_device "$1"
expose_partitions "${OSD_DEVICE}"
# discover osd_objectstore for ceph-disk based osds
if [[ $DOCKER_ENV =~ "BLUESTORE" ]]; then
......@@ -122,12 +135,11 @@ numactl \
-v /run/lvm/lvmetad.socket:/run/lvm/lvmetad.socket \
-e CEPH_DAEMON=OSD_CEPH_VOLUME_ACTIVATE \
-e OSD_ID="$1" \
--name=ceph-osd-"$1" \
{% else -%}
$DOCKER_ENV \
-e CEPH_DAEMON=OSD_CEPH_DISK_ACTIVATE \
-e OSD_DEVICE=/dev/"${1}" \
--name=ceph-osd-{{ ansible_hostname }}-"${1}" \
-e OSD_DEVICE="${OSD_DEVICE}" \
{% endif -%}
--name=ceph-osd-"$1" \
{{ ceph_osd_docker_extra_env }} \
{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
#!/bin/bash
# Value handed to `sleep` between polling attempts; rendered from the
# handler_health_osd_check_delay template variable.
DELAY="{{ handler_health_osd_check_delay }}"
# Arguments appended to every `ceph` invocation in this script: run as
# client.bootstrap-osd with its keyring against the templated cluster name.
CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"
#######################################
# Wait until every placement group is reported as active+clean.
#
# Globals:   docker_exec (read)  command prefix used to reach the ceph CLI
#            CEPH_CLI    (read)  extra arguments for every ceph call
#            DELAY       (read)  argument to sleep between attempts
#            RETRIES     (read/written) remaining attempts, decremented here
#            num_pgs, RET (written)
# Returns:   0 when the cluster has no PGs, or when the number of PGs equals
#            the number of PGs whose state contains "active+clean".
# Exits:     1 (whole script) after dumping cluster state once RETRIES
#            attempts have been used up.
#######################################
check_pgs() {
  local status
  local clean_pgs

  # $docker_exec and $CEPH_CLI are deliberately unquoted: both hold several
  # words that must be split into separate arguments.
  num_pgs=$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')
  if [[ "$num_pgs" == "0" ]]; then
    return 0
  fi
  while [ "$RETRIES" -ne 0 ]; do
    # Take a single status snapshot so that both counters describe the same
    # cluster state instead of two different points in time.
    status=$($docker_exec ceph $CEPH_CLI -s -f json)
    num_pgs=$(printf '%s\n' "$status" | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')
    # print(...) with parentheses is valid on both Python 2 and Python 3.
    clean_pgs=$(printf '%s\n' "$status" | python -c 'import sys, json; print(sum([i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if "active+clean" in i["state_name"]]))')
    test "$num_pgs" -eq "$clean_pgs"
    RET=$?
    test $RET -eq 0 && return 0
    sleep $DELAY
    RETRIES=$((RETRIES - 1))
  done
  # PGs not clean, exiting with return code 1
  echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
  echo "It is possible that the cluster has less OSDs than the replica configuration"
  echo "Will refuse to continue"
  $docker_exec ceph $CEPH_CLI -s
  $docker_exec ceph $CEPH_CLI osd dump
  $docker_exec ceph $CEPH_CLI osd tree
  $docker_exec ceph $CEPH_CLI osd crush rule dump
  exit 1
}
#######################################
# Wait for the OSD admin socket to appear inside a running OSD container.
#
# The OSD id is read from the `whoami` file found under the container's
# /var/lib/ceph/osd/ mount point; the first such mount point is used.
#
# Globals:   whoami (written) OSD id served by the container; callers read it
# Arguments: $1 - docker container id (or name)
# Exits:     1 (whole script) when no container is given or when the socket
#            does not show up within the 10 second timeout.
#######################################
wait_for_socket_in_docker() {
  local mount_point

  # An empty id means the container lookup failed; report that directly
  # rather than letting it surface as a socket timeout.
  if [ -z "$1" ]; then
    echo "No running container found for the Ceph OSD, cannot look for its socket."
    echo "Abort mission!"
    exit 1
  fi

  mount_point=$(docker exec "$1" df --output=target | grep -m 1 '/var/lib/ceph/osd/')
  whoami=$(docker exec "$1" cat "${mount_point}/whoami")
  if ! docker exec "$1" timeout 10 bash -c "while [ ! -e /var/run/ceph/{{ cluster }}-osd.${whoami}.asok ]; do sleep 1 ; done"; then
    echo "Timed out while trying to look for a Ceph OSD socket."
    echo "Abort mission!"
    exit 1
  fi
}
#######################################
# Extract the device name from a device-based ceph-osd unit name.
#
# Arguments: $1 - unit name, e.g. ceph-osd@sdb.service
# Outputs:   the device part (1-4 lowercase letters, or nvme<N>n<N>) on
#            stdout; input that does not match is printed unchanged.
#######################################
get_dev_name() {
  # printf with a quoted argument keeps the unit name intact: no word
  # splitting, no globbing, no echo option parsing.
  printf '%s\n' "$1" | sed -r 's/ceph-osd@([a-z]{1,4}|nvme[0-9]+n[0-9]+)\.service/\1/'
}
#######################################
# Look up the id of the running container whose name ends with the given
# string, polling up to 10 times.
#
# Globals:   DELAY (read) argument to sleep after each unsuccessful lookup
# Arguments: $1 - container name suffix (used in a `name=<suffix>$` filter)
# Outputs:   the container id on stdout, or an empty line if none was found
#######################################
get_docker_id_from_dev_name() {
  local container
  local attempts_left
  for (( attempts_left = 10; attempts_left != 0; attempts_left-- )); do
    container=$(docker ps -q -f "name=${1}$")
    if [ "$container" != "" ]; then
      break
    fi
    sleep $DELAY
  done
  echo "$container"
}
# Migrate every loaded+active ceph-osd unit that is still named after a
# device (letters only, or nvme<N>n<N>) to a unit named after its OSD id.
for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@([a-z]+|nvme[0-9]+n[0-9]+).service"); do
# Find the container currently serving this device and learn its OSD id;
# wait_for_socket_in_docker stores that id in the global `whoami`.
dev_name=$(get_dev_name "$unit")
container_id=$(get_docker_id_from_dev_name "$dev_name")
wait_for_socket_in_docker "$container_id"
osd_id=$whoami
# Stop and Disable the unit based on device name
systemctl stop ceph-osd@${dev_name}
systemctl disable ceph-osd@${dev_name}
# Enable and Start the unit based on OSD id
systemctl enable ceph-osd@${osd_id}
systemctl start ceph-osd@${osd_id}
# Look up the container again, this time under its id-based name.
container_id=$(get_docker_id_from_dev_name "ceph-osd-${osd_id}")
# docker_exec is read by check_pgs to run the ceph CLI inside the container.
docker_exec="docker exec $container_id"
SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
# Poll up to 10 times for the admin socket of the restarted OSD.
COUNT=10
while [ $COUNT -ne 0 ]; do
# RETRIES is reset on every pass because check_pgs decrements it.
RETRIES="{{ handler_health_osd_check_retries }}"
# Socket present and PGs clean: move on to the next unit of the outer loop.
# check_pgs either returns 0 or exits the script, so falling through to the
# sleep below means the socket test itself failed.
$docker_exec test -S "$SOCKET" && check_pgs && continue 2
sleep $DELAY
let COUNT=COUNT-1
done
# If we reach this point, it means the socket is not present.
echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running. Showing ceph-osd unit logs now:"
journalctl -u "ceph-osd@${osd_id}.service"
exit 1
done
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment