Set up Ceph storage via ceph-ansible

A basic guide to setting up Ceph storage with ceph-ansible.

tags: ceph

Environment

Ansible Control Node OS: Ubuntu 22.04
Ceph Node OS: Ubuntu 22.04 (vda)
Username: ubuntu
Nodes:

ans-ceph-deploy (ansible control node)
ans-ceph-mon1 | 2vCPU, 4G RAM
ans-ceph-mon2 | 2vCPU, 4G RAM
ans-ceph-mon3 | 2vCPU, 4G RAM
ans-ceph-osd1 | 2vCPU, 4G RAM, vdb(32GB), vdc(32GB)
ans-ceph-osd2 | 2vCPU, 4G RAM, vdb(32GB), vdc(32GB)
ans-ceph-osd3 | 2vCPU, 4G RAM, vdb(32GB), vdc(32GB)

Networks:

cluster: 192.168.122.0/24
public: 192.168.1.0/24

/etc/hosts on the Ansible control node:

192.168.122.61 ans-ceph-mon1
192.168.122.62 ans-ceph-mon2
192.168.122.63 ans-ceph-mon3
192.168.122.64 ans-ceph-osd1
192.168.122.65 ans-ceph-osd2
192.168.122.66 ans-ceph-osd3

Ansible version:

$ ansible --version
ansible 2.10.17
  config file = None
  configured module search path = ['/home/ubuntu/.ansible/plugins/modules', '/usr/share/ansible/plugins/modules']
  ansible python module location = /home/ubuntu/.local/lib/python3.10/site-packages/ansible
  executable location = /home/ubuntu/.local/bin/ansible
  python version = 3.10.6 (main, Nov 14 2022, 16:10:14) [GCC 11.3.0]

Pre-configuration

Configure the following items:
1. update timezone
2. update sudoers (optional)
3. update hostname
4. set public interface IP
5. disable ipv6 (optional)
6. disable daily apt upgrade (optional)
7. install docker

An example Ansible playbook to configure all of them:

# filename: hosts
ceph-mon-1 hostname=ceph-mon-1 netplan_config_ip=192.168.1.111/24
ceph-mon-2 hostname=ceph-mon-2 netplan_config_ip=192.168.1.112/24
ceph-mon-3 hostname=ceph-mon-3 netplan_config_ip=192.168.1.113/24
ceph-osd-1 hostname=ceph-osd-1 netplan_config_ip=192.168.1.114/24 netplan_config_if=enp9s0
ceph-osd-2 hostname=ceph-osd-2 netplan_config_ip=192.168.1.115/24 netplan_config_if=enp9s0
ceph-osd-3 hostname=ceph-osd-3 netplan_config_ip=192.168.1.116/24 netplan_config_if=enp9s0


[all:vars]
netplan_config_if = "enp7s0"
netplan_config_file = "/etc/netplan/10-ansible-config.yaml"

# filename: pre-configure.yml
- name: Setup cluster environments
  hosts: all
  become: yes
  vars:
    tz: Australia/Sydney

    sudoers:
      - ubuntu

    root_password: "ubuntu"

    netplan_file: "{{ netplan_config_file }}"

    netplan_if: "{{ netplan_config_if }}"

    netplan_ip: "{{ netplan_config_ip }}"

    netplan_data: |
      network:
        version: 2
        ethernets:
          {{ netplan_if }}:
            dhcp4: false
            dhcp6: false
            dhcp-identifier: mac
            link-local: []
            addresses: [{{ netplan_ip }}]

    ssh_user: root

    ssh_key: |
      ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDIAJq4d9eXn00ZJKqqV8WhxPA59ufmCHzMtAWPvE7h4ofuB712AE0lXcWRJ8xysU3nqkZdCKM84m09wBYJG9Bc195T77fKBlHq6q8y0n4ErB6FW0Bwy7P4rpYlDQ+2a7P2SGy+OWDU05+L1fO1YcZpTAnMuzjjYPSv/ohBTJ+MU7AhsBXoYLWlkjV1CsBHCJ/VuFrAfCnj/LWiT+QyNcshdB08JUsLWcoxLmIwde2s0FPYqPSodIvEkmbQml59BndsCPBrnvN+iLBcR4b0Wk+J9bQkqSspGTnye0ald+nlJPupzwvDfFbL2lyThjU4U+Us6UEm3ptejZBlm4/DEKcFu5RAt0IEtSWoJDhSY96FNOrjo6sMDFdTZeFpwMgOkCh6ZDAYTRjamOdScvH5Sq/7doKsuTjdyb1utIGcKvVUB6NrWJq691o42Q49CCw6sHVUdC8PDrWsg4Z+1nAfA1R1QjgnccKdi3hAomUNapzxYstLjf/sG1AqByeHDoOTuS8= ubuntu@nuc112

  tasks:
    - name: set hostname
      ansible.builtin.hostname:
        name: "{{ hostname }}"

    - name: set timezone "{{ tz }}"
      community.general.timezone:
        name: "{{ tz }}"

    - name: set no password sudo
      ansible.builtin.lineinfile:
        path: /etc/sudoers
        line: "{{ item }} ALL=(ALL) NOPASSWD: ALL"
      loop: "{{ sudoers }}"

    # PermitRootLogin in /etc/ssh/sshd_config must be set to yes in order to SSH in as root
    - name: update root password
      ansible.builtin.user:
        name: root
        password: "{{ root_password | password_hash('sha512') }}"

    # Configure 2nd Interface IP
    - name: print netplan config file
      ansible.builtin.debug:
        msg: "{{ netplan_file }}"

    - name: print netplan config data
      ansible.builtin.debug:
        msg: "{{ netplan_data }}"

    - name: write netplan config data
      ansible.builtin.copy:
        content: "{{ netplan_data }}"
        dest: "{{ netplan_file }}"
        owner: root
        group: root
        mode: '0644'
    
    - name: apply netplan config
      ansible.builtin.shell: |
        netplan apply
    
    - name: get "{{ netplan_if }}" address info
      ansible.builtin.shell: |
        ip -4 addr show {{ netplan_if }}
      register: ip_addr_show_out
        
    - name: show "{{ netplan_if }}" address info
      ansible.builtin.debug:
        msg: "{{ ip_addr_show_out.stdout_lines }}"

    # install docker
    - name: install docker
      ansible.builtin.apt:
        name: docker.io
        state: present
        update_cache: yes

    # disable IPv6
    - name: disable ipv6
      ansible.builtin.shell: |
        sysctl -w net.ipv6.conf.all.disable_ipv6=1
        sysctl -w net.ipv6.conf.default.disable_ipv6=1
        sysctl -w net.ipv6.conf.lo.disable_ipv6=1
        sysctl -p
      register: sysctl_out

    - name: print sysctl output
      ansible.builtin.debug:
        msg: "{{ sysctl_out.stdout }}"
    
    - name: add "net.ipv6.conf.all.disable_ipv6 = 1" in /etc/sysctl.conf
      ansible.builtin.lineinfile:
        path: /etc/sysctl.conf
        line: "{{ item }}"
      loop:
        - "net.ipv6.conf.default.disable_ipv6 = 1"
        - "net.ipv6.conf.all.disable_ipv6 = 1"

    - name: disable apt daily timer
      ansible.builtin.systemd:
        name: apt-daily.timer
        state: stopped
        enabled: no

    - name: disable apt daily upgrade timer
      ansible.builtin.systemd:
        name: apt-daily-upgrade.timer
        state: stopped
        enabled: no

    - name: print key
      ansible.builtin.debug:
        msg: "{{ ssh_key }}"

    - name: add ssh key to root
      ansible.builtin.lineinfile:
        path: /root/.ssh/authorized_keys
        line: "{{ ssh_key }}"
      when: ssh_user == 'root'

    - name: add ssh key to non root user
      ansible.builtin.lineinfile:
        path: "/home/{{ ssh_user }}/.ssh/authorized_keys"
        line: "{{ ssh_key }}"
      when: ssh_user != 'root'

Install the community.general collection (needed by the community.general.timezone task above):

$ ansible-galaxy collection install community.general

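Optionally, verify the playbook syntax first (a standard ansible-playbook flag):

$ ansible-playbook -i ./hosts pre-configure.yml --syntax-check
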
Run the playbook.

$ ansible-playbook -K -i ./hosts pre-configure.yml

Verify SSH access with the root account, for example:

$ ssh root@ans-ceph-mon1 "hostname; date; ip addr show enp7s0"
ans-ceph-mon1
Thu Dec 29 11:21:40 PM AEDT 2022
3: enp7s0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
    link/ether 52:54:00:60:26:dc brd ff:ff:ff:ff:ff:ff
    inet 192.168.1.161/24 brd 192.168.1.255 scope global enp7s0
       valid_lft forever preferred_lft forever

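Before moving on to ceph-ansible, it is worth confirming that Ansible itself can reach every node as root over the freshly installed key (a quick ad-hoc ping, assuming the same hosts file as above):

$ ansible all -i ./hosts -m ping -u root
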
Setup

  1. git clone ceph-ansible

    ubuntu@ans-ceph-deploy:~$ git clone https://github.com/ceph/ceph-ansible.git

    ubuntu@ans-ceph-deploy:~$ cd ceph-ansible/
    ubuntu@ans-ceph-deploy:~/ceph-ansible$ git checkout stable-7.0
    Branch 'stable-7.0' set up to track remote branch 'stable-7.0' from 'origin'.
    Switched to a new branch 'stable-7.0'
    
  2. install requirements

    ubuntu@ans-ceph-deploy:~/ceph-ansible$ pip install -r ./requirements.txt

    ubuntu@ans-ceph-deploy:~/ceph-ansible$ export PATH=$PATH:/home/ubuntu/.local/bin

    ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible --version
    [WARNING]: log file at /home/ubuntu/ansible/ansible.log is not writeable and we cannot create it, aborting

    ansible [core 2.12.10]
      config file = /home/ubuntu/ceph-ansible/ansible.cfg
      configured module search path = ['/home/ubuntu/ceph-ansible/library']
      ansible python module location = /home/ubuntu/.local/lib/python3.10/site-packages/ansible
      ansible collection location = /home/ubuntu/.ansible/collections:/usr/share/ansible/collections
      executable location = /home/ubuntu/.local/bin/ansible
      python version = 3.10.6 (main, Nov 14 2022, 16:10:14) [GCC 11.3.0]
      jinja version = 3.0.3
      libyaml = True

    ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible-galaxy install -r requirements.yml
    
  3. create configuration
    inventory file

    ubuntu@ans-ceph-deploy:~/ceph-ansible$ vim hosts

    [mons]
    ans-ceph-mon1
    ans-ceph-mon2
    ans-ceph-mon3

    [osds]
    ans-ceph-osd1
    ans-ceph-osd2
    ans-ceph-osd3

    [mgrs]
    ans-ceph-mon1

    [rgws]
    ans-ceph-mon2

    [nfss]
    ans-ceph-mon2

    [monitoring]
    ans-ceph-mon3

    ansible variables (an alternative osds.yml sketch follows at the end of this step)

    ubuntu@ans-ceph-deploy:~/ceph-ansible/group_vars$ vim all.yml

    ceph_origin: distro
    ceph_repository: uca
    monitor_interface: enp7s0
    public_network: "192.168.1.0/24"
    cluster_network: "192.168.122.0/24"
    radosgw_interface: enp7s0
    dashboard_admin_password: cephadmin
    grafana_admin_password: cephadmin
    osd_auto_discovery: true
    

    site.yml

    ubuntu@ans-ceph-deploy:~/ceph-ansible$ cp ./site.yml.sample ./site.yml
    
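    As an alternative to osd_auto_discovery in all.yml above, the OSD data devices can be listed explicitly (a sketch only, not used in this run; the device names are the vdb/vdc disks from the environment section):

    # group_vars/osds.yml -- explicit device list instead of osd_auto_discovery
    devices:
      - /dev/vdb
      - /dev/vdc
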
  4. run playbook

    ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible-playbook -i hosts site.yml
    
  5. validation

    root@ans-ceph-mon1:~# ceph -s
      cluster:
        id:     16260601-977d-43be-8deb-d6e1ca2b4e6d
        health: HEALTH_OK

      services:
        mon: 3 daemons, quorum ans-ceph-mon1,ans-ceph-mon2,ans-ceph-mon3 (age 52m)
        mgr: ans-ceph-mon1(active, since 7m)
        osd: 6 osds: 6 up (since 33m), 6 in (since 48m)
        rgw: 1 daemon active (1 hosts, 1 zones)

      data:
        pools:   5 pools, 129 pgs
        objects: 195 objects, 454 KiB
        usage:   181 MiB used, 192 GiB / 192 GiB avail
        pgs:     129 active+clean
    
    

Todo

  • Adding OSD
  • Shrinking OSD
  • Purging cluster
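
None of these have been written up yet. For reference, adding OSD hosts is normally done by re-running site.yml with --limit, while shrinking and purging have dedicated playbooks under infrastructure-playbooks/ in the ceph-ansible tree (playbook names and variables below are from the stable branches as I remember them; verify against your checkout and the ceph-ansible docs before running any of them):

# add OSDs: re-run site.yml restricted to the (new) OSD hosts
ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible-playbook -i hosts site.yml --limit osds

# remove a specific OSD, e.g. osd.5
ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible-playbook -i hosts infrastructure-playbooks/shrink-osd.yml -e osd_to_kill=5

# purge the whole cluster (destructive)
ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible-playbook -i hosts infrastructure-playbooks/purge-cluster.yml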

Troubleshooting

Random VM crashes with kernel "5.15.0-56-generic" on a "NUC11ATBC4" NUC.

It appears to be a bug in kernel 5.15.0; disabling apicv on the KVM host stops the VMs from crashing:

root@nuc112:~# cat /etc/modprobe.d/kvm.conf 
options kvm_intel enable_apicv=0

# reboot

root@nuc112:~# cat /sys/module/kvm_intel/parameters/enable_apicv
N
"mons are allowing insecure global_id reclaim"
root@ans-ceph-mon1:~# ceph -s
  cluster:
    id:     16260601-977d-43be-8deb-d6e1ca2b4e6d
    health: HEALTH_WARN
            mons are allowing insecure global_id reclaim
 
  services:
    mon: 3 daemons, quorum ans-ceph-mon1,ans-ceph-mon2,ans-ceph-mon3 (age 51m)
    mgr: ans-ceph-mon1(active, since 5m)
    osd: 6 osds: 6 up (since 32m), 6 in (since 47m)
    rgw: 1 daemon active (1 hosts, 1 zones)
 
  data:
    pools:   5 pools, 129 pgs
    objects: 195 objects, 454 KiB
    usage:   181 MiB used, 192 GiB / 192 GiB avail
    pgs:     129 active+clean

To mute the warning:

$ ceph config set mon mon_warn_on_insecure_global_id_reclaim false
$ ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false
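
The two settings above only mute the warning. Once every client has been upgraded to a release that supports secure global_id reclaim, the insecure behaviour itself can be switched off instead (a standard monitor option):

$ ceph config set mon auth_allow_insecure_global_id_reclaim false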
"xxxx modules have recently crashed"
root@ceph-mon-1:~# ceph -s
  cluster:
    id:     08b5dcee-7f8a-11ed-91f0-1f83fb5d8f19
    health: HEALTH_WARN
            1 mgr modules have recently crashed
 
  services:
    mon: 3 daemons, quorum ceph-mon-1,ceph-mon-2,ceph-mon-3 (age 2d)
    mgr: ceph-mon-1.pxmquf(active, since 2d), standbys: ceph-mon-2.nfchvp
    osd: 6 osds: 6 up (since 2d), 6 in (since 2d)
 
  data:
    pools:   1 pools, 1 pgs
    objects: 2 objects, 449 KiB
    usage:   44 MiB used, 384 GiB / 384 GiB avail
    pgs:     1 active+clean
root@ceph-mon-1:~# ceph crash ls
ID                                                                ENTITY                 NEW  
2022-12-21T16:59:32.230077Z_50ed0f87-452c-4c8a-a5ce-01253b8fe38f  mgr.ceph-mon-1.pxmquf   * 

root@ceph-mon-1:~# ceph crash info 2022-12-21T16:59:32.230077Z_50ed0f87-452c-4c8a-a5ce-01253b8fe38f
{
    "backtrace": [
        "  File \"/usr/share/ceph/mgr/rbd_support/module.py\", line 78, in __init__\n    self.rados.wait_for_latest_osdmap()",
        "  File \"/usr/share/ceph/mgr/mgr_module.py\", line 2127, in rados\n    self._rados.connect()",
        "  File \"rados.pyx\", line 680, in rados.Rados.connect",
        "rados.PermissionError: [errno 1] RADOS permission error (error connecting to the cluster)"
    ],
    "ceph_version": "17.2.5",
    "crash_id": "2022-12-21T16:59:32.230077Z_50ed0f87-452c-4c8a-a5ce-01253b8fe38f",
    "entity_name": "mgr.ceph-mon-1.pxmquf",
    "mgr_module": "rbd_support",
    "mgr_module_caller": "ActivePyModule::load",
    "mgr_python_exception": "PermissionError",
    "os_id": "centos",
    "os_name": "CentOS Stream",
    "os_version": "8",
    "os_version_id": "8",
    "process_name": "ceph-mgr",
    "stack_sig": "4d54e7e2bb4461b4d7440a6a8574e5d2e08024d8a205212857d4cd70b395552e",
    "timestamp": "2022-12-21T16:59:32.230077Z",
    "utsname_hostname": "ceph-mon-1",
    "utsname_machine": "x86_64",
    "utsname_release": "5.15.0-56-generic",
    "utsname_sysname": "Linux",
    "utsname_version": "#62-Ubuntu SMP Tue Nov 22 19:54:14 UTC 2022"
}

root@ceph-mon-1:~# ceph crash archive 2022-12-21T16:59:32.230077Z_50ed0f87-452c-4c8a-a5ce-01253b8fe38f

or run "ceph crash archive-all".

"x hosts fail cephadm check"

ubuntu@node1:~$ sudo su -
root@node1:~# ceph -s
  cluster:
    id:     368f01ea-8066-11ed-a64e-7dd65abbebdb
    health: HEALTH_WARN
            2 hosts fail cephadm check
 
  services:
    mon: 3 daemons, quorum node1,node3,node2 (age 2d)
    mgr: node3.mkzbpo(active, since 2d), standbys: node1.suwbve, node2.hqoqql
    osd: 9 osds: 9 up (since 2d), 9 in (since 5d)
 
  data:
    pools:   1 pools, 1 pgs
    objects: 0 objects, 0 B
    usage:   9.1 GiB used, 279 GiB / 288 GiB avail
    pgs:     1 active+clean
root@node1:~# ceph orch host ls
HOST   ADDR   LABELS          STATUS   
node1  node1  _admin mon osd  Offline  
node2  node2  mon osd         Offline  
node3  node3  mon osd   

root@node1:~# ceph orch host set-addr node1 192.168.1.151
Updated host 'node1' addr to '192.168.1.151'
root@node1:~# ceph orch host set-addr node2 192.168.1.152
Updated host 'node2' addr to '192.168.1.152'

root@node1:~# ceph orch host ls
HOST   ADDR           LABELS          STATUS  
node1  192.168.1.151  _admin mon osd          
node2  192.168.1.152  mon osd                 
node3  node3          mon osd

root@node1:~# ceph orch host set-addr node3 192.168.1.153
Updated host 'node3' addr to '192.168.1.153'
root@node1:~# ceph orch host ls
HOST   ADDR           LABELS          STATUS  
node1  192.168.1.151  _admin mon osd          
node2  192.168.1.152  mon osd                 
node3  192.168.1.153  mon osd 

"low on available space (MON_DISK_LOW)"

12/29/22 3:20:00 PM [WRN] overall HEALTH_WARN mon ceph-mon-1 is low on available space
12/29/22 3:14:17 PM [WRN] Health check failed: mon ceph-mon-1 is low on available space (MON_DISK_LOW)
root@ceph-mon-1:/var# df
Filesystem                        1K-blocks    Used Available Use% Mounted on
tmpfs                                401816    1420    400396   1% /run
/dev/mapper/ubuntu--vg-ubuntu--lv  15371208 9823696   4744904  68% /
tmpfs                               2009068       0   2009068   0% /dev/shm
tmpfs                                  5120       0      5120   0% /run/lock
/dev/vda2                           1992552  129652   1741660   7% /boot
overlay                            15371208 9823696   4744904  68% /var/lib/docker/overlay2/6dedb77046f16294df607bbb9cf418932221571afc11b42cc34208e86f15feb6/merged
overlay                            15371208 9823696   4744904  68% /var/lib/docker/overlay2/63a26aeb40448e16559a5c71780af6fdb02ad12153824c694cb2107dc819fe3e/merged
overlay                            15371208 9823696   4744904  68% /var/lib/docker/overlay2/6e50e23811b9299d6d391ec60c31b7dc9483fd7a0f78801aab9a4e4fa822f944/merged
tmpfs                                401812       4    401808   1% /run/user/0
tmpfs                                401812       4    401808   1% /run/user/1000

root@ceph-mon-1:/var# lsblk 
NAME                      MAJ:MIN RM  SIZE RO TYPE MOUNTPOINTS
loop1                       7:1    0 63.2M  1 loop /snap/core20/1738
loop2                       7:2    0 79.9M  1 loop /snap/lxd/22923
loop3                       7:3    0  103M  1 loop /snap/lxd/23541
loop4                       7:4    0 49.6M  1 loop /snap/snapd/17883
loop5                       7:5    0 63.3M  1 loop /snap/core20/1778
vda                       252:0    0   32G  0 disk 
├─vda1                    252:1    0    1M  0 part 
├─vda2                    252:2    0    2G  0 part /boot
└─vda3                    252:3    0   30G  0 part 
  └─ubuntu--vg-ubuntu--lv 253:0    0   15G  0 lvm  /

root@ceph-mon-1:/var/log# vgdisplay 
  --- Volume group ---
  VG Name               ubuntu-vg
  System ID             
  Format                lvm2
  Metadata Areas        1
  Metadata Sequence No  2
  VG Access             read/write
  VG Status             resizable
  MAX LV                0
  Cur LV                1
  Open LV               1
  Max PV                0
  Cur PV                1
  Act PV                1
  VG Size               <30.00 GiB
  PE Size               4.00 MiB
  Total PE              7679
  Alloc PE / Size       3839 / <15.00 GiB
  Free  PE / Size       3840 / 15.00 GiB
  VG UUID               NN4fei-GB61-Hs5R-U9oP-YcHp-5GUr-nRG4mU
  
root@ceph-mon-1:/var/log# lvextend -L +5G /dev/mapper/ubuntu--vg-ubuntu--lv
  Size of logical volume ubuntu-vg/ubuntu-lv changed from <15.00 GiB (3839 extents) to <20.00 GiB (5119 extents).
  Logical volume ubuntu-vg/ubuntu-lv successfully resized.
  

root@ceph-mon-1:/var/log# resize2fs /dev/mapper/ubuntu--vg-ubuntu--lv
resize2fs 1.46.5 (30-Dec-2021)
Filesystem at /dev/mapper/ubuntu--vg-ubuntu--lv is mounted on /; on-line resizing required
old_desc_blocks = 2, new_desc_blocks = 3
The filesystem on /dev/mapper/ubuntu--vg-ubuntu--lv is now 5241856 (4k) blocks long.


root@ceph-mon-1:/var/log# df -h
Filesystem                         Size  Used Avail Use% Mounted on
tmpfs                              393M  1.4M  392M   1% /run
/dev/mapper/ubuntu--vg-ubuntu--lv   20G  9.4G  9.3G  51% /
tmpfs                              2.0G     0  2.0G   0% /dev/shm
tmpfs                              5.0M     0  5.0M   0% /run/lock
/dev/vda2                          2.0G  127M  1.7G   7% /boot
overlay                             20G  9.4G  9.3G  51% /var/lib/docker/overlay2/6dedb77046f16294df607bbb9cf418932221571afc11b42cc34208e86f15feb6/merged
overlay                             20G  9.4G  9.3G  51% /var/lib/docker/overlay2/63a26aeb40448e16559a5c71780af6fdb02ad12153824c694cb2107dc819fe3e/merged
overlay                             20G  9.4G  9.3G  51% /var/lib/docker/overlay2/6e50e23811b9299d6d391ec60c31b7dc9483fd7a0f78801aab9a4e4fa822f944/merged
tmpfs                              393M  4.0K  393M   1% /run/user/0
tmpfs                              393M  4.0K  393M   1% /run/user/1000

Ansible task fail: [ceph-osd : use ceph-volume lvm batch to create bluestore osds]

Ansible reports this task as failed on the first attempt. The root cause is not clear yet, but re-running the playbook usually lets the task succeed; see the note after the task output below for manually wiping the device if the re-run keeps failing.

TASK [ceph-osd : use ceph-volume lvm batch to create bluestore osds] *********************************************************************************************************************************************************************
Saturday 31 December 2022  05:32:20 +0000 (0:00:00.234)       0:11:11.809 ***** 
fatal: [ans-ceph-osd2]: FAILED! => changed=true 
  cmd:
  - ceph-volume
  - --cluster
  - ceph
  - lvm
  - batch
  - --bluestore
  - --yes
  - /dev/vdb
  - /dev/vdc
  delta: '0:00:16.546066'
  end: '2022-12-31 16:32:38.137330'
  msg: non-zero return code
  rc: 1
  start: '2022-12-31 16:32:21.591264'
  stderr: |-
    --> DEPRECATION NOTICE
    --> You are using the legacy automatic disk sorting behavior
    --> The Pacific release will change the default to --no-auto
    --> passed data devices: 2 physical, 0 LVM
    --> relative data size: 1.0
    Running command: /usr/bin/ceph-authtool --gen-print-key
    Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring -i - osd new a85e5dda-50f4-4722-bba4-2c255997677f
    Running command: vgcreate --force --yes ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f /dev/vdb
     stdout: Physical volume "/dev/vdb" successfully created.
     stdout: Volume group "ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f" successfully created
    Running command: lvcreate --yes -l 8191 -n osd-block-a85e5dda-50f4-4722-bba4-2c255997677f ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f
     stdout: Logical volume "osd-block-a85e5dda-50f4-4722-bba4-2c255997677f" created.
    Running command: /usr/bin/ceph-authtool --gen-print-key
    Running command: /usr/bin/mount -t tmpfs tmpfs /var/lib/ceph/osd/ceph-0
    --> Executable selinuxenabled not in PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
    Running command: /usr/bin/chown -h ceph:ceph /dev/ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f/osd-block-a85e5dda-50f4-4722-bba4-2c255997677f
    Running command: /usr/bin/chown -R ceph:ceph /dev/dm-1
    Running command: /usr/bin/ln -s /dev/ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f/osd-block-a85e5dda-50f4-4722-bba4-2c255997677f /var/lib/ceph/osd/ceph-0/block
    Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring mon getmap -o /var/lib/ceph/osd/ceph-0/activate.monmap
     stderr: got monmap epoch 1
    Running command: /usr/bin/ceph-authtool /var/lib/ceph/osd/ceph-0/keyring --create-keyring --name osd.0 --add-key ********
     stdout: creating /var/lib/ceph/osd/ceph-0/keyring
     stdout: added entity osd.0 auth(key=********)
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-0/keyring
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-0/
    Running command: /usr/bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 0 --monmap /var/lib/ceph/osd/ceph-0/activate.monmap --keyfile - --osd-data /var/lib/ceph/osd/ceph-0/ --osd-uuid a85e5dda-50f4-4722-bba4-2c255997677f --setuser ceph --setgroup ceph
     stderr: 2022-12-31T16:32:25.748+1100 7f70f7d365c0 -1 bluestore(/var/lib/ceph/osd/ceph-0/) _read_fsid unparsable uuid
    --> ceph-volume lvm prepare successful for: /dev/vdb
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-0
    Running command: /usr/bin/ceph-bluestore-tool --cluster=ceph prime-osd-dir --dev /dev/ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f/osd-block-a85e5dda-50f4-4722-bba4-2c255997677f --path /var/lib/ceph/osd/ceph-0 --no-mon-config
    Running command: /usr/bin/ln -snf /dev/ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f/osd-block-a85e5dda-50f4-4722-bba4-2c255997677f /var/lib/ceph/osd/ceph-0/block
    Running command: /usr/bin/chown -h ceph:ceph /var/lib/ceph/osd/ceph-0/block
    Running command: /usr/bin/chown -R ceph:ceph /dev/dm-1
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-0
    Running command: /usr/bin/systemctl enable ceph-volume@lvm-0-a85e5dda-50f4-4722-bba4-2c255997677f
     stderr: Created symlink /etc/systemd/system/multi-user.target.wants/ceph-volume@lvm-0-a85e5dda-50f4-4722-bba4-2c255997677f.service → /lib/systemd/system/ceph-volume@.service.
    Running command: /usr/bin/systemctl enable --runtime ceph-osd@0
     stderr: Created symlink /run/systemd/system/ceph-osd.target.wants/ceph-osd@0.service → /lib/systemd/system/ceph-osd@.service.
    Running command: /usr/bin/systemctl start ceph-osd@0
    --> ceph-volume lvm activate successful for osd ID: 0
    --> ceph-volume lvm create successful for: /dev/vdb
    Running command: /usr/bin/ceph-authtool --gen-print-key
    Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring -i - osd new 16c9a037-dea3-49f1-adcb-4e4b79432605
    Running command: vgcreate --force --yes ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89 /dev/vdc
     stdout: Physical volume "/dev/vdc" successfully created.
     stdout: Volume group "ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89" successfully created
    Running command: lvcreate --yes -l 8191 -n osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605 ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89
     stdout: Logical volume "osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605" created.
    Running command: /usr/bin/ceph-authtool --gen-print-key
    Running command: /usr/bin/mount -t tmpfs tmpfs /var/lib/ceph/osd/ceph-4
    --> Executable selinuxenabled not in PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
    Running command: /usr/bin/chown -h ceph:ceph /dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605
    Running command: /usr/bin/chown -R ceph:ceph /dev/dm-2
    Running command: /usr/bin/ln -s /dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605 /var/lib/ceph/osd/ceph-4/block
    Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring mon getmap -o /var/lib/ceph/osd/ceph-4/activate.monmap
     stderr: got monmap epoch 1
    Running command: /usr/bin/ceph-authtool /var/lib/ceph/osd/ceph-4/keyring --create-keyring --name osd.4 --add-key ********
     stdout: creating /var/lib/ceph/osd/ceph-4/keyring
     stdout: added entity osd.4 auth(key=********)
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-4/keyring
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-4/
    Running command: /usr/bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 4 --monmap /var/lib/ceph/osd/ceph-4/activate.monmap --keyfile - --osd-data /var/lib/ceph/osd/ceph-4/ --osd-uuid 16c9a037-dea3-49f1-adcb-4e4b79432605 --setuser ceph --setgroup ceph
     stderr: 2022-12-31T16:32:33.592+1100 7f7e8922f5c0 -1 bluestore(/var/lib/ceph/osd/ceph-4/) _read_fsid unparsable uuid
     stderr: 2022-12-31T16:32:33.644+1100 7f7e8922f5c0 -1 bluefs _replay 0x0: stop: uuid 00000000-0000-0000-0000-000000000000 != super.uuid 722affc6-13a4-4549-93f3-f324e7eeec1d, block dump:
     stderr: 00000000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
     stderr: *
     stderr: 00000ff0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
     stderr: 00001000
     stderr: 2022-12-31T16:32:34.904+1100 7f7e8922f5c0 -1 rocksdb: verify_sharding unable to list column families: NotFound:
     stderr: 2022-12-31T16:32:34.904+1100 7f7e8922f5c0 -1 bluestore(/var/lib/ceph/osd/ceph-4/) _open_db erroring opening db:
     stderr: 2022-12-31T16:32:35.428+1100 7f7e8922f5c0 -1 OSD::mkfs: ObjectStore::mkfs failed with error (5) Input/output error
     stderr: 2022-12-31T16:32:35.428+1100 7f7e8922f5c0 -1 [0;31m ** ERROR: error creating empty object store in /var/lib/ceph/osd/ceph-4/: (5) Input/output error[0m
    --> Was unable to complete a new OSD, will rollback changes
    Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring osd purge-new osd.4 --yes-i-really-mean-it
     stderr: purged osd.4
    --> Zapping: /dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605
    --> Unmounting /var/lib/ceph/osd/ceph-4
    Running command: /usr/bin/umount -v /var/lib/ceph/osd/ceph-4
     stderr: umount:
     stderr: /var/lib/ceph/osd/ceph-4 unmounted
     stderr:
    Running command: /usr/bin/dd if=/dev/zero of=/dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605 bs=1M count=10 conv=fsync
     stderr: 10+0 records in
    10+0 records out
     stderr: 10485760 bytes (10 MB, 10 MiB) copied, 0.104136 s, 101 MB/s
    --> Only 1 LV left in VG, will proceed to destroy volume group ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89
    Running command: vgremove -v -f ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89
     stderr:
     stderr: Removing ceph--07864d4e--937a--4bd2--b80c--c68ce9a36b89-osd--block--16c9a037--dea3--49f1--adcb--4e4b79432605 (253:2)
     stderr:
     stderr: Archiving volume group "ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89" metadata (seqno 5).
      Releasing logical volume "osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605"
     stderr: Creating volume group backup "/etc/lvm/backup/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89" (seqno 6).
     stdout: Logical volume "osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605" successfully removed
     stderr: Removing physical volume "/dev/vdc" from volume group "ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89"
     stdout: Volume group "ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89" successfully removed
    --> Zapping successful for OSD: 4
    Traceback (most recent call last):
      File "/usr/sbin/ceph-volume", line 33, in <module>
        sys.exit(load_entry_point('ceph-volume==1.0.0', 'console_scripts', 'ceph-volume')())
      File "/usr/lib/python3/dist-packages/ceph_volume/main.py", line 41, in __init__
        self.main(self.argv)
      File "/usr/lib/python3/dist-packages/ceph_volume/decorators.py", line 59, in newfunc
        return f(*a, **kw)
      File "/usr/lib/python3/dist-packages/ceph_volume/main.py", line 153, in main
        terminal.dispatch(self.mapper, subcommand_args)
      File "/usr/lib/python3/dist-packages/ceph_volume/terminal.py", line 194, in dispatch
        instance.main()
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/main.py", line 46, in main
        terminal.dispatch(self.mapper, self.argv)
      File "/usr/lib/python3/dist-packages/ceph_volume/terminal.py", line 194, in dispatch
        instance.main()
      File "/usr/lib/python3/dist-packages/ceph_volume/decorators.py", line 16, in is_root
        return func(*a, **kw)
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/batch.py", line 444, in main
        self._execute(plan)
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/batch.py", line 463, in _execute
        c.create(argparse.Namespace(**args))
      File "/usr/lib/python3/dist-packages/ceph_volume/decorators.py", line 16, in is_root
        return func(*a, **kw)
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/create.py", line 26, in create
        prepare_step.safe_prepare(args)
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/prepare.py", line 252, in safe_prepare
        self.prepare()
      File "/usr/lib/python3/dist-packages/ceph_volume/decorators.py", line 16, in is_root
        return func(*a, **kw)
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/prepare.py", line 387, in prepare
        prepare_bluestore(
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/prepare.py", line 115, in prepare_bluestore
        prepare_utils.osd_mkfs_bluestore(
      File "/usr/lib/python3/dist-packages/ceph_volume/util/prepare.py", line 481, in osd_mkfs_bluestore
        raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
    RuntimeError: Command failed with exit code 250: /usr/bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 4 --monmap /var/lib/ceph/osd/ceph-4/activate.monmap --keyfile - --osd-data /var/lib/ceph/osd/ceph-4/ --osd-uuid 16c9a037-dea3-49f1-adcb-4e4b79432605 --setuser ceph --setgroup ceph
  stderr_lines: <omitted>
  stdout: ''
  stdout_lines: <omitted>
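
If a re-run still trips over leftover LVM state on the failing disk, the device can be wiped manually on the OSD node before trying again (destructive; /dev/vdc on ans-ceph-osd2 is taken from the output above, so double-check the device name in your own run):

root@ans-ceph-osd2:~# ceph-volume lvm zap /dev/vdc --destroy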
Ansible task fail "[ceph-grafana : download ceph grafana dashboards]"

The error is a DNS name-resolution failure on the node ("Temporary failure in name resolution"), which usually points to a transient network or DNS issue; fix name resolution if needed and re-run the playbook.

TASK [ceph-grafana : download ceph grafana dashboards] ***********************************************************************************************************************************************************************************
Saturday 31 December 2022  05:55:28 +0000 (0:00:02.700)       0:13:30.257 ***** 
failed: [ans-ceph-mon3] (item=ceph-cluster.json) => changed=false 
  ansible_loop_var: item
  dest: /etc/grafana/dashboards/ceph-dashboard/ceph-cluster.json
  elapsed: 5
  item: ceph-cluster.json
  msg: 'Request failed: <urlopen error [Errno -3] Temporary failure in name resolution>'
  url: https://raw.githubusercontent.com/ceph/ceph/quincy/monitoring/ceph-mixin/dashboards_out/ceph-cluster.json
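
A quick way to confirm whether name resolution works on the affected node before re-running (plain shell on the node reported in the error):

root@ans-ceph-mon3:~# getent hosts raw.githubusercontent.com    # should print an address
root@ans-ceph-mon3:~# resolvectl status                         # shows which DNS servers are configured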
