<style>.ui-infobar, #doc.markdown-body { max-width: 1200px; }</style>

# Setup Ceph storage via ceph-ansible

A basic guide to setting up Ceph storage via ceph-ansible.

###### tags: `ceph`

#### Environment

*Ansible Control Node OS*: Ubuntu 22.04
*Ceph Node OS*: Ubuntu 22.04 (vda)
*Username*: ubuntu
*Nodes*:
```
ans-ceph-deploy (ansible control node)
ans-ceph-mon1 | 2vCPU, 4G RAM
ans-ceph-mon2 | 2vCPU, 4G RAM
ans-ceph-mon3 | 2vCPU, 4G RAM
ans-ceph-osd1 | 2vCPU, 4G RAM, vdb(32GB), vdc(32GB)
ans-ceph-osd2 | 2vCPU, 4G RAM, vdb(32GB), vdc(32GB)
ans-ceph-osd3 | 2vCPU, 4G RAM, vdb(32GB), vdc(32GB)
```
*Networks*:
```
cluster: 192.168.122.0/24
public:  192.168.1.0/24
```
*/etc/hosts* on ansible control node
```
192.168.122.61 ans-ceph-mon1
192.168.122.62 ans-ceph-mon2
192.168.122.63 ans-ceph-mon3
192.168.122.64 ans-ceph-osd1
192.168.122.65 ans-ceph-osd2
192.168.122.66 ans-ceph-osd3
```
*Ansible version*:
```
$ ansible --version
ansible 2.10.17
  config file = None
  configured module search path = ['/home/ubuntu/.ansible/plugins/modules', '/usr/share/ansible/plugins/modules']
  ansible python module location = /home/ubuntu/.local/lib/python3.10/site-packages/ansible
  executable location = /home/ubuntu/.local/bin/ansible
  python version = 3.10.6 (main, Nov 14 2022, 16:10:14) [GCC 11.3.0]
```

#### Pre-configuration

Configure the following items:

*1. update timezone*
*2. update sudoers* (optional)
*3. update hostname*
*4. set public interface IP*
*5. disable ipv6* (optional)
*6. disable daily apt upgrade* (optional)
*7. install docker*

Ansible playbook example to configure all of them.
```
# filename: hosts
ceph-mon-1 hostname=ceph-mon-1 netplan_config_ip=192.168.1.111/24
ceph-mon-2 hostname=ceph-mon-2 netplan_config_ip=192.168.1.112/24
ceph-mon-3 hostname=ceph-mon-3 netplan_config_ip=192.168.1.113/24
ceph-osd-1 hostname=ceph-osd-1 netplan_config_ip=192.168.1.114/24 netplan_config_if=enp9s0
ceph-osd-2 hostname=ceph-osd-2 netplan_config_ip=192.168.1.115/24 netplan_config_if=enp9s0
ceph-osd-3 hostname=ceph-osd-3 netplan_config_ip=192.168.1.116/24 netplan_config_if=enp9s0

[all:vars]
netplan_config_if = "enp7s0"
netplan_config_file = "/etc/netplan/10-ansible-config.yaml"
```
```
# filename: pre-configure.yml
- name: Setup cluster environments
  hosts: all
  become: yes
  vars:
    tz: Australia/Sydney
    sudoers:
      - ubuntu
    root_password: "ubuntu"
    netplan_file: "{{ netplan_config_file }}"
    netplan_if: "{{ netplan_config_if }}"
    netplan_ip: "{{ netplan_config_ip }}"
    netplan_data: |
      network:
        version: 2
        ethernets:
          {{ netplan_if }}:
            dhcp4: false
            dhcp6: false
            dhcp-identifier: mac
            link-local: []
            addresses: [{{ netplan_ip }}]
    ssh_user: root
    ssh_key: |
      ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDIAJq4d9eXn00ZJKqqV8WhxPA59ufmCHzMtAWPvE7h4ofuB712AE0lXcWRJ8xysU3nqkZdCKM84m09wBYJG9Bc195T77fKBlHq6q8y0n4ErB6FW0Bwy7P4rpYlDQ+2a7P2SGy+OWDU05+L1fO1YcZpTAnMuzjjYPSv/ohBTJ+MU7AhsBXoYLWlkjV1CsBHCJ/VuFrAfCnj/LWiT+QyNcshdB08JUsLWcoxLmIwde2s0FPYqPSodIvEkmbQml59BndsCPBrnvN+iLBcR4b0Wk+J9bQkqSspGTnye0ald+nlJPupzwvDfFbL2lyThjU4U+Us6UEm3ptejZBlm4/DEKcFu5RAt0IEtSWoJDhSY96FNOrjo6sMDFdTZeFpwMgOkCh6ZDAYTRjamOdScvH5Sq/7doKsuTjdyb1utIGcKvVUB6NrWJq691o42Q49CCw6sHVUdC8PDrWsg4Z+1nAfA1R1QjgnccKdi3hAomUNapzxYstLjf/sG1AqByeHDoOTuS8= ubuntu@nuc112
  tasks:
    - name: set hostname
      ansible.builtin.hostname:
        name: "{{ hostname }}"

    - name: set timezone "{{ tz }}"
      community.general.timezone:
        name: "{{ tz }}"

    - name: set no password sudo
      ansible.builtin.lineinfile:
        path: /etc/sudoers
        line: "{{ item }} ALL=(ALL) NOPASSWD: ALL"
      loop: "{{ sudoers }}"

    # PermitRootLogin in /etc/ssh/sshd_config has to be yes
    # in order to ssh in with root
    - name: update root password
      ansible.builtin.user:
        name: root
        password: "{{ root_password | password_hash('sha512') }}"

    # Configure 2nd interface IP
    - name: print netplan config file
      ansible.builtin.debug:
        msg: "{{ netplan_file }}"

    - name: print netplan config data
      ansible.builtin.debug:
        msg: "{{ netplan_data }}"

    - name: write netplan config data
      ansible.builtin.copy:
        content: "{{ netplan_data }}"
        dest: "{{ netplan_file }}"
        owner: root
        group: root
        mode: '0644'

    - name: apply netplan config
      ansible.builtin.shell: |
        netplan apply

    - name: get "{{ netplan_if }}" address info
      ansible.builtin.shell: |
        ip -4 addr show {{ netplan_if }}
      register: ip_addr_show_out

    - name: show "{{ netplan_if }}" address info
      ansible.builtin.debug:
        msg: "{{ ip_addr_show_out.stdout_lines }}"

    # install docker
    - name: install docker
      ansible.builtin.apt:
        name: docker.io
        state: present
        update_cache: yes

    # disable IPv6
    - name: disable ipv6
      ansible.builtin.shell: |
        sysctl -w net.ipv6.conf.all.disable_ipv6=1
        sysctl -w net.ipv6.conf.default.disable_ipv6=1
        sysctl -w net.ipv6.conf.lo.disable_ipv6=1
        sysctl -p
      register: sysctl_out

    - name: print sysctl output
      ansible.builtin.debug:
        msg: "{{ sysctl_out.stdout }}"

    - name: add "net.ipv6.conf.all.disable_ipv6 = 1" in /etc/sysctl.conf
      ansible.builtin.lineinfile:
        path: /etc/sysctl.conf
        line: "{{ item }}"
      loop:
        - "net.ipv6.conf.default.disable_ipv6 = 1"
        - "net.ipv6.conf.all.disable_ipv6 = 1"

    - name: disable apt daily timer
      ansible.builtin.systemd:
        name: apt-daily.timer
        state: stopped
        enabled: no

    - name: disable apt daily upgrade timer
      ansible.builtin.systemd:
        name: apt-daily-upgrade.timer
        state: stopped
        enabled: no

    - name: print key
      ansible.builtin.debug:
        msg: "{{ ssh_key }}"

    - name: add ssh key to root
      ansible.builtin.lineinfile:
        path: /root/.ssh/authorized_keys
        line: "{{ ssh_key }}"
      when: ssh_user == 'root'

    - name: add ssh key to non root user
      ansible.builtin.lineinfile:
        path: "/home/{{ ssh_user }}/.ssh/authorized_keys"
        line: "{{ ssh_key }}"
      when: ssh_user != 'root'
```
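A side note on the `disable ipv6` task above: the shell-based `sysctl -w` calls work, but they are not idempotent and need a separate `lineinfile` task to persist the setting. If the `ansible.posix` collection is installed (`ansible-galaxy collection install ansible.posix`), the same result can be achieved with the `ansible.posix.sysctl` module. This is only a sketch of an alternative, not part of the original playbook:
```
    # alternative to the "disable ipv6" + sysctl.conf tasks above,
    # assuming the ansible.posix collection is installed
    - name: disable ipv6 (persistent and idempotent)
      ansible.posix.sysctl:
        name: "{{ item }}"
        value: '1'
        state: present
        sysctl_set: yes
        reload: yes
      loop:
        - net.ipv6.conf.all.disable_ipv6
        - net.ipv6.conf.default.disable_ipv6
        - net.ipv6.conf.lo.disable_ipv6
```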
Install the community.general collection:
```
$ ansible-galaxy collection install community.general
```
Run the playbook.
```
$ ansible-playbook -K -i ./hosts pre-configure.yml
```
Verify SSH access with the root account, for example:
```
$ ssh root@ans-ceph-mon1 "hostname; date; ip addr show enp7s0"
ans-ceph-mon1
Thu Dec 29 11:21:40 PM AEDT 2022
3: enp7s0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
    link/ether 52:54:00:60:26:dc brd ff:ff:ff:ff:ff:ff
    inet 192.168.1.161/24 brd 192.168.1.255 scope global enp7s0
       valid_lft forever preferred_lft forever
```

#### Setup

1. git clone ceph-ansible
```
ubuntu@ans-ceph-deploy:~$ git clone https://github.com/ceph/ceph-ansible.git
ubuntu@ans-ceph-deploy:~$ cd ceph-ansible/
ubuntu@ans-ceph-deploy:~/ceph-ansible$ git checkout stable-7.0
Branch 'stable-7.0' set up to track remote branch 'stable-7.0' from 'origin'.
Switched to a new branch 'stable-7.0'
```

2. install requirements
```
ubuntu@ans-ceph-deploy:~/ceph-ansible$ pip install -r ./requirements.txt
ubuntu@ans-ceph-deploy:~/ceph-ansible$ export PATH=$PATH:/home/ubuntu/.local/bin
ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible --version
[WARNING]: log file at /home/ubuntu/ansible/ansible.log is not writeable and we cannot create it, aborting
ansible [core 2.12.10]
  config file = /home/ubuntu/ceph-ansible/ansible.cfg
  configured module search path = ['/home/ubuntu/ceph-ansible/library']
  ansible python module location = /home/ubuntu/.local/lib/python3.10/site-packages/ansible
  ansible collection location = /home/ubuntu/.ansible/collections:/usr/share/ansible/collections
  executable location = /home/ubuntu/.local/bin/ansible
  python version = 3.10.6 (main, Nov 14 2022, 16:10:14) [GCC 11.3.0]
  jinja version = 3.0.3
  libyaml = True
ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible-galaxy install -r requirements.yml
```

3. create configuration

inventory file
```
ubuntu@ans-ceph-deploy:~/ceph-ansible$ vim hosts

[mons]
ans-ceph-mon1
ans-ceph-mon2
ans-ceph-mon3

[osds]
ans-ceph-osd1
ans-ceph-osd2
ans-ceph-osd3

[mgrs]
ans-ceph-mon1

[rgws]
ans-ceph-mon2

[nfss]
ans-ceph-mon2

[monitoring]
ans-ceph-mon3
```
ansible variables
```
ubuntu@ans-ceph-deploy:~/ceph-ansible/group_vars$ vim all.yml

ceph_origin: distro
ceph_repository: uca
monitor_interface: enp7s0
public_network: "192.168.1.0/24"
cluster_network: "192.168.122.0/24"
radosgw_interface: enp7s0
dashboard_admin_password: cephadmin
grafana_admin_password: cephadmin
osd_auto_discovery: true
```
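`osd_auto_discovery: true` lets ceph-ansible turn every empty, unused disk it finds into an OSD. If you prefer to pin the OSD disks explicitly, the usual alternative is a `devices` list for the OSD group; the sketch below is an assumption to be checked against `group_vars/osds.yml.sample` in the branch you checked out, not something used in this deployment:
```
# group_vars/osds.yml -- alternative to osd_auto_discovery: true
devices:
  - /dev/vdb
  - /dev/vdc
```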
site.yml
```
ubuntu@ans-ceph-deploy:~/ceph-ansible$ cp ./site.yml.sample ./site.yml
```

4. run playbook
```
ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible-playbook -i hosts site.yml
```

5. validation
```
root@ans-ceph-mon1:~# ceph -s
  cluster:
    id:     16260601-977d-43be-8deb-d6e1ca2b4e6d
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum ans-ceph-mon1,ans-ceph-mon2,ans-ceph-mon3 (age 52m)
    mgr: ans-ceph-mon1(active, since 7m)
    osd: 6 osds: 6 up (since 33m), 6 in (since 48m)
    rgw: 1 daemon active (1 hosts, 1 zones)

  data:
    pools:   5 pools, 129 pgs
    objects: 195 objects, 454 KiB
    usage:   181 MiB used, 192 GiB / 192 GiB avail
    pgs:     129 active+clean
```
![](https://i.imgur.com/idwaK8K.png)
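Optionally, a quick I/O smoke test confirms the cluster actually serves data. This is a sketch, not part of the original validation: run it from any node with the admin keyring, the pool name `testpool` is arbitrary, and deleting a pool requires `mon_allow_pool_delete` to be enabled first.
```
root@ans-ceph-mon1:~# ceph osd pool create testpool 32
root@ans-ceph-mon1:~# rados bench -p testpool 10 write --no-cleanup
root@ans-ceph-mon1:~# rados bench -p testpool 10 rand
root@ans-ceph-mon1:~# rados -p testpool cleanup
# pool deletion is disabled by default
root@ans-ceph-mon1:~# ceph config set mon mon_allow_pool_delete true
root@ans-ceph-mon1:~# ceph osd pool delete testpool testpool --yes-i-really-really-mean-it
```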
#### Todo

- Adding OSD
- Shrinking OSD
- Purging cluster
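As a starting point for these items, ceph-ansible ships infrastructure playbooks. The sketch below shows how they are commonly invoked; the playbook names, variables, and the hypothetical new OSD hostname should be verified against the checked-out stable-7.0 tree before running anything.
```
# add a new OSD node: add it to the [osds] group, then re-run site.yml limited to that host
ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible-playbook -i hosts site.yml --limit ans-ceph-osd4

# shrink (remove) an OSD by id
ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible-playbook -i hosts infrastructure-playbooks/shrink-osd.yml -e osd_to_kill=5

# purge the whole cluster
ubuntu@ans-ceph-deploy:~/ceph-ansible$ ansible-playbook -i hosts infrastructure-playbooks/purge-cluster.yml
```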
#### Troubleshooting

##### Random VM crash with kernel "5.15.0-56-generic" on a "NUC11ATBC4" NUC

This appears to be a bug in the 5.15.0 kernel; disabling APICv on the KVM host stops the VMs from crashing.
```
root@nuc112:~# cat /etc/modprobe.d/kvm.conf
options kvm_intel enable_apicv=0

# reboot

root@nuc112:~# cat /sys/module/kvm_intel/parameters/enable_apicv
N
```
* https://patchwork.kernel.org/project/kvm/patch/20210713142023.106183-9-mlevitsk@redhat.com/#24319635
* https://bugzilla.kernel.org/show_bug.cgi?id=215459

##### "mons are allowing insecure global_id reclaim"
```
root@ans-ceph-mon1:~# ceph -s
  cluster:
    id:     16260601-977d-43be-8deb-d6e1ca2b4e6d
    health: HEALTH_WARN
            mons are allowing insecure global_id reclaim

  services:
    mon: 3 daemons, quorum ans-ceph-mon1,ans-ceph-mon2,ans-ceph-mon3 (age 51m)
    mgr: ans-ceph-mon1(active, since 5m)
    osd: 6 osds: 6 up (since 32m), 6 in (since 47m)
    rgw: 1 daemon active (1 hosts, 1 zones)

  data:
    pools:   5 pools, 129 pgs
    objects: 195 objects, 454 KiB
    usage:   181 MiB used, 192 GiB / 192 GiB avail
    pgs:     129 active+clean
```
```
$ ceph config set mon mon_warn_on_insecure_global_id_reclaim false
$ ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false
```
* https://www.suse.com/support/kb/doc/?id=000019960
* https://github.com/rook/rook/issues/7746
* https://docs.ceph.com/en/latest/security/CVE-2021-20288/
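Note that the two `mon_warn_on_*` settings above only silence the warning. Per the CVE-2021-20288 advisory linked above, once all clients have been updated the cleaner fix is to stop allowing insecure reclaim altogether, for example:
```
$ ceph config set mon auth_allow_insecure_global_id_reclaim false
```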
##### "xxxx modules have recently crashed"
```
root@ceph-mon-1:~# ceph -s
  cluster:
    id:     08b5dcee-7f8a-11ed-91f0-1f83fb5d8f19
    health: HEALTH_WARN
            1 mgr modules have recently crashed

  services:
    mon: 3 daemons, quorum ceph-mon-1,ceph-mon-2,ceph-mon-3 (age 2d)
    mgr: ceph-mon-1.pxmquf(active, since 2d), standbys: ceph-mon-2.nfchvp
    osd: 6 osds: 6 up (since 2d), 6 in (since 2d)

  data:
    pools:   1 pools, 1 pgs
    objects: 2 objects, 449 KiB
    usage:   44 MiB used, 384 GiB / 384 GiB avail
    pgs:     1 active+clean
```
```
root@ceph-mon-1:~# ceph crash ls
ID                                                                ENTITY                 NEW
2022-12-21T16:59:32.230077Z_50ed0f87-452c-4c8a-a5ce-01253b8fe38f  mgr.ceph-mon-1.pxmquf  *

root@ceph-mon-1:~# ceph crash info 2022-12-21T16:59:32.230077Z_50ed0f87-452c-4c8a-a5ce-01253b8fe38f
{
    "backtrace": [
        "  File \"/usr/share/ceph/mgr/rbd_support/module.py\", line 78, in __init__\n    self.rados.wait_for_latest_osdmap()",
        "  File \"/usr/share/ceph/mgr/mgr_module.py\", line 2127, in rados\n    self._rados.connect()",
        "  File \"rados.pyx\", line 680, in rados.Rados.connect",
        "rados.PermissionError: [errno 1] RADOS permission error (error connecting to the cluster)"
    ],
    "ceph_version": "17.2.5",
    "crash_id": "2022-12-21T16:59:32.230077Z_50ed0f87-452c-4c8a-a5ce-01253b8fe38f",
    "entity_name": "mgr.ceph-mon-1.pxmquf",
    "mgr_module": "rbd_support",
    "mgr_module_caller": "ActivePyModule::load",
    "mgr_python_exception": "PermissionError",
    "os_id": "centos",
    "os_name": "CentOS Stream",
    "os_version": "8",
    "os_version_id": "8",
    "process_name": "ceph-mgr",
    "stack_sig": "4d54e7e2bb4461b4d7440a6a8574e5d2e08024d8a205212857d4cd70b395552e",
    "timestamp": "2022-12-21T16:59:32.230077Z",
    "utsname_hostname": "ceph-mon-1",
    "utsname_machine": "x86_64",
    "utsname_release": "5.15.0-56-generic",
    "utsname_sysname": "Linux",
    "utsname_version": "#62-Ubuntu SMP Tue Nov 22 19:54:14 UTC 2022"
}

root@ceph-mon-1:~# ceph crash archive 2022-12-21T16:59:32.230077Z_50ed0f87-452c-4c8a-a5ce-01253b8fe38f
```
OR run `ceph crash archive-all`.

* https://tracker.ceph.com/issues/51629

##### "x hosts fail cephadm check"
```
ubuntu@node1:~$ sudo su -
root@node1:~# ceph -s
  cluster:
    id:     368f01ea-8066-11ed-a64e-7dd65abbebdb
    health: HEALTH_WARN
            2 hosts fail cephadm check

  services:
    mon: 3 daemons, quorum node1,node3,node2 (age 2d)
    mgr: node3.mkzbpo(active, since 2d), standbys: node1.suwbve, node2.hqoqql
    osd: 9 osds: 9 up (since 2d), 9 in (since 5d)

  data:
    pools:   1 pools, 1 pgs
    objects: 0 objects, 0 B
    usage:   9.1 GiB used, 279 GiB / 288 GiB avail
    pgs:     1 active+clean
```
```
root@node1:~# ceph orch host ls
HOST   ADDR   LABELS          STATUS
node1  node1  _admin mon osd  Offline
node2  node2  mon osd         Offline
node3  node3  mon osd

root@node1:~# ceph orch host set-addr node1 192.168.1.151
Updated host 'node1' addr to '192.168.1.151'
root@node1:~# ceph orch host set-addr node2 192.168.1.152
Updated host 'node2' addr to '192.168.1.152'

root@node1:~# ceph orch host ls
HOST   ADDR           LABELS          STATUS
node1  192.168.1.151  _admin mon osd
node2  192.168.1.152  mon osd
node3  node3          mon osd

root@node1:~# ceph orch host set-addr node3 192.168.1.153
Updated host 'node3' addr to '192.168.1.153'

root@node1:~# ceph orch host ls
HOST   ADDR           LABELS          STATUS
node1  192.168.1.151  _admin mon osd
node2  192.168.1.152  mon osd
node3  192.168.1.153  mon osd
```
* https://blog.nuvotex.de/ceph-reports-n-hosts-fail-cephadm-check/
* https://forum.proxmox.com/threads/health_warn-1-daemons-have-recently-crashed.63105/
stdout: Volume group "ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89" successfully created Running command: lvcreate --yes -l 8191 -n osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605 ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89 stdout: Logical volume "osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605" created. Running command: /usr/bin/ceph-authtool --gen-print-key Running command: /usr/bin/mount -t tmpfs tmpfs /var/lib/ceph/osd/ceph-4 --> Executable selinuxenabled not in PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin Running command: /usr/bin/chown -h ceph:ceph /dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605 Running command: /usr/bin/chown -R ceph:ceph /dev/dm-2 Running command: /usr/bin/ln -s /dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605 /var/lib/ceph/osd/ceph-4/block Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring mon getmap -o /var/lib/ceph/osd/ceph-4/activate.monmap stderr: got monmap epoch 1 Running command: /usr/bin/ceph-authtool /var/lib/ceph/osd/ceph-4/keyring --create-keyring --name osd.4 --add-key ******** stdout: creating /var/lib/ceph/osd/ceph-4/keyring stdout: added entity osd.4 auth(key=********) Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-4/keyring Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-4/ Running command: /usr/bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 4 --monmap /var/lib/ceph/osd/ceph-4/activate.monmap --keyfile - --osd-data /var/lib/ceph/osd/ceph-4/ --osd-uuid 16c9a037-dea3-49f1-adcb-4e4b79432605 --setuser ceph --setgroup ceph stderr: 2022-12-31T16:32:33.592+1100 7f7e8922f5c0 -1 bluestore(/var/lib/ceph/osd/ceph-4/) _read_fsid unparsable uuid stderr: 2022-12-31T16:32:33.644+1100 7f7e8922f5c0 -1 bluefs _replay 0x0: stop: uuid 00000000-0000-0000-0000-000000000000 != super.uuid 722affc6-13a4-4549-93f3-f324e7eeec1d, block dump: stderr: 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| stderr: * stderr: 00000ff0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| stderr: 00001000 stderr: 2022-12-31T16:32:34.904+1100 7f7e8922f5c0 -1 rocksdb: verify_sharding unable to list column families: NotFound: stderr: 2022-12-31T16:32:34.904+1100 7f7e8922f5c0 -1 bluestore(/var/lib/ceph/osd/ceph-4/) _open_db erroring opening db: stderr: 2022-12-31T16:32:35.428+1100 7f7e8922f5c0 -1 OSD::mkfs: ObjectStore::mkfs failed with error (5) Input/output error stderr: 2022-12-31T16:32:35.428+1100 7f7e8922f5c0 -1 [0;31m ** ERROR: error creating empty object store in /var/lib/ceph/osd/ceph-4/: (5) Input/output error[0m --> Was unable to complete a new OSD, will rollback changes Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring osd purge-new osd.4 --yes-i-really-mean-it stderr: purged osd.4 --> Zapping: /dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605 --> Unmounting /var/lib/ceph/osd/ceph-4 Running command: /usr/bin/umount -v /var/lib/ceph/osd/ceph-4 stderr: umount: stderr: /var/lib/ceph/osd/ceph-4 unmounted stderr: Running command: /usr/bin/dd if=/dev/zero of=/dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605 bs=1M count=10 conv=fsync stderr: 10+0 records in 10+0 records out stderr: 10485760 bytes (10 MB, 10 MiB) copied, 0.104136 s, 101 MB/s 
##### Ansible task fail: [ceph-osd : use ceph-volume lvm batch to create bluestore osds]

Ansible reported this task as failed on the first attempt. The root cause is not clear yet, but re-running the playbook usually lets the task succeed.
```
TASK [ceph-osd : use ceph-volume lvm batch to create bluestore osds] *********************************************************************
Saturday 31 December 2022  05:32:20 +0000 (0:00:00.234)       0:11:11.809 *****
fatal: [ans-ceph-osd2]: FAILED! => changed=true
  cmd:
  - ceph-volume
  - --cluster
  - ceph
  - lvm
  - batch
  - --bluestore
  - --yes
  - /dev/vdb
  - /dev/vdc
  delta: '0:00:16.546066'
  end: '2022-12-31 16:32:38.137330'
  msg: non-zero return code
  rc: 1
  start: '2022-12-31 16:32:21.591264'
  stderr: |-
    --> DEPRECATION NOTICE
    --> You are using the legacy automatic disk sorting behavior
    --> The Pacific release will change the default to --no-auto
    --> passed data devices: 2 physical, 0 LVM
    --> relative data size: 1.0
    Running command: /usr/bin/ceph-authtool --gen-print-key
    Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring -i - osd new a85e5dda-50f4-4722-bba4-2c255997677f
    Running command: vgcreate --force --yes ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f /dev/vdb
     stdout: Physical volume "/dev/vdb" successfully created.
     stdout: Volume group "ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f" successfully created
    Running command: lvcreate --yes -l 8191 -n osd-block-a85e5dda-50f4-4722-bba4-2c255997677f ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f
     stdout: Logical volume "osd-block-a85e5dda-50f4-4722-bba4-2c255997677f" created.
    Running command: /usr/bin/ceph-authtool --gen-print-key
    Running command: /usr/bin/mount -t tmpfs tmpfs /var/lib/ceph/osd/ceph-0
    --> Executable selinuxenabled not in PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
    Running command: /usr/bin/chown -h ceph:ceph /dev/ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f/osd-block-a85e5dda-50f4-4722-bba4-2c255997677f
    Running command: /usr/bin/chown -R ceph:ceph /dev/dm-1
    Running command: /usr/bin/ln -s /dev/ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f/osd-block-a85e5dda-50f4-4722-bba4-2c255997677f /var/lib/ceph/osd/ceph-0/block
    Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring mon getmap -o /var/lib/ceph/osd/ceph-0/activate.monmap
     stderr: got monmap epoch 1
    Running command: /usr/bin/ceph-authtool /var/lib/ceph/osd/ceph-0/keyring --create-keyring --name osd.0 --add-key ********
     stdout: creating /var/lib/ceph/osd/ceph-0/keyring
     stdout: added entity osd.0 auth(key=********)
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-0/keyring
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-0/
    Running command: /usr/bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 0 --monmap /var/lib/ceph/osd/ceph-0/activate.monmap --keyfile - --osd-data /var/lib/ceph/osd/ceph-0/ --osd-uuid a85e5dda-50f4-4722-bba4-2c255997677f --setuser ceph --setgroup ceph
     stderr: 2022-12-31T16:32:25.748+1100 7f70f7d365c0 -1 bluestore(/var/lib/ceph/osd/ceph-0/) _read_fsid unparsable uuid
    --> ceph-volume lvm prepare successful for: /dev/vdb
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-0
    Running command: /usr/bin/ceph-bluestore-tool --cluster=ceph prime-osd-dir --dev /dev/ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f/osd-block-a85e5dda-50f4-4722-bba4-2c255997677f --path /var/lib/ceph/osd/ceph-0 --no-mon-config
    Running command: /usr/bin/ln -snf /dev/ceph-9ec2bfc6-c620-4282-a667-ed436a078c4f/osd-block-a85e5dda-50f4-4722-bba4-2c255997677f /var/lib/ceph/osd/ceph-0/block
    Running command: /usr/bin/chown -h ceph:ceph /var/lib/ceph/osd/ceph-0/block
    Running command: /usr/bin/chown -R ceph:ceph /dev/dm-1
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-0
    Running command: /usr/bin/systemctl enable ceph-volume@lvm-0-a85e5dda-50f4-4722-bba4-2c255997677f
     stderr: Created symlink /etc/systemd/system/multi-user.target.wants/ceph-volume@lvm-0-a85e5dda-50f4-4722-bba4-2c255997677f.service → /lib/systemd/system/ceph-volume@.service.
    Running command: /usr/bin/systemctl enable --runtime ceph-osd@0
     stderr: Created symlink /run/systemd/system/ceph-osd.target.wants/ceph-osd@0.service → /lib/systemd/system/ceph-osd@.service.
    Running command: /usr/bin/systemctl start ceph-osd@0
    --> ceph-volume lvm activate successful for osd ID: 0
    --> ceph-volume lvm create successful for: /dev/vdb
    Running command: /usr/bin/ceph-authtool --gen-print-key
    Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring -i - osd new 16c9a037-dea3-49f1-adcb-4e4b79432605
    Running command: vgcreate --force --yes ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89 /dev/vdc
     stdout: Physical volume "/dev/vdc" successfully created.
     stdout: Volume group "ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89" successfully created
    Running command: lvcreate --yes -l 8191 -n osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605 ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89
     stdout: Logical volume "osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605" created.
    Running command: /usr/bin/ceph-authtool --gen-print-key
    Running command: /usr/bin/mount -t tmpfs tmpfs /var/lib/ceph/osd/ceph-4
    --> Executable selinuxenabled not in PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
    Running command: /usr/bin/chown -h ceph:ceph /dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605
    Running command: /usr/bin/chown -R ceph:ceph /dev/dm-2
    Running command: /usr/bin/ln -s /dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605 /var/lib/ceph/osd/ceph-4/block
    Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring mon getmap -o /var/lib/ceph/osd/ceph-4/activate.monmap
     stderr: got monmap epoch 1
    Running command: /usr/bin/ceph-authtool /var/lib/ceph/osd/ceph-4/keyring --create-keyring --name osd.4 --add-key ********
     stdout: creating /var/lib/ceph/osd/ceph-4/keyring
     stdout: added entity osd.4 auth(key=********)
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-4/keyring
    Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-4/
    Running command: /usr/bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 4 --monmap /var/lib/ceph/osd/ceph-4/activate.monmap --keyfile - --osd-data /var/lib/ceph/osd/ceph-4/ --osd-uuid 16c9a037-dea3-49f1-adcb-4e4b79432605 --setuser ceph --setgroup ceph
     stderr: 2022-12-31T16:32:33.592+1100 7f7e8922f5c0 -1 bluestore(/var/lib/ceph/osd/ceph-4/) _read_fsid unparsable uuid
     stderr: 2022-12-31T16:32:33.644+1100 7f7e8922f5c0 -1 bluefs _replay 0x0: stop: uuid 00000000-0000-0000-0000-000000000000 != super.uuid 722affc6-13a4-4549-93f3-f324e7eeec1d, block dump:
     stderr: 00000000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
     stderr: *
     stderr: 00000ff0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
     stderr: 00001000
     stderr: 2022-12-31T16:32:34.904+1100 7f7e8922f5c0 -1 rocksdb: verify_sharding unable to list column families: NotFound:
     stderr: 2022-12-31T16:32:34.904+1100 7f7e8922f5c0 -1 bluestore(/var/lib/ceph/osd/ceph-4/) _open_db erroring opening db:
     stderr: 2022-12-31T16:32:35.428+1100 7f7e8922f5c0 -1 OSD::mkfs: ObjectStore::mkfs failed with error (5) Input/output error
     stderr: 2022-12-31T16:32:35.428+1100 7f7e8922f5c0 -1 [0;31m ** ERROR: error creating empty object store in /var/lib/ceph/osd/ceph-4/: (5) Input/output error[0m
    --> Was unable to complete a new OSD, will rollback changes
    Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring osd purge-new osd.4 --yes-i-really-mean-it
     stderr: purged osd.4
    --> Zapping: /dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605
    --> Unmounting /var/lib/ceph/osd/ceph-4
    Running command: /usr/bin/umount -v /var/lib/ceph/osd/ceph-4
     stderr: umount:
     stderr: /var/lib/ceph/osd/ceph-4 unmounted
     stderr:
    Running command: /usr/bin/dd if=/dev/zero of=/dev/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89/osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605 bs=1M count=10 conv=fsync
     stderr: 10+0 records in
     10+0 records out
     stderr: 10485760 bytes (10 MB, 10 MiB) copied, 0.104136 s, 101 MB/s
    --> Only 1 LV left in VG, will proceed to destroy volume group ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89
    Running command: vgremove -v -f ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89
     stderr:
     stderr: Removing ceph--07864d4e--937a--4bd2--b80c--c68ce9a36b89-osd--block--16c9a037--dea3--49f1--adcb--4e4b79432605 (253:2)
     stderr:
     stderr: Archiving volume group "ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89" metadata (seqno 5).
      Releasing logical volume "osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605"
     stderr: Creating volume group backup "/etc/lvm/backup/ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89" (seqno 6).
     stdout: Logical volume "osd-block-16c9a037-dea3-49f1-adcb-4e4b79432605" successfully removed
     stderr: Removing physical volume "/dev/vdc" from volume group "ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89"
     stdout: Volume group "ceph-07864d4e-937a-4bd2-b80c-c68ce9a36b89" successfully removed
    --> Zapping successful for OSD: 4
    Traceback (most recent call last):
      File "/usr/sbin/ceph-volume", line 33, in <module>
        sys.exit(load_entry_point('ceph-volume==1.0.0', 'console_scripts', 'ceph-volume')())
      File "/usr/lib/python3/dist-packages/ceph_volume/main.py", line 41, in __init__
        self.main(self.argv)
      File "/usr/lib/python3/dist-packages/ceph_volume/decorators.py", line 59, in newfunc
        return f(*a, **kw)
      File "/usr/lib/python3/dist-packages/ceph_volume/main.py", line 153, in main
        terminal.dispatch(self.mapper, subcommand_args)
      File "/usr/lib/python3/dist-packages/ceph_volume/terminal.py", line 194, in dispatch
        instance.main()
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/main.py", line 46, in main
        terminal.dispatch(self.mapper, self.argv)
      File "/usr/lib/python3/dist-packages/ceph_volume/terminal.py", line 194, in dispatch
        instance.main()
      File "/usr/lib/python3/dist-packages/ceph_volume/decorators.py", line 16, in is_root
        return func(*a, **kw)
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/batch.py", line 444, in main
        self._execute(plan)
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/batch.py", line 463, in _execute
        c.create(argparse.Namespace(**args))
      File "/usr/lib/python3/dist-packages/ceph_volume/decorators.py", line 16, in is_root
        return func(*a, **kw)
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/create.py", line 26, in create
        prepare_step.safe_prepare(args)
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/prepare.py", line 252, in safe_prepare
        self.prepare()
      File "/usr/lib/python3/dist-packages/ceph_volume/decorators.py", line 16, in is_root
        return func(*a, **kw)
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/prepare.py", line 387, in prepare
        prepare_bluestore(
      File "/usr/lib/python3/dist-packages/ceph_volume/devices/lvm/prepare.py", line 115, in prepare_bluestore
        prepare_utils.osd_mkfs_bluestore(
      File "/usr/lib/python3/dist-packages/ceph_volume/util/prepare.py", line 481, in osd_mkfs_bluestore
        raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
    RuntimeError: Command failed with exit code 250: /usr/bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 4 --monmap /var/lib/ceph/osd/ceph-4/activate.monmap --keyfile - --osd-data /var/lib/ceph/osd/ceph-4/ --osd-uuid 16c9a037-dea3-49f1-adcb-4e4b79432605 --setuser ceph --setgroup ceph
  stderr_lines: <omitted>
  stdout: ''
  stdout_lines: <omitted>
```
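If the re-run fails again because the device still carries LVM metadata left over from the first attempt, it may help to zap the device before retrying. This is a sketch using the device from the log above (`/dev/vdc` on `ans-ceph-osd2`); it wipes the disk, so only run it against a device that holds no data you need:
```
root@ans-ceph-osd2:~# ceph-volume lvm zap /dev/vdc --destroy
```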
##### Ansible task fail: [ceph-grafana : download ceph grafana dashboards]

This is likely a transient network or DNS issue on the monitoring node; re-run the playbook.
```
TASK [ceph-grafana : download ceph grafana dashboards] ***********************************************************************************
Saturday 31 December 2022  05:55:28 +0000 (0:00:02.700)       0:13:30.257 *****
failed: [ans-ceph-mon3] (item=ceph-cluster.json) => changed=false
  ansible_loop_var: item
  dest: /etc/grafana/dashboards/ceph-dashboard/ceph-cluster.json
  elapsed: 5
  item: ceph-cluster.json
  msg: 'Request failed: <urlopen error [Errno -3] Temporary failure in name resolution>'
  url: https://raw.githubusercontent.com/ceph/ceph/quincy/monitoring/ceph-mixin/dashboards_out/ceph-cluster.json
```

#### Reference:
* https://docs.ceph.com/projects/ceph-ansible/en/latest/