
Build all-in-one Ceph cluster via cephadm

tags: ceph

Deploy an all-in-one Ceph cluster for testing purposes.

VM spec.

  • Hostname: ceph-aio
  • OS: Ubuntu 20.04
  • IP: 192.168.122.90
  • 2 vCPU
  • 4G RAM
  • 16GB vda (OS disk)
  • 10GB vdb
  • 10GB vdc
  • 10GB vdd

The cluster will be configured with no replicas because it has only a single node.

build steps

1 - download cephadm

root@ceph-aio:~# curl --silent --remote-name --location https://github.com/ceph/ceph/raw/quincy/src/cephadm/cephadm
root@ceph-aio:~# chmod +x cephadm

2 - install docker

root@ceph-aio:~# sudo apt update
root@ceph-aio:~# sudo apt -y install ca-certificates curl gnupg lsb-release

root@ceph-aio:~# curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg

root@ceph-aio:~# echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
>   $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null

root@ceph-aio:~# sudo apt update
root@ceph-aio:~# sudo apt -y install docker-ce docker-ce-cli containerd.io docker-compose-plugin

root@ceph-aio:~# docker --version
Docker version 20.10.14, build a224086

3 - add ceph repo

root@ceph-aio:~# ./cephadm add-repo --release quincy
Installing repo GPG key from https://download.ceph.com/keys/release.gpg...
Installing repo file at /etc/apt/sources.list.d/ceph.list...
Updating package list...
Completed adding repo.

4 - install cephadm and ceph-common

root@ceph-aio:~# ./cephadm install
Installing packages ['cephadm']...

root@ceph-aio:~# which cephadm
/usr/sbin/cephadm

root@ceph-aio:~# ./cephadm install ceph-common
root@ceph-aio:~# ceph -v
ceph version 17.2.0 (43e2e60a7559d3f46c9d53f1ca875fd499a1e35e) quincy (stable)

5 - bootstrap ceph

root@ceph-aio:~# sudo mkdir -p /etc/ceph
root@ceph-aio:~# cephadm bootstrap --mon-ip 192.168.122.90
root@ceph-aio:~# ceph orch ps
NAME                    HOST      PORTS        STATUS         REFRESHED  AGE  MEM USE  MEM LIM  VERSION  IMAGE ID      CONTAINER ID  
alertmanager.ceph-aio   ceph-aio  *:9093,9094  running (17m)     5m ago  19m    12.1M        -           ba2b418f427c  6a0419782df9  
crash.ceph-aio          ceph-aio               running (19m)     5m ago  19m    7740k        -  17.2.0   c92c93952052  98879b083c8c  
grafana.ceph-aio        ceph-aio  *:3000       running (17m)     5m ago  17m    39.4M        -  8.3.5    dad864ee21e9  c19407f8a7e8  
mgr.ceph-aio.nmuhhm     ceph-aio  *:9283       running (20m)     5m ago  20m     426M        -  17.2.0   c92c93952052  d02b89542cb3  
mon.ceph-aio            ceph-aio               running (20m)     5m ago  20m    33.4M    2048M  17.2.0   c92c93952052  10e30a018478  
node-exporter.ceph-aio  ceph-aio  *:9100       running (17m)     5m ago  17m    9247k        -           1dbe0e931976  7ea15b5b4410  
prometheus.ceph-aio     ceph-aio  *:9095       running (17m)     5m ago  17m    40.0M        -           514e6a882f6e  703ac81e0905

root@ceph-aio:~# ceph orch ps --daemon-type mon
NAME          HOST      PORTS  STATUS         REFRESHED  AGE  MEM USE  MEM LIM  VERSION  IMAGE ID      CONTAINER ID  
mon.ceph-aio  ceph-aio         running (20m)     6m ago  20m    33.4M    2048M  17.2.0   c92c93952052  10e30a018478
root@ceph-aio:~# ceph -s
  cluster:
    id:     b25ef344-c62a-11ec-a3e0-4f9f42678996
    health: HEALTH_WARN
            OSD count 0 < osd_pool_default_size 3
 
  services:
    mon: 1 daemons, quorum ceph-aio (age 19m)
    mgr: ceph-aio.nmuhhm(active, since 16m)
    osd: 0 osds: 0 up, 0 in
 
  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs: 
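
note: for a single-node lab like this, cephadm also has a --single-host-defaults bootstrap flag (not used here). As far as the cephadm docs go, it adjusts a few defaults (e.g. the CRUSH failure domain and the default pool size) so replication can work across OSDs on one host, which would avoid some of the pool-size tweaking done in later steps:

# alternative bootstrap for a single-host cluster (sketch, not run in this walkthrough)
$ cephadm bootstrap --mon-ip 192.168.122.90 --single-host-defaults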

6 - add OSDs

root@ceph-aio:~# lsblk 
NAME   MAJ:MIN RM  SIZE RO TYPE MOUNTPOINT
loop0    7:0    0 55.5M  1 loop /snap/core18/2284
loop1    7:1    0 61.9M  1 loop /snap/core20/1405
loop2    7:2    0 67.9M  1 loop /snap/lxd/22526
loop3    7:3    0 67.8M  1 loop /snap/lxd/22753
loop4    7:4    0 61.9M  1 loop /snap/core20/1376
loop5    7:5    0 55.5M  1 loop /snap/core18/2344
loop6    7:6    0 43.6M  1 loop /snap/snapd/15177
loop7    7:7    0 44.7M  1 loop /snap/snapd/15534
vda    252:0    0   16G  0 disk 
├─vda1 252:1    0    1M  0 part 
└─vda2 252:2    0   16G  0 part /
vdb    252:16   0   10G  0 disk 
vdc    252:32   0   10G  0 disk 
vdd    252:48   0   10G  0 disk

root@ceph-aio:~# ceph orch device ls
HOST      PATH      TYPE  DEVICE ID   SIZE  AVAILABLE  REJECT REASONS  
ceph-aio  /dev/vdb  hdd              10.7G  Yes                        
ceph-aio  /dev/vdc  hdd              10.7G  Yes                        
ceph-aio  /dev/vdd  hdd              10.7G  Yes
root@ceph-aio:~# ceph orch apply osd --all-available-devices
Scheduled osd.all-available-devices update...
root@ceph-aio:~# ceph -s
  cluster:
    id:     b25ef344-c62a-11ec-a3e0-4f9f42678996
    health: HEALTH_OK
 
  services:
    mon: 1 daemons, quorum ceph-aio (age 25m)
    mgr: ceph-aio.nmuhhm(active, since 23m)
    osd: 3 osds: 0 up, 3 in (since 4s)
 
  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs:

root@ceph-aio:~# ceph osd tree
ID  CLASS  WEIGHT   TYPE NAME          STATUS  REWEIGHT  PRI-AFF
-1         0.02939  root default                                
-3         0.02939      host ceph-aio                           
 0    hdd  0.00980          osd.0          up   1.00000  1.00000
 1    hdd  0.00980          osd.1          up   1.00000  1.00000
 2    hdd  0.00980          osd.2          up   1.00000  1.00000

root@ceph-aio:~# ceph osd df
ID  CLASS  WEIGHT   REWEIGHT  SIZE    RAW USE  DATA     OMAP  META    AVAIL   %USE  VAR   PGS  STATUS
 0    hdd  0.00980   1.00000  10 GiB   19 MiB  160 KiB   0 B  19 MiB  10 GiB  0.19  1.00    0      up
 1    hdd  0.00980   1.00000  10 GiB   19 MiB  160 KiB   0 B  19 MiB  10 GiB  0.19  1.00    1      up
 2    hdd  0.00980   1.00000  10 GiB   19 MiB  160 KiB   0 B  19 MiB  10 GiB  0.19  1.00    0      up
                       TOTAL  30 GiB   58 MiB  480 KiB   0 B  58 MiB  30 GiB  0.19                   
MIN/MAX VAR: 1.00/1.00  STDDEV: 0
root@ceph-aio:~# docker ps
CONTAINER ID   IMAGE                                     COMMAND                  CREATED          STATUS          PORTS     NAMES
811d374e297c   quay.io/ceph/ceph                         "/usr/bin/ceph-osd -…"   5 minutes ago    Up 5 minutes              ceph-b25ef344-c62a-11ec-a3e0-4f9f42678996-osd-2
7296b32c2ba9   quay.io/ceph/ceph                         "/usr/bin/ceph-osd -…"   5 minutes ago    Up 5 minutes              ceph-b25ef344-c62a-11ec-a3e0-4f9f42678996-osd-1
59dbe29f7152   quay.io/ceph/ceph                         "/usr/bin/ceph-osd -…"   6 minutes ago    Up 5 minutes              ceph-b25ef344-c62a-11ec-a3e0-4f9f42678996-osd-0
c19407f8a7e8   quay.io/ceph/ceph-grafana:8.3.5           "/bin/sh -c 'grafana…"   28 minutes ago   Up 28 minutes             ceph-b25ef344-c62a-11ec-a3e0-4f9f42678996-grafana-ceph-aio
6a0419782df9   quay.io/prometheus/alertmanager:v0.23.0   "/bin/alertmanager -…"   28 minutes ago   Up 28 minutes             ceph-b25ef344-c62a-11ec-a3e0-4f9f42678996-alertmanager-ceph-aio
703ac81e0905   quay.io/prometheus/prometheus:v2.33.4     "/bin/prometheus --c…"   28 minutes ago   Up 28 minutes             ceph-b25ef344-c62a-11ec-a3e0-4f9f42678996-prometheus-ceph-aio
7ea15b5b4410   quay.io/prometheus/node-exporter:v1.3.1   "/bin/node_exporter …"   29 minutes ago   Up 29 minutes             ceph-b25ef344-c62a-11ec-a3e0-4f9f42678996-node-exporter-ceph-aio
98879b083c8c   quay.io/ceph/ceph                         "/usr/bin/ceph-crash…"   30 minutes ago   Up 30 minutes             ceph-b25ef344-c62a-11ec-a3e0-4f9f42678996-crash-ceph-aio
d02b89542cb3   quay.io/ceph/ceph:v17                     "/usr/bin/ceph-mgr -…"   31 minutes ago   Up 31 minutes             ceph-b25ef344-c62a-11ec-a3e0-4f9f42678996-mgr-ceph-aio-nmuhhm
10e30a018478   quay.io/ceph/ceph:v17                     "/usr/bin/ceph-mon -…"   31 minutes ago   Up 31 minutes             ceph-b25ef344-c62a-11ec-a3e0-4f9f42678996-mon-ceph-aio
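
btw, instead of consuming every available device, OSDs can also be added per device; something like this should work for this VM's disks:

# add OSDs on specific devices instead of --all-available-devices
$ ceph orch daemon add osd ceph-aio:/dev/vdb
$ ceph orch daemon add osd ceph-aio:/dev/vdc
$ ceph orch daemon add osd ceph-aio:/dev/vdd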

a few minutes later, the ceph status becomes HEALTH_WARN

root@ceph-aio:~# ceph -s
  cluster:
    id:     b25ef344-c62a-11ec-a3e0-4f9f42678996
    health: HEALTH_WARN
            Reduced data availability: 1 pg inactive
            Degraded data redundancy: 1 pg undersized
 
  services:
    mon: 1 daemons, quorum ceph-aio (age 27m)
    mgr: ceph-aio.nmuhhm(active, since 24m)
    osd: 3 osds: 3 up (since 91s), 3 in (since 112s)
 
  data:
    pools:   1 pools, 1 pgs
    objects: 0 objects, 0 B
    usage:   58 MiB used, 30 GiB / 30 GiB avail
    pgs:     100.000% pgs not active
             1 undersized+peered

This is because the '.mgr' pool is created with the default size 3 and min_size 2, but the all-in-one cluster can only store a single replica.

root@ceph-aio:~# ceph osd dump | grep 'replicated size'
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 19 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr

7 - configure cluster to allow pool size one

root@ceph-aio:~# ceph config set global mon_allow_pool_size_one true

set the '.mgr' pool size to 1 (set size 2 first and then 1)

root@ceph-aio:~# ceph osd pool set .mgr size 2 --yes-i-really-mean-it
set pool 1 size to 2
root@ceph-aio:~# ceph osd pool set .mgr size 1 --yes-i-really-mean-it
set pool 1 size to 1

# this step is not strictly necessary here, as the size has already been decreased to 1
root@ceph-aio:~# ceph osd pool set .mgr min_size 1
set pool 1 min_size to 1
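
as an aside, new pools can also default to a single replica, so this per-pool dance is not needed every time. A sketch (not used in the steps below, and it does not change already-existing pools like '.mgr'):

# make newly created pools single-replica by default
# (mon_allow_pool_size_one was already enabled above)
$ ceph config set global osd_pool_default_size 1
$ ceph config set global osd_pool_default_min_size 1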

8 - create "rbd" pool

root@ceph-aio:~# ceph osd pool create rbd 64 64 replicated
pool 'rbd' created

set the size to 1 (same procedure as for the '.mgr' pool)

root@ceph-aio:~# ceph osd pool set rbd min_size 1
set pool 2 min_size to 1
root@ceph-aio:~# ceph osd pool set rbd size 2 --yes-i-really-mean-it
set pool 2 size to 2
root@ceph-aio:~# ceph osd pool set rbd size 1 --yes-i-really-mean-it
set pool 2 size to 1

configure the pool for RBD use

# initialise pool used for RBD
root@ceph-aio:~# rbd pool init rbd

# associate pool to application
root@ceph-aio:~# ceph osd pool application enable rbd rbd
enabled application 'rbd' on pool 'rbd'

9 - ignore the no replicas warning

root@ceph-aio:~# ceph -s
  cluster:
    id:     b25ef344-c62a-11ec-a3e0-4f9f42678996
    health: HEALTH_WARN
            2 pool(s) have no replicas configured
 
  services:
    mon: 1 daemons, quorum ceph-aio (age 63m)
    mgr: ceph-aio.nmuhhm(active, since 61m)
    osd: 3 osds: 3 up (since 37m), 3 in (since 38m)
 
  data:
    pools:   2 pools, 65 pgs
    objects: 2 objects, 449 KiB
    usage:   63 MiB used, 30 GiB / 30 GiB avail
    pgs:     65 active+clean
root@ceph-aio:~# ceph health mute POOL_NO_REDUNDANCY
root@ceph-aio:~# ceph -s
  cluster:
    id:     b25ef344-c62a-11ec-a3e0-4f9f42678996
    health: HEALTH_OK
            (muted: POOL_NO_REDUNDANCY)
 
  services:
    mon: 1 daemons, quorum ceph-aio (age 65m)
    mgr: ceph-aio.nmuhhm(active, since 63m)
    osd: 3 osds: 3 up (since 39m), 3 in (since 40m)
 
  data:
    pools:   2 pools, 65 pgs
    objects: 2 objects, 449 KiB
    usage:   63 MiB used, 30 GiB / 30 GiB avail
    pgs:     65 active+clean
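
the mute can also be given a TTL or made sticky so it survives the alert clearing and reappearing, e.g.:

# keep the mute even if the warning clears and comes back
$ ceph health mute POOL_NO_REDUNDANCY --sticky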

test

root@ceph-aio:~# rbd create rbd0 --size 1024  --image-feature layering
root@ceph-aio:~# rbd ls -l
NAME  SIZE   PARENT  FMT  PROT  LOCK
rbd0  1 GiB            2            
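
to check that I/O actually works, the image can be mapped on the host and given a filesystem. A quick sketch, assuming the kernel rbd module is available (the image was created with only the layering feature, which the kernel client supports):

# map the image; this prints the block device, e.g. /dev/rbd0
$ rbd map rbd0
# put a filesystem on it and mount it
$ mkfs.ext4 /dev/rbd0
$ mount /dev/rbd0 /mnt
$ df -h /mnt
# cleanup
$ umount /mnt
$ rbd unmap rbd0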

setup cephfs

to be updated..
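
until then, a minimal sketch using the orchestrator (the volume name 'cephfs' is arbitrary, and the metadata/data pools it creates will also need their size reduced to 1, same as step 7):

# create a CephFS volume; cephadm deploys the MDS daemons for it
$ ceph fs volume create cephfs
# check the filesystem and MDS status
$ ceph fs status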

setup rados gateway

to be updated..
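
until then, a minimal sketch using the orchestrator (the service id 'default' is arbitrary, and the pools RGW creates will also need their size reduced as above):

# deploy a single RGW daemon on this host
$ ceph orch apply rgw default --placement="1 ceph-aio"
$ ceph orch ps --daemon-type rgw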

other configs

to allow pool deletion

$ ceph config set global mon_allow_pool_delete true
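
with that set, a pool is deleted by naming it twice plus a confirmation flag, e.g. for a hypothetical pool named 'testpool':

$ ceph osd pool delete testpool testpool --yes-i-really-really-mean-it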

reference

https://docs.docker.com/engine/install/ubuntu/
https://balderscape.medium.com/setting-up-a-virtual-single-node-ceph-storage-cluster-d86d6a6c658e
https://docs.ceph.com/en/quincy/start/quick-rbd/
https://docs.ceph.com/en/latest/rados/operations/pools/