Preparation:

  • Disable the firewall and SELinux (a sketch for making this persistent follows this list).

    $ systemctl stop firewalld
    $ setenforce 0
  • Set up /etc/hosts.

    $ vim /etc/hosts
    192.168.2.172 ca12
    192.168.2.179 ca19
    192.168.2.95 ca95
    192.168.2.98 ca98
  • Set up passwordless SSH.

    $ ssh-keygen
    $ ssh-copy-id ca12
    $ ssh-copy-id ca19
    $ ssh-copy-id ca95
    $ ssh-copy-id ca98
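
The firewall and SELinux changes above only last until the next reboot. A minimal sketch for making them persistent and verifying passwordless SSH (assuming a systemd host with /etc/selinux/config present):

$ systemctl disable firewalld                                             # keep firewalld off after reboot
$ sed -i 's/^SELINUX=enforcing/SELINUX=permissive/' /etc/selinux/config   # persist the mode set by setenforce 0
$ ssh ca19 hostname                                                       # should print ca19 without asking for a password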

1. Architecture of the cluster

hostname  IP             role      info
ca12      192.168.2.172  MON, OSD  one monitor and one OSD running on an SSD
ca19      192.168.2.179  MON, OSD  one monitor and one OSD running on an SSD
ca95      192.168.2.95   MON, OSD  one monitor and one OSD running on an SSD

2. Partition the disks for BlueStore

Split each SSD into four partitions, then format the first partition with mkfs.xfs (using /dev/sdb as the example device).

$ parted /dev/sdb
(parted) mkpart osd-device-0-data 0G 10G
(parted) mkpart osd-device-0-wal 10G 20G
(parted) mkpart osd-device-0-db 20G 30G
(parted) mkpart osd-device-0-block 30G 70G

$ mkfs.xfs /dev/sdb1

Result:

Number  Start   End     Size    File system  Name                Flags
 1      1049kB  10.0GB  9999MB               osd-device-0-data
 2      10.0GB  20.0GB  9999MB               osd-device-0-wal
 3      20.0GB  30.0GB  10.0GB               osd-device-0-db
 4      30.0GB  70.0GB  40.0GB               osd-device-0-block
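
The same layout can also be created non-interactively with parted's script mode; a sketch, assuming the disk is empty and can be (re)labeled GPT, which named partitions require:

$ parted -s /dev/sdb mklabel gpt                         # wipes the existing partition table
$ parted -s /dev/sdb mkpart osd-device-0-data 0G 10G
$ parted -s /dev/sdb mkpart osd-device-0-wal 10G 20G
$ parted -s /dev/sdb mkpart osd-device-0-db 20G 30G
$ parted -s /dev/sdb mkpart osd-device-0-block 30G 70G
$ mkfs.xfs -f /dev/sdb1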

3. Ceph.conf

Create the directory /etc/ceph to store the configuration file and keyrings.

/etc/ceph/ceph.conf:

[global]
fsid = 57077c8f-0a92-42e8-a82c-61198875a30e
osd crush chooseleaf type = 0

[mon]
mon data = /data/$name

[mon.ca12]
host = ca12
mon addr = 192.168.2.172:6789
public addr = 192.168.2.172

[mon.ca19]
host = ca19
mon addr = 192.168.2.179:6789
public addr = 192.168.2.179

[mon.ca95]
host = ca95
mon addr = 192.168.2.95:6789
public addr = 192.168.2.95

[osd]
osd mkfs type = xfs
osd data = /data/$name
enable_experimental_unrecoverable_data_corrupting_features = bluestore
osd objectstore = bluestore
bluestore = true
bluestore fsck on mount = true
bluestore block create = true
bluestore block db size = 67108864
bluestore block db create = true
bluestore block wal size = 134217728
bluestore block wal create = true

[osd.0]
host = ca12
bluestore block db path = /dev/sdc2
bluestore block wal path = /dev/sdc3
bluestore block path = /dev/sdc4

[osd.1]
host = ca19
bluestore block db path = /dev/sdd2
bluestore block wal path = /dev/sdd3
bluestore block path = /dev/sdd4

[osd.2]
host = ca95
bluestore block db path = /dev/sdb2
bluestore block wal path = /dev/sdb3
bluestore block path = /dev/sdb4

[mgr]
mgr modules = dashboard balancer
mgr data = /data/$name
  • db path: stores RocksDB data (BlueStore metadata)
  • wal path: the RocksDB write-ahead log, used to make its operations atomic
  • block path: stores the actual user data

Metadata such as the keyring is stored on the first partition (/dev/sdb1 in the example above).
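
Once ceph-osd --mkfs has run (section 6), the OSD data partition should hold only small metadata files plus symlinks to the three raw partitions. A quick way to confirm the layout on ca12 (the listing below is illustrative, not captured output):

$ ls -l /data/osd.0
# expected symlinks:
#   block     -> /dev/sdc4   (user data)
#   block.db  -> /dev/sdc2   (RocksDB data)
#   block.wal -> /dev/sdc3   (RocksDB write-ahead log)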

4. Mount the OSD data partitions

# ca12
$ mkdir -p /data/osd.0
$ mount /dev/sdc1 /data/osd.0
$ touch /data/osd.0/keyring

# ca19
$ mkdir -p /data/osd.1
$ mount /dev/sdd1 /data/osd.1
$ touch /data/osd.1/keyring

# ca95
$ mkdir -p /data/osd.2
$ mount /dev/sdb1 /data/osd.2
$ touch /data/osd.2/keyring
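
These mounts do not survive a reboot; a sketch of matching /etc/fstab entries, one per node, using the same devices as above:

# ca12
$ echo '/dev/sdc1 /data/osd.0 xfs defaults 0 0' >> /etc/fstab
# ca19
$ echo '/dev/sdd1 /data/osd.1 xfs defaults 0 0' >> /etc/fstab
# ca95
$ echo '/dev/sdb1 /data/osd.2 xfs defaults 0 0' >> /etc/fstab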

5. Deploy a MON on ca12

# create mon keyring
$ ceph-authtool --create-keyring /etc/ceph/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'

# create admin keyring
$ ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow'

# add admin keyring into mon keyring
$ ceph-authtool /etc/ceph/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring

# generate a monmap, save it as /etc/ceph/monmap, and register ca12 in it
$ monmaptool --create --clobber --add ca12 192.168.2.172 --fsid 57077c8f-0a92-42e8-a82c-61198875a30e /etc/ceph/monmap

# create work dir of mon.ca12
$ ceph-mon --mkfs -i ca12 --monmap /etc/ceph/monmap --keyring /etc/ceph/ceph.mon.keyring

# start mon service
$ ceph-mon -i ca12
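
Before adding an OSD, the monmap and the running monitor can be sanity-checked; a brief sketch, run on ca12:

$ monmaptool --print /etc/ceph/monmap   # should list ca12 at 192.168.2.172:6789
$ ceph mon stat                         # should report a single monitor, ca12, in quorum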

6. Deploy an OSD on ca12

# generate an osd id
$ ceph osd create

# generate the keyring of the osd
$ ceph-osd -i 0 --mkfs --mkkey --no-mon-config

# add keyring of osd into ceph auth
$ ceph auth add osd.0 osd 'allow *' mon 'allow profile osd' -i /data/osd.0/keyring

# create a host in crushmap
$ ceph osd crush add-bucket ca12 host

# add the host into root of crushmap
$ ceph osd crush move ca12 root=default

# add osd.0 into host ca12
$ ceph osd crush add osd.0 1.0 host=ca12

# start osd service
$ ceph-osd -i 0
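
A quick check that the OSD registered and came up, run on ca12:

$ ceph osd stat         # expect: 1 osds: 1 up, 1 in
$ ceph osd crush tree   # osd.0 should appear under host ca12 in root default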

7. Deploy a MGR

$ mkdir /data/mgr.admin

# generate keyring of mgr
$ bin/ceph --cluster ceph auth get-or-create mgr.admin mon 'allow profile mgr' osd 'allow *' > /data/mgr.admin/keyring

# start mgr service
$ ceph-mgr -i admin
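
The config above lists dashboard among the mgr modules. If its Python dependencies are present (the HEALTH_WARN below suggests some are not in this build), it can be enabled and its URL checked; a sketch:

$ ceph mgr module enable dashboard   # fails if the module's dependencies are missing
$ ceph mgr services                  # prints the dashboard URL once it is serving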

Now a single-node cluster is running on ca12. We can check its health from the shell.

[root@localhost build]# bin/ceph -s
*** DEVELOPER MODE: setting PATH, PYTHONPATH and LD_LIBRARY_PATH ***
  cluster:
    id:     57077c8f-0a92-42e8-a82c-61198875a30e
    health: HEALTH_WARN
            13 mgr modules have failed dependencies
            1 monitors have not enabled msgr2
            OSD count 1 < osd_pool_default_size 3

  services:
    mon: 1 daemons, quorum ca12 (age 18h)
    mgr: 0(active, since 9s)
    osd: 1 osds: 1 up (since 29m), 1 in (since 29m)

  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   10 GiB used, 36 GiB / 47 GiB avail
    pgs:

Next, let's deploy the remaining MONs and OSDs on the other nodes.

8. Deploy more MONs

  • Register the new MON in the monmap (run this on an existing monitor node).
$ monmaptool --add ca19 192.168.2.179  --fsid 57077c8f-0a92-42e8-a82c-61198875a30e /etc/ceph/monmap
  • Use scp to copy the conf, keyrings, and monmap to the new node.
$ scp -r root@192.168.2.172:/etc/ceph root@192.168.2.179:/etc/
  • Switch to the new node and create the working directory of the new monitor.
$ ceph-mon --mkfs -i ca19 --monmap /etc/ceph/monmap --keyring /etc/ceph/ceph.mon.keyring
  • Start the mon daemon.
$ ceph-mon -i ca19
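
With the second monitor started, quorum can be checked from either node; a brief sketch:

$ ceph mon stat                             # should now report 2 monitors
$ ceph quorum_status --format json-pretty   # quorum_names should list ca12 and ca19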

9. Deploy more OSDs

  • Generate an osd id on a MON node.
$ ceph osd create
  • Use scp to copy the conf, keyrings, and monmap to the new node.
$ scp -r root@192.168.2.172:/etc/ceph root@192.168.2.179:/etc/
  • Switch to the new node and generate the keyring of the new osd.
$ ceph-osd -i 1 --mkfs --mkkey --no-mon-config
  • Register the new keyring with ceph auth.
$ ceph auth add osd.1 osd 'allow *' mon 'allow profile osd' -i /data/osd.1/keyring
  • Add the osd to the crushmap.
# create a host in crushmap
$ ceph osd crush add-bucket ca19 host

# add the host into the root of crushmap
$ ceph osd crush move ca19 root=default

# add osd.1 into host ca19
$ ceph osd crush add osd.1 1.0 host=ca19
  • Start the osd daemon.
$ ceph-osd -i 1  

Repeat the same steps on ca95 to create osd.2; the per-node sequence is summarized in the sketch below.
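
Since the per-OSD steps differ only in the id, host, and device, they can be condensed into one sequence. A hypothetical sketch, run on the new node, where ID and HOST are placeholders; it assumes /etc/ceph has already been copied over and /data/osd.$ID is mounted with an empty keyring file (as in section 4):

$ ID=2 HOST=ca95                     # placeholders for this run
$ ceph osd create                    # should print $ID
$ ceph-osd -i $ID --mkfs --mkkey --no-mon-config
$ ceph auth add osd.$ID osd 'allow *' mon 'allow profile osd' -i /data/osd.$ID/keyring
$ ceph osd crush add-bucket $HOST host
$ ceph osd crush move $HOST root=default
$ ceph osd crush add osd.$ID 1.0 host=$HOST
$ ceph-osd -i $ID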

The result is as follows:

[root@localhost build]# bin/ceph osd tree
*** DEVELOPER MODE: setting PATH, PYTHONPATH and LD_LIBRARY_PATH ***
ID   CLASS  WEIGHT   TYPE NAME      STATUS  REWEIGHT  PRI-AFF
 -1         3.00000  root default
 -2         1.00000      host ca12
  0    ssd  1.00000          osd.0      up   1.00000  1.00000
 -8         1.00000      host ca19
  1    ssd  1.00000          osd.1      up   1.00000  1.00000
-11         1.00000      host ca95
  2    ssd  1.00000          osd.2      up   1.00000  1.00000

[root@localhost build]# bin/ceph -s
*** DEVELOPER MODE: setting PATH, PYTHONPATH and LD_LIBRARY_PATH ***
  cluster:
    id:     57077c8f-0a92-42e8-a82c-61198875a30e
    health: HEALTH_WARN
            13 mgr modules have failed dependencies
            3 monitors have not enabled msgr2

  services:
    mon: 3 daemons, quorum ca12,ca19,ca95 (age 28m)
    mgr: 0(active, since 8h), standbys: admin
    osd: 3 osds: 3 up (since 15m), 3 in (since 15m)

  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   31 GiB used, 109 GiB / 140 GiB avail
    pgs:
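
As a final sanity check, a small test pool can be created and an object written through it; a sketch (the pool name and PG count are arbitrary):

$ ceph osd pool create testpool 64 64          # 64 PGs is a small, arbitrary choice
$ rados -p testpool put hosts-copy /etc/hosts  # write one object
$ rados -p testpool ls                         # should print: hosts-copy
$ ceph -s                                      # the data section should now show 1 pool and at least 1 object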