# Install a kubeadm Kubernetes cluster with Cilium replacing kube-proxy

## Install Kubernetes

#### Configure kube-vip

```
# Install kube-vip and set the configuration details
# Change the VIP to a new IP that is not already used by any node in the cluster
$ export VIP=172.20.7.69

# Declare the network interface name
$ export INTERFACE=ens18

# Get the latest kube-vip release tag
$ export KVVERSION=$(curl -sL https://api.github.com/repos/kube-vip/kube-vip/releases | jq -r ".[0].name")

# Check the kube-vip version
$ echo $KVVERSION
v0.9.1

$ alias kube-vip="sudo ctr -n k8s.io image pull ghcr.io/kube-vip/kube-vip:$KVVERSION;sudo ctr -n k8s.io run --rm --net-host ghcr.io/kube-vip/kube-vip:$KVVERSION vip /kube-vip"

$ sudo mkdir -p /etc/kubernetes/manifests/

$ kube-vip manifest pod \
    --address $VIP \
    --interface $INTERFACE \
    --controlplane \
    --arp \
    --leaderElection | sudo tee /etc/kubernetes/manifests/kube-vip.yaml

# Kubernetes 1.29 and later require a permission adjustment; only the first master needs this change
$ sudo sed -i 's|path: /etc/kubernetes/admin.conf|path: /etc/kubernetes/super-admin.conf|g' /etc/kubernetes/manifests/kube-vip.yaml
```

```
$ mkdir ~/k8s; cd ~/k8s
$ nano ~/k8s/init-config.yaml
```

File contents:

* `advertiseAddress` must be changed to your own master node IP.
* `controlPlaneEndpoint` must point to the VIP.

```
apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 172.20.7.70   # change to your master node IP
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///run/containerd/containerd.sock   # change to your containerd Unix socket
  imagePullPolicy: IfNotPresent
  name: m1   # change to your master node hostname
  taints: []
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: 1.33.2
controlPlaneEndpoint: 172.20.7.69:6443
apiServer:
  timeoutForControlPlane: 4m0s
  certSANs:
  - 127.0.0.1
certificatesDir: /etc/kubernetes/pki
clusterName: topgun   # set your clusterName
controllerManager:
  extraArgs:
    bind-address: "0.0.0.0"
    secure-port: "10257"
  extraVolumes:
  - name: tz-config
    hostPath: /etc/localtime
    mountPath: /etc/localtime
    readOnly: true
scheduler:
  extraArgs:
    bind-address: "0.0.0.0"
    secure-port: "10259"
etcd:
  local:
    dataDir: /var/lib/etcd
    extraArgs:
      listen-metrics-urls: "http://0.0.0.0:2381"
dns: {}
imageRepository: registry.k8s.io
networking:
  dnsDomain: cluster.local    # DNS domain used by Kubernetes Services
  podSubnet: 10.244.0.0/16    # the subnet used by Pods
  serviceSubnet: 10.96.0.0/16 # the subnet used by Kubernetes Services
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
maxPods: 110
shutdownGracePeriod: 30s
shutdownGracePeriodCriticalPods: 10s
imageMinimumGCAge: "2m0s"   # keep images for at least 2 minutes
imageMaximumGCAge: "168h"   # set to 1 week (168 hours): images unused for over a week can be garbage-collected
systemReserved:
  memory: "1Gi"
kubeReserved:
  memory: "2Gi"
```
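Before initializing, the file can be sanity-checked. This is an optional step, assuming a recent kubeadm release that ships the `kubeadm config validate` subcommand:

```
# Optional: validate the kubeadm config before running kubeadm init
$ sudo kubeadm config validate --config ~/k8s/init-config.yaml
```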
Initialize Kubernetes:

* `--skip-phases=addon/kube-proxy` skips installing kube-proxy.

```
$ sudo kubeadm init --skip-phases=addon/kube-proxy --upload-certs --config=init-config.yaml
```

Set up kubeconfig:

```
$ mkdir -p $HOME/.kube; sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config; sudo chown $(id -u):$(id -g) $HOME/.kube/config
```

### Install Cilium

Install Helm 3:

```
$ curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
$ helm version
version.BuildInfo{Version:"v3.17.1", GitCommit:"980d8ac1939e39138101364400756af2bdee1da5", GitTreeState:"clean", GoVersion:"go1.23.5"}
```

Install the Cilium CLI:

```
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
CLI_ARCH=amd64
if [ "$(uname -m)" = "aarch64" ]; then CLI_ARCH=arm64; fi
curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
sha256sum --check cilium-linux-${CLI_ARCH}.tar.gz.sha256sum
sudo tar xzvfC cilium-linux-${CLI_ARCH}.tar.gz /usr/local/bin
rm cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
```

Declare the variables; these must point to the VIP:

```
API_SERVER_IP=172.20.7.69
API_SERVER_PORT=6443
```

* `ipam.mode=kubernetes` allocates Pod IPs from the Pod CIDRs that Kubernetes itself assigns to each node.
* `hubble.ui.enabled=true` and `hubble.relay.enabled=true` enable the Hubble observability features.
* `hubble.ui.frontend.server.ipv6.enabled` must be set to `false` if IPv6 is not enabled in your environment.
* `kubeProxyReplacement=true` enables kube-proxy replacement mode.
* `bpf.masquerade=true` enables eBPF-based NAT masquerading.
* `bpf.datapathMode=netkit` uses netkit devices for the BPF datapath.
* `autoDirectNodeRoutes=true` automatically creates routes to every node's Pod CIDR.
* `routingMode=native` makes Cilium forward traffic via the host routing table.
* `ipv4NativeRoutingCIDR=10.244.0.0/16` tells Cilium which CIDR is the Pod CIDR range.

```
$ helm repo add cilium https://helm.cilium.io/

$ helm install cilium cilium/cilium \
  --namespace kube-system \
  --set ipam.mode=kubernetes \
  --set kubeProxyReplacement=true \
  --set k8sServiceHost=${API_SERVER_IP} \
  --set k8sServicePort=${API_SERVER_PORT} \
  --set hubble.ui.enabled=true \
  --set hubble.relay.enabled=true \
  --set hubble.ui.frontend.server.ipv6.enabled=false \
  --set hubble.ui.service.type=NodePort \
  --set bpf.masquerade=true \
  --set bpf.datapathMode=netkit \
  --set autoDirectNodeRoutes=true \
  --set routingMode=native \
  --set ipv4NativeRoutingCIDR=10.244.0.0/16
```
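Once the Cilium pods are Running, it is worth confirming that the kube-proxy replacement is actually active. A minimal check, assuming a recent Cilium image where the agent's debug CLI is named `cilium-dbg` (older releases call it `cilium`):

```
# kube-proxy should not exist at all in this cluster; this should return NotFound
$ kubectl -n kube-system get ds kube-proxy

# Ask the Cilium agent whether it has taken over Service load-balancing
$ kubectl -n kube-system exec ds/cilium -- cilium-dbg status | grep KubeProxyReplacement
```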
## Environment check

```
$ kubectl get po -A
NAMESPACE     NAME                               READY   STATUS    RESTARTS   AGE
kube-system   cilium-envoy-bgx26                 1/1     Running   0          2m55s
kube-system   cilium-operator-578cdddb6f-bkntf   1/1     Running   0          2m55s
kube-system   cilium-operator-578cdddb6f-pb9n8   0/1     Pending   0          2m55s
kube-system   cilium-txbhf                       1/1     Running   0          2m55s
kube-system   coredns-674b8bbfcf-jv6nc           1/1     Running   0          5m29s
kube-system   coredns-674b8bbfcf-zmjn7           1/1     Running   0          5m29s
kube-system   etcd-m1                            1/1     Running   0          5m37s
kube-system   hubble-relay-6b7b5877f4-nxdvj      1/1     Running   0          2m55s
kube-system   hubble-ui-655f947f96-x4vwl         2/2     Running   0          2m55s
kube-system   kube-apiserver-m1                  1/1     Running   0          5m37s
kube-system   kube-controller-manager-m1         1/1     Running   0          5m33s
kube-system   kube-scheduler-m1                  1/1     Running   0          5m33s
kube-system   kube-vip-m1                        1/1     Running   0          5m38s
```

* After Kubernetes is installed, revert the kube-vip permissions:

```
$ sudo sed -i 's|path: /etc/kubernetes/super-admin.conf|path: /etc/kubernetes/admin.conf|g' /etc/kubernetes/manifests/kube-vip.yaml
$ sudo systemctl daemon-reload
$ sudo systemctl restart kubelet
```

## Join the m2 master node

#### Configure kube-vip

```
# Install kube-vip and set the configuration details
# Change the VIP to a new IP that is not already used by any node in the cluster
$ export VIP=172.20.7.69

# Declare the network interface name
$ export INTERFACE=ens18

# Get the latest kube-vip release tag
$ export KVVERSION=$(curl -sL https://api.github.com/repos/kube-vip/kube-vip/releases | jq -r ".[0].name")

# Check the kube-vip version
$ echo $KVVERSION
v0.9.1

$ alias kube-vip="sudo ctr -n k8s.io image pull ghcr.io/kube-vip/kube-vip:$KVVERSION;sudo ctr -n k8s.io run --rm --net-host ghcr.io/kube-vip/kube-vip:$KVVERSION vip /kube-vip"

$ sudo mkdir -p /etc/kubernetes/manifests/

$ kube-vip manifest pod \
    --address $VIP \
    --interface $INTERFACE \
    --controlplane \
    --arp \
    --leaderElection | sudo tee /etc/kubernetes/manifests/kube-vip.yaml
```

Start the Kubernetes installation on m2:

```
$ sudo kubeadm join 172.20.7.69:6443 --token xhii2u.a7qbggfuba6o3n9g \
    --discovery-token-ca-cert-hash sha256:9061e6b042297ad88ce762487a833bb14637d2bd089002f8b2343d721838a861 \
    --control-plane --certificate-key 33dc3264459302bd89ada12193b950cce05bd0f23928629001b4e8199b139eb9
```

## Join the m3 master node

#### Configure kube-vip

```
# Install kube-vip and set the configuration details
# Change the VIP to a new IP that is not already used by any node in the cluster
$ export VIP=172.20.7.69

# Declare the network interface name
$ export INTERFACE=ens18

# Get the latest kube-vip release tag
$ export KVVERSION=$(curl -sL https://api.github.com/repos/kube-vip/kube-vip/releases | jq -r ".[0].name")

# Check the kube-vip version
$ echo $KVVERSION
v0.9.1

$ alias kube-vip="sudo ctr -n k8s.io image pull ghcr.io/kube-vip/kube-vip:$KVVERSION;sudo ctr -n k8s.io run --rm --net-host ghcr.io/kube-vip/kube-vip:$KVVERSION vip /kube-vip"

$ sudo mkdir -p /etc/kubernetes/manifests/

$ kube-vip manifest pod \
    --address $VIP \
    --interface $INTERFACE \
    --controlplane \
    --arp \
    --leaderElection | sudo tee /etc/kubernetes/manifests/kube-vip.yaml
```

Start the Kubernetes installation on m3:

```
$ sudo kubeadm join 172.20.7.69:6443 --token xhii2u.a7qbggfuba6o3n9g \
    --discovery-token-ca-cert-hash sha256:9061e6b042297ad88ce762487a833bb14637d2bd089002f8b2343d721838a861 \
    --control-plane --certificate-key 33dc3264459302bd89ada12193b950cce05bd0f23928629001b4e8199b139eb9
```
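Note that the certificate key uploaded by `kubeadm init --upload-certs` is only valid for two hours. If a later control-plane join fails because the key has expired, a fresh key can be generated on m1 and substituted into the `--certificate-key` flag:

```
# Re-upload the control-plane certificates and print a fresh certificate key
$ sudo kubeadm init phase upload-certs --upload-certs
```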
## Join the w1 and w2 worker nodes

If you did not record the command, you can generate a worker join command on m1 with:

```
$ sudo kubeadm token create --print-join-command
```

Label w1 and w2; both get the worker role:

```
$ kubectl label node w1 node-role.kubernetes.io/worker=; kubectl label node w2 node-role.kubernetes.io/worker=
```

```
$ kubectl get no
NAME   STATUS   ROLES           AGE     VERSION
m1     Ready    control-plane   12m     v1.33.2
m2     Ready    control-plane   6m36s   v1.33.2
m3     Ready    control-plane   5m26s   v1.33.2
w1     Ready    worker          4m24s   v1.33.2
w2     Ready    worker          4m19s   v1.33.2
```

```
$ kubectl -n kube-system get pod
NAME                               READY   STATUS    RESTARTS   AGE
cilium-4j2l5                       1/1     Running   0          77s
cilium-envoy-6fd6p                 1/1     Running   0          77s
cilium-envoy-d9sgb                 1/1     Running   0          77s
cilium-envoy-tjtpx                 1/1     Running   0          77s
cilium-envoy-v4v2n                 1/1     Running   0          77s
cilium-envoy-vfsfb                 1/1     Running   0          77s
cilium-ghvd5                       1/1     Running   0          77s
cilium-k9tvj                       1/1     Running   0          77s
cilium-mdkcd                       1/1     Running   0          77s
cilium-operator-7459dc67bb-dmpf5   1/1     Running   0          77s
cilium-operator-7459dc67bb-s5nxm   1/1     Running   0          77s
cilium-q79mm                       1/1     Running   0          77s
coredns-674b8bbfcf-gr8zm           1/1     Running   0          17m
coredns-674b8bbfcf-kx47l           1/1     Running   0          17m
etcd-m1                            1/1     Running   0          17m
etcd-m2                            1/1     Running   0          11m
etcd-m3                            1/1     Running   0          10m
hubble-relay-6b7b5877f4-2vnhn      1/1     Running   0          77s
hubble-ui-655f947f96-rch6g         2/2     Running   0          77s
kube-apiserver-m1                  1/1     Running   0          17m
kube-apiserver-m2                  1/1     Running   0          11m
kube-apiserver-m3                  1/1     Running   0          10m
kube-controller-manager-m1         1/1     Running   0          17m
kube-controller-manager-m2         1/1     Running   0          11m
kube-controller-manager-m3         1/1     Running   0          10m
kube-scheduler-m1                  1/1     Running   0          17m
kube-scheduler-m2                  1/1     Running   0          11m
kube-scheduler-m3                  1/1     Running   0          10m
kube-vip-m1                        1/1     Running   0          16m
kube-vip-m2                        1/1     Running   0          11m
kube-vip-m3                        1/1     Running   0          10m
```

```
$ cilium status
    /¯¯\
 /¯¯\__/¯¯\    Cilium:             OK
 \__/¯¯\__/    Operator:           OK
 /¯¯\__/¯¯\    Envoy DaemonSet:    OK
 \__/¯¯\__/    Hubble Relay:       OK
    \__/       ClusterMesh:        disabled

DaemonSet              cilium             Desired: 5, Ready: 5/5, Available: 5/5
DaemonSet              cilium-envoy       Desired: 5, Ready: 5/5, Available: 5/5
Deployment             cilium-operator    Desired: 2, Ready: 2/2, Available: 2/2
Deployment             hubble-relay       Desired: 1, Ready: 1/1, Available: 1/1
Deployment             hubble-ui          Desired: 1, Ready: 1/1, Available: 1/1
Containers:            cilium             Running: 5
                       cilium-envoy       Running: 5
                       cilium-operator    Running: 2
                       clustermesh-apiserver
                       hubble-relay       Running: 1
                       hubble-ui          Running: 1
Cluster Pods:          4/4 managed by Cilium
Helm chart version:    1.18.0
Image versions         cilium             quay.io/cilium/cilium:v1.18.0@sha256:dfea023972d06ec183cfa3c9e7809716f85daaff042e573ef366e9ec6a0c0ab2: 5
                       cilium-envoy       quay.io/cilium/cilium-envoy:v1.34.4-1753677767-266d5a01d1d55bd1d60148f991b98dac0390d363@sha256:231b5bd9682dfc648ae97f33dcdc5225c5a526194dda08124f5eded833bf02bf: 5
                       cilium-operator    quay.io/cilium/operator-generic:v1.18.0@sha256:398378b4507b6e9db22be2f4455d8f8e509b189470061b0f813f0fabaf944f51: 2
                       hubble-relay       quay.io/cilium/hubble-relay:v1.18.0@sha256:c13679f22ed250457b7f3581189d97f035608fe13c87b51f57f8a755918e793a: 1
                       hubble-ui          quay.io/cilium/hubble-ui-backend:v0.13.2@sha256:a034b7e98e6ea796ed26df8f4e71f83fc16465a19d166eff67a03b822c0bfa15: 1
                       hubble-ui          quay.io/cilium/hubble-ui:v0.13.2@sha256:9e37c1296b802830834cc87342a9182ccbb71ffebb711971e849221bd9d59392: 1
```
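Because the chart sets `hubble.ui.service.type=NodePort`, the Hubble UI is exposed on every node. A quick way to find the assigned port (the jsonpath expression is just one way to extract the number):

```
# Look up the hubble-ui Service and its NodePort
$ kubectl -n kube-system get svc hubble-ui

# Extract only the NodePort number
$ kubectl -n kube-system get svc hubble-ui -o jsonpath='{.spec.ports[0].nodePort}'
```

The UI is then reachable at `http://<any-node-ip>:<nodeport>`.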
## Validate Cilium network performance

* The test crosses nodes and physical hosts; the physical network bandwidth is 10 Gbit/s.

```
$ kubectl create ns iperf
$ echo 'apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: ds-iperf
  namespace: iperf
spec:
  selector:
    matchLabels:
      app: iperf3
  template:
    metadata:
      labels:
        app: iperf3
    spec:
      containers:
      - name: iperf3
        image: leodotcloud/swiss-army-knife
        command: ["iperf3"]
        args: ["-s", "-p", "12345"]
        ports:
        - containerPort: 12345
          hostPort: 12345' | kubectl apply -f -
```

```
$ kubectl -n iperf get pod -owide
NAME             READY   STATUS    RESTARTS   AGE    IP             NODE   NOMINATED NODE   READINESS GATES
ds-iperf-2h2jl   1/1     Running   0          102m   10.244.0.226   m1     <none>           <none>
ds-iperf-4d9kk   1/1     Running   0          102m   10.244.3.193   w1     <none>           <none>
ds-iperf-5r5ls   1/1     Running   0          102m   10.244.4.80    w2     <none>           <none>
ds-iperf-fmh86   1/1     Running   0          102m   10.244.1.50    m2     <none>           <none>
ds-iperf-m9mtc   1/1     Running   0          102m   10.244.2.101   m3     <none>           <none>
```

* Testing pod-to-pod throughput across nodes: the physical network is 10 Gbit/s, and with Cilium set to `routingMode=native`, cross-node pod-to-pod throughput matches the physical network.

```
$ kubectl -n iperf exec ds-iperf-4d9kk -- iperf3 -c 10.244.4.80 -p 12345
Connecting to host 10.244.4.80, port 12345
[  4] local 10.244.3.193 port 36816 connected to 10.244.4.80 port 12345
[ ID] Interval           Transfer     Bandwidth       Retr  Cwnd
[  4]   0.00-1.00   sec  1.10 GBytes  9.42 Gbits/sec    0   3.88 MBytes
[  4]   1.00-2.00   sec  1.10 GBytes  9.41 Gbits/sec    0   3.88 MBytes
[  4]   2.00-3.00   sec  1.09 GBytes  9.41 Gbits/sec   90   2.92 MBytes
[  4]   3.00-4.00   sec  1.10 GBytes  9.41 Gbits/sec    0   3.18 MBytes
[  4]   4.00-5.00   sec  1.10 GBytes  9.41 Gbits/sec    0   3.43 MBytes
[  4]   5.00-6.00   sec  1.10 GBytes  9.41 Gbits/sec    0   3.66 MBytes
[  4]   6.00-7.00   sec  1.10 GBytes  9.41 Gbits/sec    0   3.77 MBytes
[  4]   7.00-8.00   sec  1.09 GBytes  9.41 Gbits/sec    0   3.79 MBytes
[  4]   8.00-9.00   sec  1.10 GBytes  9.41 Gbits/sec    0   3.80 MBytes
[  4]   9.00-10.00  sec  1.09 GBytes  9.41 Gbits/sec    0   3.80 MBytes
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bandwidth       Retr
[  4]   0.00-10.00  sec  11.0 GBytes  9.41 Gbits/sec   90             sender
[  4]   0.00-10.00  sec  11.0 GBytes  9.41 Gbits/sec                  receiver

iperf Done.
```
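Once the benchmark is finished, the test resources can be removed; deleting the namespace also deletes the DaemonSet and its pods:

```
# Clean up the iperf test resources
$ kubectl delete ns iperf
```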