# QKCP 安装 ## Install k8s cluster Download KubeKey ``` curl -sSL https://get-kk.kubesphere.io | sh - ./kk create cluster --with-kubernetes v1.21.9 --with-local-storage ``` ## Deploy QKCP v3.2.1 ### 1. kubesphere-installer.yaml ```yaml! --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: clusterconfigurations.installer.kubesphere.io spec: group: installer.kubesphere.io versions: - name: v1alpha1 served: true storage: true schema: openAPIV3Schema: type: object properties: spec: type: object x-kubernetes-preserve-unknown-fields: true status: type: object x-kubernetes-preserve-unknown-fields: true scope: Namespaced names: plural: clusterconfigurations singular: clusterconfiguration kind: ClusterConfiguration shortNames: - cc --- apiVersion: v1 kind: Namespace metadata: name: kubesphere-system --- apiVersion: v1 kind: ServiceAccount metadata: name: ks-installer namespace: kubesphere-system --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: ks-installer rules: - apiGroups: - "" resources: - '*' verbs: - '*' - apiGroups: - apps resources: - '*' verbs: - '*' - apiGroups: - extensions resources: - '*' verbs: - '*' - apiGroups: - batch resources: - '*' verbs: - '*' - apiGroups: - rbac.authorization.k8s.io resources: - '*' verbs: - '*' - apiGroups: - apiregistration.k8s.io resources: - '*' verbs: - '*' - apiGroups: - apiextensions.k8s.io resources: - '*' verbs: - '*' - apiGroups: - tenant.kubesphere.io resources: - '*' verbs: - '*' - apiGroups: - certificates.k8s.io resources: - '*' verbs: - '*' - apiGroups: - devops.kubesphere.io resources: - '*' verbs: - '*' - apiGroups: - monitoring.coreos.com resources: - '*' verbs: - '*' - apiGroups: - logging.kubesphere.io resources: - '*' verbs: - '*' - apiGroups: - jaegertracing.io resources: - '*' verbs: - '*' - apiGroups: - storage.k8s.io resources: - '*' verbs: - '*' - apiGroups: - admissionregistration.k8s.io resources: - '*' verbs: - '*' - apiGroups: - policy 
resources: - '*' verbs: - '*' - apiGroups: - autoscaling resources: - '*' verbs: - '*' - apiGroups: - networking.istio.io resources: - '*' verbs: - '*' - apiGroups: - config.istio.io resources: - '*' verbs: - '*' - apiGroups: - iam.kubesphere.io resources: - '*' verbs: - '*' - apiGroups: - notification.kubesphere.io resources: - '*' verbs: - '*' - apiGroups: - auditing.kubesphere.io resources: - '*' verbs: - '*' - apiGroups: - events.kubesphere.io resources: - '*' verbs: - '*' - apiGroups: - core.kubefed.io resources: - '*' verbs: - '*' - apiGroups: - installer.kubesphere.io resources: - '*' verbs: - '*' - apiGroups: - storage.kubesphere.io resources: - '*' verbs: - '*' - apiGroups: - security.istio.io resources: - '*' verbs: - '*' - apiGroups: - monitoring.kiali.io resources: - '*' verbs: - '*' - apiGroups: - kiali.io resources: - '*' verbs: - '*' - apiGroups: - networking.k8s.io resources: - '*' verbs: - '*' - apiGroups: - kubeedge.kubesphere.io resources: - '*' verbs: - '*' - apiGroups: - types.kubefed.io resources: - '*' verbs: - '*' - apiGroups: - monitoring.kubesphere.io resources: - '*' verbs: - '*' - apiGroups: - application.kubesphere.io resources: - '*' verbs: - '*' --- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: name: ks-installer subjects: - kind: ServiceAccount name: ks-installer namespace: kubesphere-system roleRef: kind: ClusterRole name: ks-installer apiGroup: rbac.authorization.k8s.io --- apiVersion: apps/v1 kind: Deployment metadata: name: ks-installer namespace: kubesphere-system labels: app: ks-installer spec: replicas: 1 selector: matchLabels: app: ks-installer template: metadata: labels: app: ks-installer spec: serviceAccountName: ks-installer containers: - name: installer image: registry.cn-beijing.aliyuncs.com/qkcp/ks-installer:v3.2.1 imagePullPolicy: "Always" resources: limits: cpu: "1" memory: 1Gi requests: cpu: 20m memory: 100Mi volumeMounts: - mountPath: /etc/localtime name: host-time readOnly: true 
volumes: - hostPath: path: /etc/localtime type: "" name: host-time ``` ### 2. cluster-configuration.yaml ```yaml --- apiVersion: installer.kubesphere.io/v1alpha1 kind: ClusterConfiguration metadata: name: ks-installer namespace: kubesphere-system labels: version: v3.2.1 spec: persistence: storageClass: "" # If there is no default StorageClass in your cluster, you need to specify an existing StorageClass here. authentication: # adminPassword: "" # Custom password of the admin user. If the parameter exists but the value is empty, a random password is generated. If the parameter does not exist, P@88w0rd is used. jwtSecret: "" # Keep the jwtSecret consistent with the Host Cluster. Retrieve the jwtSecret by executing "kubectl -n kubesphere-system get cm kubesphere-config -o yaml | grep -v "apiVersion" | grep jwtSecret" on the Host Cluster. local_registry: "" # Add your private registry address if it is needed. # dev_tag: "" # Add your kubesphere image tag you want to install, by default it's same as ks-install release version. etcd: monitoring: false # Enable or disable etcd monitoring dashboard installation. You have to create a Secret for etcd before you enable it. endpointIps: localhost # etcd cluster EndpointIps. It can be a bunch of IPs here. port: 2379 # etcd port. tlsEnable: true common: core: console: enableMultiLogin: true # Enable or disable simultaneous logins. It allows different users to log in with the same account at the same time. port: 30880 type: NodePort # apiserver: # Enlarge the apiserver and controller manager's resource requests and limits for the large cluster # resources: {} # controllerManager: # resources: {} redis: enabled: false enableHA: false volumeSize: 2Gi # Redis PVC size. openldap: enabled: false volumeSize: 2Gi # openldap PVC size. minio: volumeSize: 20Gi # Minio PVC size. monitoring: # type: external # Whether to specify the external prometheus stack, and need to modify the endpoint at the next line. 
endpoint: http://prometheus-operated.kubesphere-monitoring-system.svc:9090 # Prometheus endpoint to get metrics data. GPUMonitoring: # Enable or disable the GPU-related metrics. If you enable this switch but have no GPU resources, Kubesphere will set it to zero. enabled: false gpu: # Install GPUKinds. The default GPU kind is nvidia.com/gpu. Other GPU kinds can be added here according to your needs. kinds: - resourceName: "nvidia.com/gpu" resourceType: "GPU" default: true es: # Storage backend for logging, events and auditing. # master: # volumeSize: 4Gi # The volume size of Elasticsearch master nodes. # replicas: 1 # The total number of master nodes. Even numbers are not allowed. # resources: {} # data: # volumeSize: 20Gi # The volume size of Elasticsearch data nodes. # replicas: 1 # The total number of data nodes. # resources: {} enabled: false logMaxAge: 7 # Log retention time in built-in Elasticsearch. It is 7 days by default. elkPrefix: logstash # The string making up index names. The index name will be formatted as ks-<elk_prefix>-log. basicAuth: enabled: false username: "" password: "" externalElasticsearchHost: "" externalElasticsearchPort: "" opensearch: # Storage backend for logging, events and auditing. # master: # volumeSize: 4Gi # The volume size of Opensearch master nodes. # replicas: 1 # The total number of master nodes. Even numbers are not allowed. # resources: {} # data: # volumeSize: 20Gi # The volume size of Opensearch data nodes. # replicas: 1 # The total number of data nodes. # resources: {} enabled: true logMaxAge: 7 # Log retention time in built-in Opensearch. It is 7 days by default. opensearchPrefix: whizard # The string making up index names. The index name will be formatted as ks-<opensearchPrefix>-logging. 
basicAuth: enabled: true username: "admin" password: "admin" externalOpensearchHost: "" externalOpensearchPort: "" dashboard: enabled: false alerting: # (CPU: 0.1 Core, Memory: 100 MiB) It enables users to customize alerting policies to send messages to receivers in time with different time intervals and alerting levels to choose from. enabled: false # Enable or disable the KubeSphere Alerting System. # thanosruler: # replicas: 1 # resources: {} auditing: # Provide a security-relevant chronological set of records,recording the sequence of activities happening on the platform, initiated by different tenants. enabled: false # Enable or disable the KubeSphere Auditing Log System. # operator: # resources: {} # webhook: # resources: {} dmp: enabled: true devops: # (CPU: 0.47 Core, Memory: 8.6 G) Provide an out-of-the-box CI/CD system based on Jenkins, and automated workflow tools including Source-to-Image & Binary-to-Image. enabled: false # Enable or disable the KubeSphere DevOps System. # resources: {} jenkinsMemoryLim: 8Gi # Jenkins memory limit. jenkinsMemoryReq: 4Gi # Jenkins memory request. jenkinsVolumeSize: 8Gi # Jenkins volume size. events: # Provide a graphical web console for Kubernetes Events exporting, filtering and alerting in multi-tenant Kubernetes clusters. enabled: false # Enable or disable the KubeSphere Events System. # operator: # resources: {} # exporter: # resources: {} # ruler: # enabled: true # replicas: 2 # resources: {} logging: # (CPU: 57 m, Memory: 2.76 G) Flexible logging functions are provided for log query, collection and management in a unified console. Additional log collectors can be added, such as Elasticsearch, Kafka and Fluentd. enabled: false # Enable or disable the KubeSphere Logging System. logsidecar: enabled: true replicas: 2 # resources: {} metrics_server: # (CPU: 56 m, Memory: 44.35 MiB) It enables HPA (Horizontal Pod Autoscaler). enabled: false # Enable or disable metrics-server. 
monitoring: storageClass: "" # If there is an independent StorageClass you need for Prometheus, you can specify it here. The default StorageClass is used by default. node_exporter: port: 9100 # resources: {} # kube_rbac_proxy: # resources: {} # kube_state_metrics: # resources: {} # prometheus: # replicas: 1 # Prometheus replicas are responsible for monitoring different segments of data source and providing high availability. # volumeSize: 20Gi # Prometheus PVC size. # resources: {} # operator: # resources: {} # alertmanager: # replicas: 1 # AlertManager Replicas. # resources: {} # notification_manager: # resources: {} # operator: # resources: {} # proxy: # resources: {} gpu: # GPU monitoring-related plug-in installation. nvidia_dcgm_exporter: # Ensure that gpu resources on your hosts can be used normally, otherwise this plug-in will not work properly. enabled: false # Check whether the labels on the GPU hosts contain "nvidia.com/gpu.present=true" to ensure that the DCGM pod is scheduled to these nodes. # resources: {} whizard: # take effect only when multicluster is enabled enabled: false server: # for host cluster nodePort: 30990 # to expose whizard gateway service by this nodePort to member cluster client: # for member cluster gatewayUrl: "" clusterName: "" multicluster: clusterRole: none # host | member | none # You can install a solo cluster, or specify it as the Host or Member Cluster. network: multus_cni: # Multus CNI enables attaching multiple network interfaces to pods in Kubernetes. enabled: false # Enable or disable multus-cni. networkpolicy: # Network policies allow network isolation within the same cluster, which means firewalls can be set up between certain instances (Pods). # Make sure that the CNI network plugin used by the cluster supports NetworkPolicy. There are a number of CNI network plugins that support NetworkPolicy, including Calico, Cilium, Kube-router, Romana and Weave Net. enabled: false # Enable or disable network policies. 
ippool: # Use Pod IP Pools to manage the Pod network address space. Pods to be created can be assigned IP addresses from a Pod IP Pool. type: none # Specify "calico" for this field if Calico is used as your CNI plugin. "none" means that Pod IP Pools are disabled. topology: # Use Service Topology to view Service-to-Service communication based on Weave Scope. type: none # Specify "weave-scope" for this field to enable Service Topology. "none" means that Service Topology is disabled. openpitrix: # An App Store that is accessible to all platform tenants. You can use it to manage apps across their entire lifecycle. store: enabled: true # Enable or disable the KubeSphere App Store. servicemesh: # (0.3 Core, 300 MiB) Provide fine-grained traffic management, observability and tracing, and visualized traffic topology. enabled: false # Base component (pilot). Enable or disable KubeSphere Service Mesh (Istio-based). istio: # Customizing the istio installation configuration, refer to https://istio.io/latest/docs/setup/additional-setup/customize-installation/ components: ingressGateways: - name: istio-ingressgateway enabled: false cni: enabled: false edgeruntime: # Add edge nodes to your cluster and deploy workloads on edge nodes. enabled: false kubeedge: # kubeedge configurations enabled: false cloudCore: cloudHub: advertiseAddress: # At least a public IP address or an IP address which can be accessed by edge nodes must be provided. - "" # Note that once KubeEdge is enabled, CloudCore will malfunction if the address is not provided. 
service: cloudhubNodePort: "30000" cloudhubQuicNodePort: "30001" cloudhubHttpsNodePort: "30002" cloudstreamNodePort: "30003" tunnelNodePort: "30004" # resources: {} # hostNetWork: false iptables-manager: enabled: true mode: "external" # resources: {} # edgeService: # resources: {} notification: history: enabled: true terminal: # image: 'alpine:3.15' # There must be an nsenter program in the image timeout: 600 # Container timeout, if set to 0, no timeout will be used. The unit is seconds springcloud: enabled: false # Refer to https://raw.githubusercontent.com/nacos-group/nacos-k8s/master/helm/values.yaml # https://github.com/kubesphere/kse-installer/blob/kse-3.3/roles/springcloud/files/springcloud-controller/charts/nacos/values.yaml # nacos: # # Default values for nacos. # # This is a YAML-formatted file. # # Declare variables to be passed into your templates. # global: # # mode: cluster # mode: standalone # ############################nacos########################### # nacos: # image: # repository: nacos/nacos-server # tag: latest # pullPolicy: IfNotPresent # plugin: # enable: true # image: # repository: nacos/nacos-peer-finder-plugin # tag: 1.1 # replicaCount: 1 # domainName: cluster.local # preferhostmode: hostname # serverPort: 8848 # health: # enabled: false # auth: # enabled: false # tokenExpireSeconds: 18000 # token: SecretKey012345678901234567890123456789012345678901234567890123456789 # cacheEnabled: false # storage: # type: embedded # # type: mysql # # db: # # host: localhost # # name: nacos # # port: 3306 # # username: usernmae # # password: password # # param: characterEncoding=utf8&connectTimeout=1000&socketTimeout=3000&autoReconnect=true&useSSL=false # persistence: # enabled: false # data: # accessModes: # - ReadWriteOnce # storageClassName: manual # resources: # requests: # storage: 5Gi # service: # type: ClusterIP # port: 8848 # ingress: # enabled: false # annotations: { } # resources: # # We usually recommend not to specify default resources and to 
leave this as a conscious # # choice for the user. This also increases chances charts run on environments with little # # resources, such as Minikube. If you do want to specify resources, uncomment the following # # lines, adjust them as necessary, and remove the curly braces after 'resources:'. # # limits: # # cpu: 100m # # memory: 128Mi # requests: # cpu: 500m # memory: 2Gi # annotations: {} # nodeSelector: {} # tolerations: [] # affinity: {} ``` ``` Console: http://xxx.xxx.xxx.xxx:30880 Account: admin Password: P@88w0rd ``` ### 增加补丁 ```shell # 前端部署更新 kubectl -n kubesphere-system set image deployment/ks-console ks-console=registry.cn-beijing.aliyuncs.com/kse/ks-console:v3.2.1-cmft # 更新 ks-apiserver 镜像 kubectl -n kubesphere-system set image deployment/ks-apiserver ks-apiserver=registry.cn-beijing.aliyuncs.com/qkcp/ks-apiserver:v3.2.1-cmft # 更新 ks-controller-manager 镜像 kubectl -n kubesphere-system set image deployment/ks-controller-manager ks-controller-manager=registry.cn-beijing.aliyuncs.com/qkcp/ks-controller-manager:v3.2.1-cmft ``` ### 停止 ks-installer ``` kubectl -n kubesphere-system scale deployment ks-installer --replicas=0 ``` ### 常见命令 ``` kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l 'app in (ks-install, ks-installer)' -o jsonpath='{.items[0].metadata.name}') -f kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l 'app in (ks-apiserver)' -o jsonpath='{.items[0].metadata.name}') -f ``` ### 本地调试 sudo ./ktctl exchange ks-apiserver -n kubesphere-system --expose 9090 9090 是 KSE 的本地端口 http://172.31.73.239:30880/kapis/schedule.kubesphere.io/v1alpha1/namespaces/default/analysis/analysistask-abcd1234 http://172.31.73.239:30880/kapis/config.kubesphere.io/v1alpha2/configs/configz ### 网络问题 获取 pod 所在的 node:kubectl get pod -owide -n namespace | grep podname ``` root@cmft-2:~# kubectl get pod -owide -n kubesphere-schedule-system | grep chart-1668345572-kubesphere-schedule-6974f6f857-k6qnx 
chart-1668345572-kubesphere-schedule-6974f6f857-k6qnx 2/2 Running 0 38m 10.233.120.78 cmft-2 <none> <none> ``` 查看指定 pod 运行的容器 ID:kubectl describe pod podname -n namespace 或者 docker ps | grep podname ``` root@cmft-2:~# docker ps | grep chart-1668345572-kubesphere-schedule-6974f6f857-k6qnx 66d219ac1fa7 e5506ffeb56b "/apiserver" 39 minutes ago Up 39 minutes k8s_kubesphere-schedule-api-server_chart-1668345572-kubesphere-schedule-6974f6f857-k6qnx_kubesphere-schedule-system_148eea70-32e2-43a9-9728-fe25f14642dc_0 5d1bc7008551 0533bd89edd5 "/controller-manager" 40 minutes ago Up 39 minutes k8s_kubesphere-schedule-controller_chart-1668345572-kubesphere-schedule-6974f6f857-k6qnx_kubesphere-schedule-system_148eea70-32e2-43a9-9728-fe25f14642dc_0 5c5b7b3ead94 kubesphere/pause:3.4.1 "/pause" 40 minutes ago Up 40 minutes k8s_POD_chart-1668345572-kubesphere-schedule-6974f6f857-k6qnx_kubesphere-schedule-system_148eea70-32e2-43a9-9728-fe25f14642dc_0 ``` 获得容器进程的 pid: docker inspect --format "{{.State.Pid}}" 容器 ID ``` root@cmft-2:~# docker inspect --format "{{.State.Pid}}" 66d219ac1fa7 22492 ``` 进入该容器的 network namespace: nsenter -n --target PID ``` root@cmft-2:~# nsenter -n --target 22492 ``` 使用 tcpdump 抓包:tcpdump -i any -vnn host 192.168.1.5 and udp and port 6033 -w name.pcap ``` tcpdump -i any -vnn host 192.168.1.5 and udp and port 6033 -w name.pcap ```