| 192.168.3.60 | master1.org | K8s control-plane node 1, Master and etcd |
| 192.168.3.61 | master2.org | K8s control-plane node 2, Master and etcd |
| 192.168.3.62 | master3.org | K8s control-plane node 3, Master and etcd |
| 192.168.3.63 | node1.org | K8s worker node 1 |
| 192.168.3.64 | node2.org | K8s worker node 2 |
| 192.168.3.65 | node3.org | K8s worker node 3 |
| 192.168.3.66 | ha1.org | K8s control-plane access point 1, provides HA and load balancing |
| 192.168.3.67 | ha2.org | K8s control-plane access point 2, provides HA and load balancing |
| 192.168.3.68 | kubeapi.org | VIP, hosted on ha1 and ha2 |
Note: Master nodes need at least 2 GB of RAM, otherwise kubeadm init will fail.
Initial Environment Preparation
1.2.1 OS and Kubernetes Component Versions
The operating system, container engine, and Kubernetes versions used in this deployment:
OS: Rocky Linux 9.7
Kubernetes: 1.35
# Install basic tools on all hosts
dnf install lrzsz wget vim -y
Set a unique hostname on each host and add IP/hostname resolution:
cat >> /etc/hosts <<EOF
192.168.3.68 kubeapi.org kubeapi
192.168.3.60 master1.org master1
192.168.3.61 master2.org master2
192.168.3.62 master3.org master3
192.168.3.63 node1.org node1
192.168.3.64 node2.org node2
192.168.3.65 node3.org node3
192.168.3.66 ha1.org ha1
192.168.3.67 ha2.org ha2
EOF
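After populating /etc/hosts, it is worth a quick sanity check that every cluster name resolves on each host. The `check_hosts` helper below is an illustrative sketch, not part of the standard procedure:

```shell
#!/bin/bash
# check_hosts FILE NAME... -> print every NAME with no entry in FILE
check_hosts() {
  local file=$1; shift
  local name
  for name in "$@"; do
    # match the name as a whole word on a non-comment line
    grep -Ev '^\s*#' "$file" | grep -qw "$name" || echo "missing: $name"
  done
}

# Verify the cluster names against the local hosts file (no output = all present)
check_hosts /etc/hosts kubeapi master1 master2 master3 node1 node2 node3 ha1 ha2
```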
Synchronize time on the Master and all node hosts
dnf install -y chrony
vi /etc/chrony.conf
# point the server directive at a reachable NTP source, e.g.:
server ntp1.aliyun.com iburst
grep -q "makestep" /etc/chrony.conf || echo "makestep 1.0 3" >> /etc/chrony.conf
systemctl enable --now chronyd
chronyc -a makestep
timedatectl set-ntp true
timedatectl status
(crontab -l 2>/dev/null; echo '*/2 * * * * /usr/bin/chronyc -a makestep >/dev/null 2>&1') | crontab -
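To confirm the clock is actually in sync rather than just the service running, the offset can be parsed out of `chronyc tracking`. The helper below is an illustrative sketch (the threshold of 0.5 s is an arbitrary choice):

```shell
#!/bin/bash
# ntp_offset_ok "TRACKING_OUTPUT" MAX_SECONDS -> exit 0 when |offset| <= MAX
ntp_offset_ok() {
  local out=$1 max=$2
  # chronyc prints a line like:
  #   System time     : 0.000123456 seconds fast of NTP time
  echo "$out" | awk -v max="$max" '
    /^System time/ { found = 1; off = $4; if (off < 0) off = -off; ok = (off <= max) }
    END { exit (found && ok) ? 0 : 1 }'
}

# Usage on a live host:
# ntp_offset_ok "$(chronyc tracking)" 0.5 && echo "clock in sync"
```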
Disable SELinux
setenforce 0
sed -i 's#^\(SELINUX=\).*#\1disabled#' /etc/sysconfig/selinux
Disable the firewall
Run on the Master and all node hosts:
# 1. Stop firewalld
systemctl stop firewalld.service      # stop firewalld
systemctl disable firewalld.service   # keep firewalld off at boot
# 2. If iptables-services is installed, keep it stopped and disabled as well
yum install iptables-services         # install (optional)
systemctl stop iptables.service       # stop iptables
systemctl disable iptables.service    # keep iptables off at boot
Disable the swap device
Run on the Master and all node hosts:
swapoff -a
sed -i '/swap/s/^/#/' /etc/fstab
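After editing fstab it is easy to leave an active swap entry behind, which makes kubelet refuse to start. A small check (illustrative sketch, `swap_fstab_ok` is a hypothetical helper):

```shell
#!/bin/bash
# swap_fstab_ok FILE -> exit 0 when no uncommented swap entry remains in FILE
swap_fstab_ok() {
  ! grep -Ev '^\s*#' "$1" | grep -qw swap
}

# On a live host, also confirm no swap is currently active:
# swapon --show   # no output means swap is off
# free -h         # the Swap line should read 0B
swap_fstab_ok /etc/fstab && echo "fstab: swap disabled"
```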
Set up keepalived
Deploy and configure keepalived on ha1 and ha2 as follows:
dnf install -y keepalived
# keepalived configuration
cp /usr/share/doc/keepalived/keepalived.conf.vrrp /etc/keepalived/keepalived.conf
vim /etc/keepalived/keepalived.conf
# Configuration on the first node (ha1):
cat /etc/keepalived/keepalived.conf
global_defs {
    router_id ha1.org
}
vrrp_script check_haproxy {
    script "killall -0 haproxy"
    interval 1
    weight -30
    fall 3
    rise 2
    timeout 2
}
vrrp_instance VI_1 {
    state MASTER
    interface ens33
    virtual_router_id 66
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 123456
    }
    virtual_ipaddress {
        192.168.3.68/24 dev ens33 label ens33:1
    }
    track_script {
        check_haproxy
    }
}
# Configuration on the second node (ha2):
global_defs {
    router_id ha2.org
}
vrrp_instance VI_1 {
    state BACKUP
    interface ens33
    virtual_router_id 66
    priority 80
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 123456
    }
    virtual_ipaddress {
        192.168.3.68/24 dev ens33 label ens33:1
    }
}
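Why these numbers work: the vrrp_script carries weight -30, so when haproxy dies on ha1 its effective priority drops from 100 to 70, below ha2's 80, and the VIP fails over. A tiny sketch of keepalived's priority arithmetic (negative weights are applied on script failure, positive ones on success):

```shell
#!/bin/bash
# effective_priority BASE WEIGHT SCRIPT_OK(0|1)
# Models keepalived's rule: a negative weight is added only when the
# tracked script fails; a positive weight only when it succeeds.
effective_priority() {
  local base=$1 weight=$2 ok=$3
  if [ "$weight" -lt 0 ] && [ "$ok" -eq 0 ]; then
    echo $(( base + weight ))
  elif [ "$weight" -gt 0 ] && [ "$ok" -eq 1 ]; then
    echo $(( base + weight ))
  else
    echo "$base"
  fi
}

effective_priority 100 -30 1   # ha1, haproxy up:   100 -> stays MASTER
effective_priority 100 -30 0   # ha1, haproxy down: 70  -> below ha2's 80, VIP moves
```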
systemctl start keepalived.service
systemctl enable keepalived.service
systemctl status keepalived.service
# Verify that keepalived is working: the VIP should appear on the MASTER node
hostname -I
192.168.3.66 192.168.3.68
ip a
Set up HAProxy
Use HAProxy to provide layer-4 reverse proxying and load balancing for the Kubernetes API server.
# Run the following on both ha1 and ha2
cat >> /etc/sysctl.conf <<EOF
net.ipv4.ip_nonlocal_bind = 1
EOF
sysctl -p
# Install and configure haproxy
dnf -y install haproxy
vim /etc/haproxy/haproxy.cfg
cat /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Example configuration for a possible web application.  See the
# full configuration options online.
#
#   https://www.haproxy.org/download/1.8/doc/configuration.txt
#
#---------------------------------------------------------------------

#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
    # to have these messages end up in /var/log/haproxy.log you will
    # need to:
    # 1) configure syslog to accept network log events
    # 2) configure local2 events to go to the /var/log/haproxy.log file
    log         127.0.0.1 local2
    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     4000
    user        haproxy
    group       haproxy
    daemon
    # turn on stats unix socket
    stats socket /var/lib/haproxy/stats
    # utilize system-wide crypto-policies
    ssl-default-bind-ciphers PROFILE=SYSTEM
    ssl-default-server-ciphers PROFILE=SYSTEM

#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
    mode                    http
    log                     global
    option                  httplog
    option                  dontlognull
    option http-server-close
    option forwardfor       except 127.0.0.0/8
    option                  redispatch
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         10s
    timeout client          1m
    timeout server          1m
    timeout http-keep-alive 10s
    timeout check           10s
    maxconn                 3000

#---------------------------------------------------------------------
# main frontend which proxys to the backends
#---------------------------------------------------------------------
frontend main
    bind *:5000
    acl url_static       path_beg       -i /static /images /javascript /stylesheets
    acl url_static       path_end       -i .jpg .gif .png .css .js
    use_backend static          if url_static
    default_backend             app

#---------------------------------------------------------------------
# static backend for serving up images, stylesheets and such
#---------------------------------------------------------------------
backend static
    balance     roundrobin
    server      static 127.0.0.1:4331 check

#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend app
    balance     roundrobin
    server  app1 127.0.0.1:5001 check
    server  app2 127.0.0.1:5002 check
    server  app3 127.0.0.1:5003 check
    server  app4 127.0.0.1:5004 check

########## Add the following ##########
listen stats
    mode http
    bind 0.0.0.0:8888
    stats enable
    log global
    stats uri /status
    stats auth admin:123456

listen kubernetes-api-6443
    bind 192.168.3.68:6443
    mode tcp
    server master1 192.168.3.60:6443 check inter 3s fall 3 rise 3
    server master2 192.168.3.61:6443 check inter 3s fall 3 rise 3
    server master3 192.168.3.62:6443 check inter 3s fall 3 rise 3
haproxy -c -f /etc/haproxy/haproxy.cfg   # validate the configuration first
systemctl restart haproxy
systemctl enable haproxy
systemctl status haproxy
Browse to http://ha1.org:8888/status to see the HAProxy stats page.
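Before relying on the VIP, it is worth confirming the kubernetes-api-6443 block actually lists every master as an active (uncommented) backend. The `api_backends` helper below is a hypothetical sketch that greps them out of the config:

```shell
#!/bin/bash
# api_backends FILE -> print the uncommented backend IP:port entries
# of the kubernetes-api-6443 listen block
api_backends() {
  awk '
    /^listen kubernetes-api-6443/ { inblock = 1; next }
    inblock && /^listen|^frontend|^backend/ { inblock = 0 }
    inblock && $1 == "server" { print $3 }
  ' "$1"
}

# Usage on ha1/ha2 -- expect one line per master:
# api_backends /etc/haproxy/haproxy.cfg
```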
1.2.8 Kernel Optimization
Run on the Master and all node hosts
# Load the kernel modules at boot
cat <<EOF | tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
# Load the modules immediately
modprobe overlay
modprobe br_netfilter
# Verify the modules are loaded
lsmod |grep -E 'overlay|br_netfilter'
br_netfilter 36864 0
bridge 425984 1 br_netfilter
overlay 241664 0
# Set the required sysctl parameters; they persist across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
# Apply the sysctl parameters without rebooting
sysctl --system
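A quick way to confirm the three parameters took effect (illustrative sketch; on a host where br_netfilter is not loaded, the bridge keys will not exist and are reported as BAD):

```shell
#!/bin/bash
# sysctl_ok KEY EXPECTED [BASE] -> exit 0 when BASE (default /proc/sys)
# reports the expected value for the dotted key
sysctl_ok() {
  local base=${3:-/proc/sys}
  local path="$base/${1//.//}"       # dots become path separators
  [ -r "$path" ] && [ "$(cat "$path")" = "$2" ]
}

for kv in net.ipv4.ip_forward=1 \
          net.bridge.bridge-nf-call-iptables=1 \
          net.bridge.bridge-nf-call-ip6tables=1; do
  sysctl_ok "${kv%%=*}" "${kv##*=}" && echo "OK  $kv" || echo "BAD $kv"
done
```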
Install Containerd on All Hosts
Package-based Containerd installation
Install Containerd on every node
# 1. Install required dependencies
sudo dnf install -y dnf-plugins-core
# 2. Add the Docker repository (Aliyun mirror)
sudo dnf config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# 3. Install Containerd
sudo dnf install -y containerd.io
# Containerd's configuration uses the TOML (Tom's Obvious Minimal Language) format: toml.io
mkdir -p /etc/containerd/
containerd config default > /etc/containerd/config.toml
# 1) Point the sandbox (pause) image at the Aliyun google_containers mirror (needed on networks inside China)
grep sandbox /etc/containerd/config.toml
sed -i "s#registry.k8s.io/pause#registry.aliyuncs.com/google_containers/pause#g" /etc/containerd/config.toml
# 2) Enable the systemd cgroup driver for containerd (recommended for Kubernetes)
sed -i 's#SystemdCgroup = false#SystemdCgroup = true#g' /etc/containerd/config.toml
grep SystemdCgroup /etc/containerd/config.toml
# Sync the config to all other nodes
for i in {61..65}; do scp /etc/containerd/config.toml root@192.168.3.$i:/etc/containerd/config.toml; done
systemctl restart containerd
# Start and enable at boot
systemctl enable --now containerd
# Check the status
systemctl status containerd
# Verify the versions
containerd --version
ctr version
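Besides ctr, the kubeadm packages ship crictl, which talks to containerd over its CRI socket; without a config file it may warn about deprecated default endpoints. A commonly used /etc/crictl.yaml (the socket path below assumes the stock containerd.io package):

```yaml
# /etc/crictl.yaml -- point crictl at containerd's CRI socket
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
timeout: 10
```

With this in place, `crictl info` should report the containerd runtime status directly.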
Install kubeadm, kubelet, and kubectl on All Hosts
Reference links for installing from mirror sites in China (Aliyun and Tsinghua):
https://developer.aliyun.com/mirror/kubernetes
https://mirrors.tuna.tsinghua.edu.cn/help/kubernetes/
Example: run the following on all master and node hosts to install the k8s packages
cat <<EOF | tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.35/rpm/
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.35/rpm/repodata/repomd.xml.key
exclude=kubelet kubeadm kubectl cri-tools kubernetes-cni
EOF
dnf install kubelet kubeadm kubectl --disableexcludes=kubernetes -y
systemctl enable --now kubelet
systemctl status kubelet
Example: enable kubeadm command completion
# 1. Install bash-completion
dnf install -y bash-completion
# 2. Reload
source /usr/share/bash-completion/bash_completion
# Method 1
[root@master1 ~]#kubeadm completion bash > /etc/profile.d/kubeadm_completion.sh
# Method 2
[root@master1 ~]#echo 'source <(kubeadm completion bash)' >> ~/.bashrc
# Take effect in a new shell
bash
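The same two approaches work for kubectl as well. If you script this across hosts, an idempotent append avoids stacking duplicate lines in .bashrc; `add_completion_line` below is a hypothetical helper:

```shell
#!/bin/bash
# add_completion_line FILE CMD -> append 'source <(CMD completion bash)'
# to FILE only if the exact line is not already present (idempotent)
add_completion_line() {
  local file=$1 cmd=$2
  local line="source <(${cmd} completion bash)"
  grep -qxF "$line" "$file" 2>/dev/null || echo "$line" >> "$file"
}

# add_completion_line ~/.bashrc kubeadm
# add_completion_line ~/.bashrc kubectl
```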
Initialize the Kubernetes Cluster on the First Master Node
Example: initialize the cluster
K8S_RELEASE_VERSION=1.35.3
kubeadm init --kubernetes-version=v${K8S_RELEASE_VERSION} --control-plane-endpoint kubeapi.org --pod-network-cidr 10.244.0.0/16 --service-cidr 10.96.0.0/12 --token-ttl=0 --image-repository registry.aliyuncs.com/google_containers --upload-certs
cat k8s-init.log
Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of control-plane nodes running the following command on each as root:

  kubeadm join kubeapi.org:6443 --token kg6wnu.jpxd72a0p4f497mn \
        --discovery-token-ca-cert-hash sha256:774ccb51e7ec0382eaa726c93caa2cd95c7fcd3af78340a0b96e8fd4bc8347a2 \
        --control-plane --certificate-key e39113cb5cc6b79c00769542301db2830db26c19080936b6a857e96d4b043d21

Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join kubeapi.org:6443 --token kg6wnu.jpxd72a0p4f497mn \
        --discovery-token-ca-cert-hash sha256:774ccb51e7ec0382eaa726c93caa2cd95c7fcd3af78340a0b96e8fd4bc8347a2
crictl images
To re-initialize, run the following:
# If there are worker nodes, run this on the workers first, then on the control-plane nodes
kubeadm reset -f
rm -rf /etc/cni/net.d/ $HOME/.kube/config
Generate the kubectl authorization file on the first master node
kubectl is the command-line client of kube-apiserver; it implements almost every management operation apart from initial deployment and is one of the commands Kubernetes administrators use most. kubectl must be authenticated and authorized by the API server before it can perform management operations. A kubeadm-deployed cluster generates an administrator-privileged configuration file, /etc/kubernetes/admin.conf, which kubectl loads from the default path "$HOME/.kube/config".
You can also point kubectl at a different file with the --kubeconfig option.
Copy the administrator configuration file into the target user's home directory (here, the current user root):
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
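A quick check that kubectl really goes through the VIP rather than a single master: the server field in the kubeconfig should point at kubeapi.org:6443. The `kubeconfig_server` helper below is illustrative:

```shell
#!/bin/bash
# kubeconfig_server FILE -> print the cluster server URL(s) from a kubeconfig
kubeconfig_server() {
  awk '$1 == "server:" { print $2 }' "$1"
}

# Expected for this cluster:
# kubeconfig_server $HOME/.kube/config   # -> https://kubeapi.org:6443
```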
Configure the network add-on on the first master node
Pod networking in Kubernetes is implemented by third-party plugins, of which there are dozens; the best known include flannel, calico, canal, and kube-router. A simple, easy-to-use choice is the flannel project from CoreOS. The commands below deploy flannel onto the Kubernetes cluster.
For a binary-based installation, you would first download a flanneld build matching your OS and hardware platform to every node and place it under /opt/bin/. Here that would be flanneld-amd64, whose latest release at the time of writing was v0.19.1, so the download would be run on every node in the cluster.
Tip: flanneld can be downloaded from https://github.com/flannel-io/flannel/releases
Then, on master1, the first initialized master node, run the following to deploy kube-flannel onto Kubernetes:
# With no network plugin installed yet, the node shows the following status:
kubectl get node
NAME          STATUS     ROLES           AGE     VERSION
master1.org   NotReady   control-plane   3m27s   v1.35.4
wget https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml
Import the image (if pre-downloaded as a tarball)
ctr -n k8s.io images import flannel.tar    # import into the k8s.io namespace so kubelet can find it
View the images
ctr -n k8s.io images ls
kubectl apply -f kube-flannel.yml
# After a short wait, the status looks like this:
kubectl get pod -A
NAMESPACE      NAME                                  READY   STATUS    RESTARTS   AGE
kube-flannel   kube-flannel-ds-9k4l6                 1/1     Running   0          51s
kube-system    coredns-bbdc5fdf6-kdhr5               1/1     Running   0          14m
kube-system    coredns-bbdc5fdf6-xtwl2               1/1     Running   0          14m
kube-system    etcd-master1.org                      1/1     Running   0          15m
kube-system    kube-apiserver-master1.org            1/1     Running   0          15m
kube-system    kube-controller-manager-master1.org   1/1     Running   0          15m
kube-system    kube-proxy-2z5qp                      1/1     Running   0          14m
kube-system    kube-scheduler-master1.org            1/1     Running   0          15m

[root@master1 ~]# kubectl get node
NAME          STATUS   ROLES           AGE   VERSION
master1.org   Ready    control-plane   15m   v1.35.4
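When scaling the cluster out, waiting for nodes to become Ready can be scripted by counting non-Ready rows in `kubectl get node` output. The `not_ready_count` filter below is an illustrative sketch:

```shell
#!/bin/bash
# not_ready_count: read `kubectl get node` output on stdin and print
# the number of nodes whose STATUS column is not exactly "Ready"
not_ready_count() {
  awk 'NR > 1 && $2 != "Ready" { n++ } END { print n + 0 }'
}

# Usage: kubectl get node | not_ready_count   # 0 once the CNI is up everywhere
```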
Extend the Kubernetes Cluster to Multi-Master Mode
Run the control-plane join command from the kubeadm init output on master2 and master3:
kubeadm join kubeapi.org:6443 --token kg6wnu.jpxd72a0p4f497mn \
--discovery-token-ca-cert-hash sha256:774ccb51e7ec0382eaa726c93caa2cd95c7fcd3af78340a0b96e8fd4bc8347a2 \
--control-plane --certificate-key e39113cb5cc6b79c00769542301db2830db26c19080936b6a857e96d4b043d21
mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
kubectl get node
NAME          STATUS   ROLES           AGE     VERSION
master1.org   Ready    control-plane   25m     v1.35.4
master2.org   Ready    control-plane   6m18s   v1.35.4
master3.org   Ready    control-plane   4m30s   v1.35.4
Join All Worker Nodes to the Kubernetes Cluster
Run the following on all worker nodes to join them to the cluster:
kubeadm join kubeapi.org:6443 --token kg6wnu.jpxd72a0p4f497mn \
--discovery-token-ca-cert-hash sha256:774ccb51e7ec0382eaa726c93caa2cd95c7fcd3af78340a0b96e8fd4bc8347a2
kubectl get node
NAME          STATUS   ROLES           AGE     VERSION
master1.org   Ready    control-plane   28m     v1.35.4
master2.org   Ready    control-plane   9m16s   v1.35.4
master3.org   Ready    control-plane   7m28s   v1.35.4
node1.org     Ready    <none>          71s     v1.35.4
node2.org     Ready    <none>          59s     v1.35.4
node3.org     Ready    <none>          52s     v1.35.4
Verify in a browser (username/password: admin:123456):
http://kubeapi.org:8888/status
