
Building a Highly Available Kubernetes Cluster on Alibaba Cloud ECS with Terway, CCM, and SLB

Introduction

When running a self-managed Kubernetes cluster on Alibaba Cloud ECS, instead of exposing services through a self-configured Nginx you can use Alibaba Cloud's official CNI plugin Terway together with the Cloud Controller Manager (CCM) to expose services through SLB. This article describes that approach.

Conventions

Minimum server requirements

Node name   Private IP     Spec    OS          Roles
k8s-01      192.168.1.27   4C8G    CentOS 7.X  control plane, etcd
k8s-02      192.168.1.28   4C8G    CentOS 7.X  control plane, etcd
k8s-03      192.168.1.29   4C8G    CentOS 7.X  control plane, etcd
k8s-04      192.168.1.30   4C16G   CentOS 7.X  Node
k8s-05      192.168.1.31   4C16G   CentOS 7.X  Node
  • For high availability, use an odd number (>= 3) of control plane / etcd nodes

  • The Alibaba Cloud CCM never uses master nodes as SLB backends

Version conventions

The Terway deployment manifests currently published on GitHub only support Kubernetes versions below 1.22, so we use the last 1.21 release: 1.21.13.

Installation

IPVS

cat > /etc/sysctl.d/k8s.conf << EOF
net.ipv4.ip_forward                 = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF

# Apply the settings
sysctl --system
# Create a modules file so the required modules are loaded automatically after reboot
cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
modprobe br_netfilter
modprobe -- ip_vs
modprobe -- ip_vs_lc
modprobe -- ip_vs_wlc
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_lblc
modprobe -- ip_vs_lblcr
modprobe -- ip_vs_dh
modprobe -- ip_vs_sh
# Not supported by the 3.10 kernel
#modprobe -- ip_vs_fo
modprobe -- ip_vs_nq
modprobe -- ip_vs_sed
modprobe -- ip_vs_ftp
EOF

chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack_ipv4
yum install ipset ipvsadm  -y
# Verify that the modules are loaded and the tools are installed
lsmod | grep -e ip_vs -e nf_conntrack_ipv4

If output like the following appears, the setup succeeded:

[Screenshot: lsmod output listing the loaded ip_vs modules]

HAProxy

Because an Alibaba Cloud SLB backend server cannot also act as a client of the same SLB, we load-balance the master nodes ourselves with HAProxy or Nginx. Since Nginx is often also used as a web server / middleware and restarting it would disrupt load balancing, HAProxy is used here.

# Installing and configuring HAProxy is straightforward
yum install -y haproxy
# cat /etc/haproxy/haproxy.cfg
global
    log 127.0.0.1 local0
    log 127.0.0.1 local1 notice
 
    maxconn 4096
    chroot /var/lib/haproxy
    user haproxy
    group haproxy
    daemon
defaults
    log global
    mode tcp
    option tcplog
    option dontlognull
    retries 3
    option redispatch
    maxconn 2000
#    contimeout 5000
#    clitimeout 50000
#    srvtimeout 50000
    timeout connect 5000
    timeout client 50000
    timeout server 50000
listen kube-apiserver
    bind 127.0.0.1:6443
    server k8s-01 192.168.1.27:5443 check inter 3000 fall 3 rise 3
    server k8s-02 192.168.1.28:5443 check inter 3000 fall 3 rise 3
    server k8s-03 192.168.1.29:5443 check inter 3000 fall 3 rise 3
  
# Enable at boot
systemctl enable haproxy
# Start haproxy
systemctl start haproxy

# Verify that it is listening
ss -anltp | grep haproxy

ETCD

Installation

Use the following shell script to install the latest etcd release:

#!/bin/bash

. /etc/profile

function before_install() {
    getent group etcd >/dev/null || groupadd -r etcd
    getent passwd etcd >/dev/null || useradd -r -g etcd -d /var/lib/etcd -s /sbin/nologin -c "etcd user" etcd

    if [ ! -d "/var/lib/etcd" ]; then
        mkdir /var/lib/etcd
        chown -R etcd:etcd /var/lib/etcd
    fi
}

function get_cfssl_version(){
    local cfssl_version=$(curl -s https://api.github.com/repos/cloudflare/cfssl/releases/latest | grep tag_name | cut -d '"' -f 4| cut -b 2-)
    echo $cfssl_version
}

function install_cfssl(){
    local cfssl_version=$(get_cfssl_version)
    local cfssl_url="https://github.com/cloudflare/cfssl/releases/download/v${cfssl_version}/cfssl_${cfssl_version}_linux_amd64"
    local cfssl_bin="cfssl"
    local cfssl_bin_path="/usr/local/bin/${cfssl_bin}"

    local cfssljson_url="https://github.com/cloudflare/cfssl/releases/download/v${cfssl_version}/cfssljson_${cfssl_version}_linux_amd64"
    local cfssljson_bin="cfssljson"
    local cfssljson_bin_path="/usr/local/bin/${cfssljson_bin}"

    echo "cfssl_version: ${cfssl_version}"

    echo "Downloading cfssl..."
    rm -f ${cfssl_bin_path}
    wget -q -O ${cfssl_bin_path} ${cfssl_url}
    if [ $? -ne 0 ]; then
        echo "Failed to download cfssl."
        exit 1
    fi
    chmod +x ${cfssl_bin_path}
    echo "cfssl installed."
    
    echo "Downloading cfssljson..."
    rm -f ${cfssljson_bin_path}
    wget -q -O ${cfssljson_bin_path} ${cfssljson_url}
    if [ $? -ne 0 ]; then
        echo "Failed to download cfssljson."
        exit 1
    fi
    chmod +x ${cfssljson_bin_path}
    echo "cfssljson installed."
}

function get_etcd_version() {
    local etcd_version=$(curl -s https://api.github.com/repos/etcd-io/etcd/releases/latest | grep tag_name | cut -d '"' -f 4)
    echo $etcd_version
}

function install_etcd() {
    local etcd_version=$(get_etcd_version)
    local etcd_url="https://github.com/etcd-io/etcd/releases/download/${etcd_version}/etcd-${etcd_version}-linux-amd64.tar.gz"
    local etcd_file="etcd-${etcd_version}-linux-amd64.tar.gz"
    local etcd_dir="etcd-${etcd_version}-linux-amd64"

    echo "etcd_version: ${etcd_version}"

    cd /tmp
    echo "Downloading etcd..."
    wget -q -O ${etcd_file} ${etcd_url}
    if [ $? -ne 0 ]; then
        echo "download etcd failed"
        exit 1
    fi

    tar -xzf ${etcd_file}
    rm -f /usr/local/bin/etcd /usr/local/bin/etcdctl /usr/local/bin/etcdutl
    mv ${etcd_dir}/etcd /usr/local/bin/
    mv ${etcd_dir}/etcdctl /usr/local/bin/
    mv ${etcd_dir}/etcdutl /usr/local/bin/
    chmod +x /usr/local/bin/etcd /usr/local/bin/etcdctl /usr/local/bin/etcdutl
    echo "etcd installed successfully"

    echo "clean up..."
    rm -f ${etcd_file}
    rm -rf ${etcd_dir}

    cd -
}

function etcd_systemd() {
    cat > /etc/systemd/system/etcd.service << EOF
[Unit]
Description=etcd - highly-available key value store
Documentation=https://github.com/coreos/etcd
After=network.target
Wants=network-online.target

[Service]
#Type=notify
Type=simple
User=etcd
ExecStart=/usr/local/bin/etcd  --config-file /etc/etcd/etcd.yaml
Restart=on-failure
RestartSec=10s

LimitNOFILE=65536
#MemoryLow=200M
Nice=-10
IOSchedulingClass=best-effort
IOSchedulingPriority=2

[Install]
WantedBy=multi-user.target
EOF
}

function after_install() {
    mkdir -p /data/etcd/{data,wal}
    chown -R etcd:etcd /data/etcd

    mkdir -p /etc/etcd/{cfssl,ssl}
    chown -R etcd:etcd /etc/etcd

    echo "Installing systemd service..."
    etcd_systemd
    systemctl enable etcd
    
    grep 'ETCDCTL_API=3' /etc/profile > /dev/null 2>&1
    if [ $? -ne 0 ]; then
        echo "export ETCDCTL_API=3" >> /etc/profile
        . /etc/profile
    fi
    
    grep '$PATH:/usr/local/bin' /etc/profile > /dev/null 2>&1
    if [ $? -ne 0 ]; then
        echo 'export PATH=$PATH:/usr/local/bin' >> /etc/profile
    fi
    
    . /etc/profile
}

function main(){
    before_install

    install_etcd
    install_cfssl

    after_install
}

main

Generating certificates

Create the certificate configuration files etcd-root-ca-csr.json, etcd-csr.json, and etcd-gencert.json:

# Adjust the hosts list for your environment
# cat etcd-csr.json
{
    "key": {
        "algo": "rsa",
        "size": 2048 
    },
    "names": [
        {
            "O": "etcd",
            "OU": "etcd Security",
            "L": "Chengdu",
            "ST": "Sichuan",
            "C": "CN"
        }
    ],
    "CN": "etcd",
    "hosts": [
        "127.0.0.1",
        "localhost",
        "*.etcd.node",
        "*.kubernetes.node",
        "192.168.1.27",
        "192.168.1.28",
        "192.168.100.29"
    ]
}
# cat etcd-gencert.json
{
  "signing": {
    "default": {
        "usages": [
          "signing",
          "key encipherment",
          "server auth",
          "client auth"
        ],
        "expiry": "87600h"
    }
  }
}
# etcd-root-ca-csr.json
{
    "CN": "etcd-root-ca",
    "key": {
        "algo": "rsa",
        "size": 4096
    },
    "names": [
        {
            "O": "etcd",
            "OU": "etcd Security",
            "L": "Chengdu",
            "ST": "Sichuan",
            "C": "CN"
        }
    ],
    "ca": {
        "expiry": "87600h"
    }
}

Generate the certificates:

#!/usr/bin/env bash

set -e

cfssl gencert --initca=true etcd-root-ca-csr.json | cfssljson --bare etcd-root-ca
cfssl gencert --ca etcd-root-ca.pem --ca-key etcd-root-ca-key.pem --config etcd-gencert.json etcd-csr.json | cfssljson --bare etcd
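To confirm that the issued certificate actually contains the hosts listed above, you can inspect its SANs (a quick check with openssl against the etcd.pem produced by cfssljson):

openssl x509 -in etcd.pem -noout -text | grep -A 1 "Subject Alternative Name"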

Copy the certificates to the ssl directory

# Copy the certificates into the etcd ssl directory
cp -rf /etc/etcd/cfssl/*.pem /etc/etcd/ssl
# Fix ownership
chown -R etcd: /etc/etcd/ssl

Configuration file

All three etcd nodes need this configuration, adjusted to each node's own name and IPs:

# This is the configuration file for the etcd server.

# Human-readable name for this member.
# Must be unique per node
name: 'etcd-01'

# Path to the data directory.
data-dir: '/data/etcd/data'

# Path to the dedicated wal directory.
wal-dir: '/data/etcd/wal'

# Number of committed transactions to trigger a snapshot to disk.
#snapshot-count: 10000
snapshot-count: 1000

# Time (in milliseconds) of a heartbeat interval.
heartbeat-interval: 100

# Time (in milliseconds) for an election to timeout.
election-timeout: 1000

# Raise alarms when backend size exceeds the given quota. 0 means use the
# default quota.
#quota-backend-bytes: 0
# 5 GiB
quota-backend-bytes: 5368709120

# List of comma separated URLs to listen on for peer traffic.
listen-peer-urls: https://192.168.1.27:2380

# List of comma separated URLs to listen on for client traffic.
listen-client-urls: https://192.168.1.27:2379,https://127.0.0.1:2379

# Maximum number of snapshot files to retain (0 is unlimited).
max-snapshots: 10

# Maximum number of wal files to retain (0 is unlimited).
max-wals: 10

# Comma-separated white list of origins for CORS (cross-origin resource sharing).
cors:

# List of this member's peer URLs to advertise to the rest of the cluster.
# The URLs needed to be a comma-separated list.
initial-advertise-peer-urls: https://192.168.1.27:2380

# List of this member's client URLs to advertise to the public.
# The URLs needed to be a comma-separated list.
advertise-client-urls: https://192.168.1.27:2379

# Discovery URL used to bootstrap the cluster.
discovery:

# Valid values include 'exit', 'proxy'
discovery-fallback: 'proxy'

# HTTP proxy to use for traffic to discovery service.
discovery-proxy:

# DNS domain used to bootstrap initial cluster.
discovery-srv:

# Initial cluster configuration for bootstrapping.
initial-cluster: etcd-01=https://192.168.1.27:2380,etcd-02=https://192.168.1.28:2380,etcd-03=https://192.168.1.29:2380

# Initial cluster token for the etcd cluster during bootstrap.
# Random string; must be identical on every member of the same cluster
# It can be generated with: openssl rand -base64 15
initial-cluster-token: 'krMWJC2ha5'

# Initial cluster state ('new' or 'existing').
initial-cluster-state: 'new'

# Reject reconfiguration requests that would cause quorum loss.
strict-reconfig-check: true

# Enable runtime profiling data via HTTP server
enable-pprof: true

# Valid values include 'on', 'readonly', 'off'
proxy: 'off'

# Time (in milliseconds) an endpoint will be held in a failed state.
proxy-failure-wait: 5000

# Time (in milliseconds) of the endpoints refresh interval.
proxy-refresh-interval: 30000

# Time (in milliseconds) for a dial to timeout.
proxy-dial-timeout: 1000

# Time (in milliseconds) for a write to timeout.
proxy-write-timeout: 5000

# Time (in milliseconds) for a read to timeout.
proxy-read-timeout: 0

client-transport-security:
  # Path to the client server TLS cert file.
  cert-file: /etc/etcd/ssl/etcd.pem

  # Path to the client server TLS key file.
  key-file: /etc/etcd/ssl/etcd-key.pem

  # Enable client cert authentication.
  client-cert-auth: true

  # Path to the client server TLS trusted CA cert file.
  trusted-ca-file: /etc/etcd/ssl/etcd-root-ca.pem

  # Client TLS using generated certificates
  auto-tls: true

peer-transport-security:
  # Path to the peer server TLS cert file.
  cert-file: /etc/etcd/ssl/etcd.pem

  # Path to the peer server TLS key file.
  key-file: /etc/etcd/ssl/etcd-key.pem

  # Enable peer client cert authentication.
  client-cert-auth: true

  # Path to the peer server TLS trusted CA cert file.
  trusted-ca-file: /etc/etcd/ssl/etcd-root-ca.pem

  # Peer TLS using generated certificates.
  auto-tls: true

# The validity period of the self-signed certificate, the unit is year.
self-signed-cert-validity: 1

# Enable debug-level logging for etcd.
log-level: debug

logger: zap

# Specify 'stdout' or 'stderr' to skip journald logging even when running under systemd.
log-outputs: [systemd/journal]

# Force to create a new one member cluster.
force-new-cluster: false

auto-compaction-mode: revision
auto-compaction-retention: "300"
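The install script only enables the etcd service; once the configuration file is in place on all three nodes, start etcd on each of them (the first member will wait until a quorum is reachable):

systemctl daemon-reload
systemctl start etcd
systemctl status etcd
# Follow the logs if startup hangs
journalctl -u etcd -f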

Common commands

# Health check
etcdctl endpoint health --cacert /etc/etcd/ssl/etcd-root-ca.pem --cert /etc/etcd/ssl/etcd.pem --key /etc/etcd/ssl/etcd-key.pem --endpoints https://192.168.1.27:2379,https://192.168.1.28:2379,https://192.168.1.29:2379
# Wipe ALL etcd data -- use with extreme caution
etcdctl --cacert /etc/etcd/ssl/etcd-root-ca.pem --cert /etc/etcd/ssl/etcd.pem --key /etc/etcd/ssl/etcd-key.pem --endpoints https://192.168.1.27:2379,https://192.168.1.28:2379,https://192.168.1.29:2379 del "" --prefix
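Two further checks that are useful right after bootstrap (same TLS flags as above): member list shows the peers, endpoint status shows which member is the leader.

# List cluster members
etcdctl member list -w table --cacert /etc/etcd/ssl/etcd-root-ca.pem --cert /etc/etcd/ssl/etcd.pem --key /etc/etcd/ssl/etcd-key.pem --endpoints https://192.168.1.27:2379
# Show per-endpoint status, including the current leader
etcdctl endpoint status -w table --cacert /etc/etcd/ssl/etcd-root-ca.pem --cert /etc/etcd/ssl/etcd.pem --key /etc/etcd/ssl/etcd-key.pem --endpoints https://192.168.1.27:2379,https://192.168.1.28:2379,https://192.168.1.29:2379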

Docker

Repository mirror

Outside mainland China, the official upstream repository is recommended.

For servers in mainland China, use the Alibaba Cloud mirror: https://developer.aliyun.com/mirror/docker-ce

# Alibaba Cloud ECS instances can use the internal Alibaba Cloud mirror directly
# cat /etc/yum.repos.d/docker-ce.repo
[docker-ce-stable]
name=Docker CE Stable - $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/$basearch/stable
enabled=1
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-stable-debuginfo]
name=Docker CE Stable - Debuginfo $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/debug-$basearch/stable
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-stable-source]
name=Docker CE Stable - Sources
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/source/stable
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-test]
name=Docker CE Test - $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/$basearch/test
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-test-debuginfo]
name=Docker CE Test - Debuginfo $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/debug-$basearch/test
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-test-source]
name=Docker CE Test - Sources
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/source/test
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-nightly]
name=Docker CE Nightly - $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/$basearch/nightly
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-nightly-debuginfo]
name=Docker CE Nightly - Debuginfo $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/debug-$basearch/nightly
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-nightly-source]
name=Docker CE Nightly - Sources
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/source/nightly
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

Installation and configuration

To match Kubernetes 1.21.13, install Docker 20.10.14:

# Refresh the yum cache
yum makecache fast
# Install
yum -y install docker-ce-20.10.14-3.el7
# Configuration
# The registry mirror below is private; replace it with your own
# cat /etc/docker/daemon.json
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "registry-mirror": [
      "https://e88hruwq.mirror.aliyuncs.com" 
  ],
  "data-root": "/data/docker_data",
  "log-driver": "json-file",
  "log-level": "",
  "log-opts": {
      "max-size": "10m",
      "max-file": "3"
  }
}
# Enable at boot and start
systemctl enable docker
systemctl restart docker
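After the restart, confirm that Docker picked up the systemd cgroup driver and the custom data root; kubeadm's preflight checks will warn if the cgroup driver does not match the kubelet's:

docker info | grep -E "Cgroup Driver|Docker Root Dir"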

Kubernetes

Pod vSwitch

Terway relies on Alibaba Cloud vSwitches: in the Alibaba Cloud console, create a vSwitch in the ECS instances' zone to serve as the Pod subnet.

This article uses 192.168.32.0/19 as an example.

[Screenshot: creating the Pod vSwitch in the VPC console]
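If you prefer the CLI to the console, the vSwitch can also be created with the aliyun CLI. This is only a sketch: the region, zone, and VPC ID below are placeholders, and the parameters should be checked against the current CreateVSwitch API documentation.

aliyun vpc CreateVSwitch \
    --RegionId cn-hangzhou \
    --ZoneId cn-hangzhou-k \
    --VpcId vpc-xxxxxxxx \
    --CidrBlock 192.168.32.0/19 \
    --VSwitchName k8s-pod-vswitch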

If you use kube-flannel as the cluster CNI plugin, this step is not required.

Repository mirror

Outside mainland China, the official upstream repository is recommended.

In mainland China, the Alibaba Cloud mirror is likewise recommended: https://developer.aliyun.com/mirror/kubernetes

ECS instances can use the internal Alibaba Cloud mirror directly.

# cat /etc/yum.repos.d/kubernetes.repo 
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.cloud.aliyuncs.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/kubernetes/yum/doc/yum-key.gpg http://mirrors.cloud.aliyuncs.com/kubernetes/yum/doc/rpm-package-key.gpg

Component installation

yum install -y kubelet-1.21.13-0 kubeadm-1.21.13-0 kubectl-1.21.13-0 ebtables ethtool
# Pin the versions to prevent accidental upgrades
echo 'exclude=kubelet kubeadm kubectl docker-ce docker-ce-cli' >> /etc/yum.conf

/usr/lib/systemd/system/kubelet.service

Per the CCM requirements, modify the kubelet service startup parameters:

# cat /usr/lib/systemd/system/kubelet.service
[Unit]
Description=kubelet: The Kubernetes Node Agent
Documentation=https://kubernetes.io/docs/
Wants=network-online.target
After=network-online.target

[Service]
#ExecStart=/usr/bin/kubelet
ExecStart=/usr/bin/kubelet --cloud-provider=external --hostname-override=xxx.xxx --provider-id=xxx.xxx
Restart=always
StartLimitInterval=0
RestartSec=10

[Install]
WantedBy=multi-user.target

The values for hostname-override and provider-id must be obtained on the corresponding ECS instance with the following command:

echo $(curl -s http://100.100.100.200/latest/meta-data/region-id).$(curl -s http://100.100.100.200/latest/meta-data/instance-id)
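A small sketch that fills both flags into the unit file in one step, assuming the xxx.xxx placeholders from the unit above are still in place:

# Build the <region-id>.<instance-id> value and substitute it into kubelet.service
PROVIDER_ID=$(curl -s http://100.100.100.200/latest/meta-data/region-id).$(curl -s http://100.100.100.200/latest/meta-data/instance-id)
sed -i "s|--hostname-override=xxx.xxx --provider-id=xxx.xxx|--hostname-override=${PROVIDER_ID} --provider-id=${PROVIDER_ID}|" /usr/lib/systemd/system/kubelet.service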

After the changes, reload systemd and enable the service at boot:

systemctl daemon-reload
systemctl enable kubelet

Installing Kubernetes

kubeadm configuration

An example kubeadm.yaml:

apiVersion: kubeadm.k8s.io/v1beta2
kind: InitConfiguration
#nodeRegistration:
#    criSocket: /run/containerd/containerd.sock
#    name: containerd
localAPIEndpoint:
  # IP of the first master node
  advertiseAddress: "192.168.1.27"
  # 6443 is taken by the local nginx/haproxy, so the apiserver listens on 5443
  bindPort: 5443
# Generate this key with:
# kubeadm certs certificate-key
certificateKey: e8734dd17eaab2d55cfb363037f0d27cdd0f0cf1016d163d07428ac3aa9e19f1
---
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
# Use the external etcd cluster
etcd:
  external:
    endpoints:
    - "https://192.168.1.27:2379"
    - "https://192.168.1.28:2379"
    - "https://192.168.1.29:2379"
    caFile: "/etc/etcd/ssl/etcd-root-ca.pem"
    certFile: "/etc/etcd/ssl/etcd.pem"
    keyFile: "/etc/etcd/ssl/etcd-key.pem"
# Network configuration
networking:
  serviceSubnet: "10.25.0.0/16"
  # flannel
  #podSubnet: "10.30.0.0/16"
  # terway
  # requires the dedicated Pod vSwitch created in the Alibaba Cloud console
  podSubnet: "192.168.32.0/19"
  dnsDomain: "cluster.local"
kubernetesVersion: "v1.21.13"
# Cluster-wide apiserver LB address; nginx/haproxy runs locally on every node, so point it at localhost
controlPlaneEndpoint: "127.0.0.1:6443"
apiServer:
  # Extra arguments for the apiserver
  extraArgs:
    v: "4"
    alsologtostderr: "true"
    # Audit log settings
    audit-log-maxage: "20"
    audit-log-maxbackup: "10"
    audit-log-maxsize: "100"
    audit-log-path: "/var/log/kube-audit/audit.log"
    audit-policy-file: "/etc/kubernetes/audit-policy.yaml"
    authorization-mode: "Node,RBAC"
    event-ttl: "720h"
    runtime-config: "api/all=true"
    service-node-port-range: "30000-50000"
    service-cluster-ip-range: "10.25.0.0/16"
  # Because a custom audit policy is configured, the host's audit policy file
  # must be mounted into the kube-apiserver pod
  extraVolumes:
  - name: "audit-config"
    hostPath: "/etc/kubernetes/audit-policy.yaml"
    mountPath: "/etc/kubernetes/audit-policy.yaml"
    readOnly: true
    pathType: "File"
  - name: "audit-log"
    hostPath: "/var/log/kube-audit"
    mountPath: "/var/log/kube-audit"
    pathType: "DirectoryOrCreate"
  # Extra SANs for the apiserver certificate
  # A wildcard domain is included in case it is needed later
  certSANs:
  - "*.k8s.node"
  - "192.168.1.27"
  - "192.168.1.28"
  - "192.168.1.29"
  - "127.0.0.1"
  timeoutForControlPlane: 5m
controllerManager:
  extraArgs:
    v: "4"
    # CIDR mask size of the Pod CIDR allocated to each node
    node-cidr-mask-size: "19"
    deployment-controller-sync-period: "10s"
    experimental-cluster-signing-duration: "87600h"
    node-monitor-grace-period: "20s"
    pod-eviction-timeout: "2m"
    terminated-pod-gc-threshold: "30"
scheduler:
  extraArgs:
    v: "4"
certificatesDir: "/etc/kubernetes/pki"
# gcr.io is blocked in mainland China
imageRepository: "registry.aliyuncs.com/google_containers"
#imageRepository: "registry.cn-chengdu.aliyuncs.com/archky-k8s"
clusterName: "kubernetes"
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
# kubelet specific options here
# Allow kubelet to start even when swap is enabled
failSwapOn: false
nodeStatusUpdateFrequency: 5s
# Eviction thresholds; see the kubelet docs and tune as needed
evictionSoft:
  "imagefs.available": "15%"
  "memory.available": "512Mi"
  "nodefs.available": "15%"
  "nodefs.inodesFree": "10%"
evictionSoftGracePeriod:
  "imagefs.available": "3m"
  "memory.available": "1m"
  "nodefs.available": "3m"
  "nodefs.inodesFree": "1m"
evictionHard:
  "imagefs.available": "10%"
  "memory.available": "256Mi"
  "nodefs.available": "10%"
  "nodefs.inodesFree": "5%"
evictionMaxPodGracePeriod: 30
imageGCLowThresholdPercent: 70
imageGCHighThresholdPercent: 80
kubeReserved:
  "cpu": "500m"
  "memory": "512Mi"
  "ephemeral-storage": "1Gi"
rotateCertificates: true
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
# kube-proxy specific options here
# Should match podSubnet
clusterCIDR: "192.168.32.0/19"
# Enable ipvs mode
mode: "ipvs"
ipvs:
  minSyncPeriod: 5s
  syncPeriod: 5s
  # ipvs scheduling algorithm
  scheduler: "wrr"

Read the kubeadm.yaml configuration and its comments carefully.
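Note that the apiServer extraArgs above point at /etc/kubernetes/audit-policy.yaml; that file must exist on every control plane node before kubeadm init, otherwise the kube-apiserver static pod cannot start. A minimal placeholder policy (an assumption on my part: it simply logs every request at Metadata level; tune it to your needs):

mkdir -p /etc/kubernetes
cat > /etc/kubernetes/audit-policy.yaml << EOF
apiVersion: audit.k8s.io/v1
kind: Policy
rules:
  - level: Metadata
EOF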

Security group

To keep the internal network open, go to the Alibaba Cloud security group and allow the subnets used in kubeadm.yaml to reach each other.
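The rules can be added in the console, or with the aliyun CLI. A sketch for one subnet, assuming your security group ID and region (the exact parameters should be verified against the AuthorizeSecurityGroup API; repeat for the service and node subnets):

aliyun ecs AuthorizeSecurityGroup \
    --RegionId cn-hangzhou \
    --SecurityGroupId sg-xxxxxxxx \
    --IpProtocol tcp \
    --PortRange 1/65535 \
    --SourceCidrIp 192.168.32.0/19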

Installing the control plane
# Validate the configuration before installing
kubeadm init phase preflight --config=kubeadm.yaml
# --v sets the log verbosity
kubeadm init --config kubeadm.yaml --upload-certs --v=6

On success, the command prints output like the following:

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a Pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  /docs/concepts/cluster-administration/addons/

You can now join any number of machines by running the following on each node
as root:

  kubeadm join 127.0.0.1:6443 --token <token> --discovery-token-ca-cert-hash sha256:<hash>
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 127.0.0.1:6443 --token <token> \
    --discovery-token-ca-cert-hash sha256:<hash>
Configuring kubectl

The following script sets up the kubectl config:

#!/bin/bash

rm -rf $HOME/.kube
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Checking cluster status
# Check component status
kubectl get cs
# Check node status
kubectl get node
# Check all pod status
kubectl get pod -A
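Since every node talks to the apiserver through its local haproxy on 127.0.0.1:6443, it is also worth checking that path directly; it should return ok:

curl -k https://127.0.0.1:6443/healthz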
Tearing down the cluster

If the cluster or a node fails to install properly, tear it down and start over:

# Drain and remove the node
kubectl drain <node_name> --delete-emptydir-data --force --ignore-daemonsets
# Reset the node
kubeadm reset
# Remove leftover virtual interfaces
ip link del flannel.1
ip link del cni0
# Wipe ALL etcd data -- use with extreme caution
etcdctl --cacert /etc/etcd/ssl/etcd-root-ca.pem --cert /etc/etcd/ssl/etcd.pem --key /etc/etcd/ssl/etcd-key.pem --endpoints https://192.168.1.27:2379,https://192.168.1.28:2379,https://192.168.1.29:2379 del "" --prefix

Installing the CNI

For Alibaba Cloud's comparison of Terway and Flannel, see the official document on Kubernetes cluster network planning.

Terway

Recommended reading first:

  1. Official documentation: Use the Terway network plugin

  2. GitHub documentation: Terway network plugin

Terway has two installation modes:

  • VPC mode

    • VPC mode uses Aliyun VPC routes to connect the Pod network and can assign a dedicated ENI to a Pod. To install: edit the credentials and CIDR settings in the eni.conf section of terway.yml, plus the Network CIDR, then run kubectl apply -f terway.yml.
  • ENI multi-IP mode

    • ENI multi-IP mode uses secondary IPs on Aliyun ENIs to connect the Pod network and is not limited by the VPC route-entry quota. To install: edit the credentials and resource settings in the eni.conf section of terway-multiip.yml, then run kubectl apply -f terway-multiip.yml.

Because Terway creates elastic network interfaces (ENIs) automatically, it needs the corresponding permissions.

Create a RAM sub-user in the Alibaba Cloud console and attach a policy with the following permissions:

{
  "Version": "1",
  "Statement": [
    {
      "Action": [
        "ecs:CreateNetworkInterface",
        "ecs:DescribeNetworkInterfaces",
        "ecs:AttachNetworkInterface",
        "ecs:DetachNetworkInterface",
        "ecs:DeleteNetworkInterface",
        "ecs:DescribeInstanceAttribute",
        "ecs:DescribeInstanceTypes",
        "ecs:AssignPrivateIpAddresses",
        "ecs:UnassignPrivateIpAddresses",
        "ecs:DescribeInstances",
        "ecs:ModifyNetworkInterfaceAttribute"
      ],
      "Resource": [
        "*"
      ],
      "Effect": "Allow"
    },
    {
      "Action": [
        "vpc:DescribeVSwitches"
      ],
      "Resource": [
        "*"
      ],
      "Effect": "Allow"
    }
  ]
}

We use Terway's ENI multi-IP mode. The full manifest:

apiVersion: v1
kind: ServiceAccount
metadata:
  name: terway
  namespace: kube-system

---

kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: terway-pod-reader
  namespace: kube-system
rules:
  - apiGroups: [ "" ]
    resources: [ "pods", "nodes", "namespaces", "configmaps", "serviceaccounts" ]
    verbs: [ "get", "watch", "list", "update" ]
  - apiGroups: [ "" ]
    resources:
      - events
    verbs:
      - create
  - apiGroups: [ "networking.k8s.io" ]
    resources:
      - networkpolicies
    verbs:
      - get
      - list
      - watch
  - apiGroups: [ "extensions" ]
    resources:
      - networkpolicies
    verbs:
      - get
      - list
      - watch
  - apiGroups: [ "" ]
    resources:
      - pods/status
    verbs:
      - update
  - apiGroups: [ "crd.projectcalico.org" ]
    resources: [ "*" ]
    verbs: [ "*" ]

---

apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: terway-binding
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: terway-pod-reader
subjects:
  - kind: ServiceAccount
    name: terway
    namespace: kube-system

---

kind: ConfigMap
apiVersion: v1
metadata:
  name: eni-config
  namespace: kube-system
data:
  eni_conf: |
    {
      "version": "1",
      "access_key": "ak",
      "access_secret": "ak",
      "security_group": "sg-xxx",
      "service_cidr": "10.25.0.0/16",
      "vswitches": {
        "cn-hongkong-b": ["vsw-xxx"]
      },
      "max_pool_size": 5,
      "min_pool_size": 0
    }    
  10-terway.conf: |
    {
      "cniVersion": "0.3.1",
      "name": "terway",
      "type": "terway",
      "eniip_virtual_type": "Veth",
      "ip_stack": "ipv4"
    }    
  # eniip_virtual_type: virtual type for eni multi ip "Veth" || "IPVlan"
  disable_network_policy: "false"

---

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: terway
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: terway
  template:
    metadata:
      labels:
        app: terway
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      hostPID: true
      nodeSelector:
        beta.kubernetes.io/arch: amd64
      tolerations:
        - operator: "Exists"
      terminationGracePeriodSeconds: 0
      serviceAccountName: terway
      hostNetwork: true
      initContainers:
        - name: terway-init
          image: registry.aliyuncs.com/acs/terway:v1.2.3
          imagePullPolicy: IfNotPresent
          securityContext:
            privileged: true
          command:
            - 'sh'
            - '-c'
            - 'cp /usr/bin/terway /opt/cni/bin/;
                  chmod +x /opt/cni/bin/terway;
                  cp /etc/eni/10-terway.conf /etc/cni/net.d/;
                  sysctl -w net.ipv4.conf.eth0.rp_filter=0;
                  modprobe sch_htb || true;
                  chroot /host sh -c "systemctl disable eni.service; rm -f /etc/udev/rules.d/75-persistent-net-generator.rules /lib/udev/rules.d/60-net.rules /lib/udev/rules.d/61-eni.rules /lib/udev/write_net_rules && udevadm control --reload-rules && udevadm trigger; true"'
          volumeMounts:
            - name: configvolume
              mountPath: /etc/eni
            - name: cni-bin
              mountPath: /opt/cni/bin/
            - name: cni
              mountPath: /etc/cni/net.d/
            - mountPath: /lib/modules
              name: lib-modules
            - mountPath: /host
              name: host-root
      containers:
        - name: terway
          image: registry.aliyuncs.com/acs/terway:v1.2.3
          imagePullPolicy: IfNotPresent
          command: [ '/usr/bin/terwayd', '-log-level', 'debug', '-daemon-mode', 'ENIMultiIP' ]
          securityContext:
            privileged: true
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          volumeMounts:
            - name: configvolume
              mountPath: /etc/eni
            - mountPath: /var/run/
              name: eni-run
            - mountPath: /opt/cni/bin/
              name: cni-bin
            - mountPath: /lib/modules
              name: lib-modules
            - mountPath: /var/lib/cni/networks
              name: cni-networks
            - mountPath: /var/lib/cni/terway
              name: cni-terway
            - mountPath: /var/lib/kubelet/device-plugins
              name: device-plugin-path
        - name: policy
          image: registry.aliyuncs.com/acs/terway:v1.2.3
          imagePullPolicy: IfNotPresent
          command: [ "/bin/policyinit.sh" ]
          env:
            - name: NODENAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: DISABLE_POLICY
              valueFrom:
                configMapKeyRef:
                  name: eni-config
                  key: disable_network_policy
                  optional: true
          securityContext:
            privileged: true
          resources:
            requests:
              cpu: 250m
          livenessProbe:
            tcpSocket:
              port: 9099
              host: localhost
            periodSeconds: 10
            initialDelaySeconds: 10
            failureThreshold: 6
          readinessProbe:
            tcpSocket:
              port: 9099
              host: localhost
            periodSeconds: 10
          volumeMounts:
            - mountPath: /lib/modules
              name: lib-modules
      volumes:
        - name: configvolume
          configMap:
            name: eni-config
            items:
              - key: eni_conf
                path: eni.json
              - key: 10-terway.conf
                path: 10-terway.conf
        - name: cni-bin
          hostPath:
            path: /opt/cni/bin
            type: "Directory"
        - name: cni
          hostPath:
            path: /etc/cni/net.d
        - name: eni-run
          hostPath:
            path: /var/run/
            type: "Directory"
        - name: lib-modules
          hostPath:
            path: /lib/modules
        - name: cni-networks
          hostPath:
            path: /var/lib/cni/networks
        - name: cni-terway
          hostPath:
            path: /var/lib/cni/terway
        - name: device-plugin-path
          hostPath:
            path: /var/lib/kubelet/device-plugins
            type: "Directory"
        - name: host-root
          hostPath:
            path: /
            type: "Directory"

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: felixconfigurations.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: FelixConfiguration
    plural: felixconfigurations
    singular: felixconfiguration

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: bgpconfigurations.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: BGPConfiguration
    plural: bgpconfigurations
    singular: bgpconfiguration

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: ippools.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: IPPool
    plural: ippools
    singular: ippool

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: hostendpoints.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: HostEndpoint
    plural: hostendpoints
    singular: hostendpoint

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: clusterinformations.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: ClusterInformation
    plural: clusterinformations
    singular: clusterinformation

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: globalnetworkpolicies.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: GlobalNetworkPolicy
    plural: globalnetworkpolicies
    singular: globalnetworkpolicy

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: globalnetworksets.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: GlobalNetworkSet
    plural: globalnetworksets
    singular: globalnetworkset

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: networkpolicies.crd.projectcalico.org
spec:
  scope: Namespaced
  group: crd.projectcalico.org
  version: v1
  names:
    kind: NetworkPolicy
    plural: networkpolicies
    singular: networkpolicy

Be sure to update the image version in the Terway manifest; the latest release is recommended.

# Configuration notes
data:
  eni_conf: |
    {
      "version": "1",
      "access_key": "ak",
      "access_secret": "sk",
      "security_group": "the security group used by the ECS instances",
      # must match serviceSubnet in kubeadm.yaml
      "service_cidr": "10.25.0.0/16",
      # vSwitch zone and vSwitch ID
      "vswitches": {
        "cn-hangzhou-k": ["vswitch ID"]
      }
flannel

When flannel runs in ali-vpc mode, you likewise need to create a RAM sub-user and grant it the corresponding permission (AliyunVPCFullAccess), as shown below:

[Screenshot: granting AliyunVPCFullAccess to the RAM sub-user]

The manifest:

---
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: psp.flannel.unprivileged
  annotations:
    seccomp.security.alpha.kubernetes.io/allowedProfileNames: docker/default
    seccomp.security.alpha.kubernetes.io/defaultProfileName: docker/default
    apparmor.security.beta.kubernetes.io/allowedProfileNames: runtime/default
    apparmor.security.beta.kubernetes.io/defaultProfileName: runtime/default
spec:
  privileged: false
  volumes:
  - configMap
  - secret
  - emptyDir
  - hostPath
  allowedHostPaths:
  - pathPrefix: "/etc/cni/net.d"
  - pathPrefix: "/etc/kube-flannel"
  - pathPrefix: "/run/flannel"
  readOnlyRootFilesystem: false
  # Users and groups
  runAsUser:
    rule: RunAsAny
  supplementalGroups:
    rule: RunAsAny
  fsGroup:
    rule: RunAsAny
  # Privilege Escalation
  allowPrivilegeEscalation: false
  defaultAllowPrivilegeEscalation: false
  # Capabilities
  allowedCapabilities: ['NET_ADMIN', 'NET_RAW']
  defaultAddCapabilities: []
  requiredDropCapabilities: []
  # Host namespaces
  hostPID: false
  hostIPC: false
  hostNetwork: true
  hostPorts:
  - min: 0
    max: 65535
  # SELinux
  seLinux:
    # SELinux is unused in CaaSP
    rule: 'RunAsAny'
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
rules:
- apiGroups: ['extensions']
  resources: ['podsecuritypolicies']
  verbs: ['use']
  resourceNames: ['psp.flannel.unprivileged']
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: flannel
  namespace: kube-system
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-system
  labels:
    tier: node
    app: flannel
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }    
  net-conf.json: |
    {
      "Network": "10.30.0.0/16",
      "Backend": {
        "Type": "ali-vpc"
      }
    }    
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-system
  labels:
    tier: node
    app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      # Add DNS server 223.6.6.6 (any public resolver works)
      dnsPolicy: "None"
      dnsConfig:
        nameservers:
        - 223.6.6.6
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      - name: install-cni
        image: quay.io/coreos/flannel:v0.14.0
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        image: quay.io/coreos/flannel:v0.14.0
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
          limits:
            cpu: "100m"
            memory: "50Mi"
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        # ali-vpc
        - name: ACCESS_KEY_ID
          value: Ak
        - name: ACCESS_KEY_SECRET
          value: Sk
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
      - name: cni
        hostPath:
          path: /etc/cni/net.d
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg

Configuration changes

  • The Network field must match podSubnet in kubeadm.yaml

  • Replace ACCESS_KEY_ID and ACCESS_KEY_SECRET with their real values

Adding control plane nodes

After kubeadm init completes, it prints a command for joining additional control plane nodes, but running it as printed will fail; the apiserver-related flags must be added:

# Run this on the node being added; --apiserver-advertise-address is that node's own private IP
kubeadm join 127.0.0.1:6443 --token <token> --discovery-token-ca-cert-hash sha256:<hash> \
    --control-plane --certificate-key <certificate-key> \
    --apiserver-advertise-address 192.168.1.28 \
    --apiserver-bind-port 5443 \
    --ignore-preflight-errors=Swap

Adding worker nodes

Adding worker nodes is simpler; only the flag that keeps the preflight check from failing when swap is enabled needs to be added:

kubeadm join 127.0.0.1:6443 --token <token> \
    --discovery-token-ca-cert-hash sha256:<hash> \
    --ignore-preflight-errors=Swap

Allowing masters to run Pods

By default the control plane nodes do not take part in Pod scheduling. Run the following on any control plane node to allow scheduling on all of them:

kubectl taint nodes --all node-role.kubernetes.io/master-
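You can confirm the taint is gone; each master should now report Taints: <none>:

kubectl describe nodes k8s-01 k8s-02 k8s-03 | grep -i taint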

Installing the CCM plugin

Overview

The Cloud Controller Manager (CCM) integrates Kubernetes with Alibaba Cloud infrastructure such as CLB (formerly SLB) and VPC. It mainly provides:

  • Load balancer management (see the example Service after this list)

When a Service has Type=LoadBalancer, the CCM creates or configures an Alibaba Cloud CLB for it, including the CLB instance, listeners, and backend server groups. When the Service's backend endpoints or the cluster nodes change, the CCM automatically updates the CLB's virtual server group.

  • Cross-node communication

When the cluster network plugin is Flannel, the CCM connects the container and node networks so that containers can communicate across nodes: it writes each node's Pod CIDR into the VPC route table. This needs no configuration; it works as soon as the CCM is installed.

Note: the CCM never adds master nodes as SLB backends, under any circumstances.
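As a quick illustration of the load balancer feature, the sketch below exposes a Deployment labelled app=nginx-demo through a CLB that the CCM provisions automatically. The annotation name and value are an assumption based on the cloud-provider-alibaba-cloud documentation; check the current annotation list before relying on it.

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Service
metadata:
  name: nginx-demo
  annotations:
    # "internet" asks for a public CLB, "intranet" for a private one (assumed annotation)
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-address-type: "internet"
spec:
  type: LoadBalancer
  selector:
    app: nginx-demo
  ports:
    - port: 80
      targetPort: 80
      protocol: TCP
EOF
# The CLB address shows up under EXTERNAL-IP once provisioning finishes
kubectl get svc nginx-demo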

Creating the RAM sub-user

The CCM manages Alibaba Cloud resources, so it likewise needs a RAM sub-user with the following policy attached:

{
    "Version": "1",
    "Statement": [
        {
            "Action": [
                "ecs:Describe*",
                "ecs:CreateRouteEntry",
                "ecs:DeleteRouteEntry",
                "ecs:CreateNetworkInterface",
                "ecs:DeleteNetworkInterface",
                "ecs:CreateNetworkInterfacePermission",
                "ecs:DeleteNetworkInterfacePermission",
                "ecs:ModifyInstanceAttribute",
                "ecs:AttachKeyPair",
                "ecs:StopInstance",
                "ecs:StartInstance",
                "ecs:ReplaceSystemDisk"
            ],
            "Resource": [
                "*"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "slb:Describe*",
                "slb:CreateLoadBalancer",
                "slb:DeleteLoadBalancer",
                "slb:ModifyLoadBalancerInternetSpec",
                "slb:RemoveBackendServers",
                "slb:AddBackendServers",
                "slb:RemoveTags",
                "slb:AddTags",
                "slb:StopLoadBalancerListener",
                "slb:StartLoadBalancerListener",
                "slb:SetLoadBalancerHTTPListenerAttribute",
                "slb:SetLoadBalancerHTTPSListenerAttribute",
                "slb:SetLoadBalancerTCPListenerAttribute",
                "slb:SetLoadBalancerUDPListenerAttribute",
                "slb:CreateLoadBalancerHTTPSListener",
                "slb:CreateLoadBalancerHTTPListener",
                "slb:CreateLoadBalancerTCPListener",
                "slb:CreateLoadBalancerUDPListener",
                "slb:DeleteLoadBalancerListener",
                "slb:CreateVServerGroup",
                "slb:DescribeVServerGroups",
                "slb:DeleteVServerGroup",
                "slb:SetVServerGroupAttribute",
                "slb:DescribeVServerGroupAttribute",
                "slb:ModifyVServerGroupBackendServers",
                "slb:AddVServerGroupBackendServers",
                "slb:ModifyLoadBalancerInstanceSpec",
                "slb:ModifyLoadBalancerInternetSpec",
                "slb:SetLoadBalancerModificationProtection",
                "slb:SetLoadBalancerDeleteProtection",
                "slb:SetLoadBalancerName",
                "slb:RemoveVServerGroupBackendServers"
            ],
            "Resource": [
                "*"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "vpc:Describe*",
                "vpc:DeleteRouteEntry",
                "vpc:CreateRouteEntry"
            ],
            "Resource": [
                "*"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "log:AnalyzeProductLog",
                "log:CreateIndex",
                "log:UpdateIndex",
                "log:DeleteIndex",
                "log:CreateLogStore",
                "log:UpdateLogStore",
                "log:DeleteLogStore",
                "log:CreateDashboard",
                "log:UpdateDashboard",
                "log:DeleteDashboard"
            ],
            "Resource": [
                "acs:log:*:*:project/*/logstore/alb_*",
                "acs:log:*:*:project/*/dashboard/*"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "alb:TagResources",
                "alb:UnTagResources",
                "alb:ListServerGroups",
                "alb:ListServerGroupServers",
                "alb:AddServersToServerGroup",
                "alb:RemoveServersFromServerGroup",
                "alb:ReplaceServersInServerGroup",
                "alb:CreateLoadBalancer",
                "alb:DeleteLoadBalancer",
                "alb:UpdateLoadBalancerAttribute",
                "alb:UpdateLoadBalancerEdition",
                "alb:EnableLoadBalancerAccessLog",
                "alb:DisableLoadBalancerAccessLog",
                "alb:EnableDeletionProtection",
                "alb:DisableDeletionProtection",
                "alb:ListLoadBalancers",
                "alb:GetLoadBalancerAttribute",
                "alb:ListListeners",
                "alb:CreateListener",
                "alb:GetListenerAttribute",
                "alb:UpdateListenerAttribute",
                "alb:ListListenerCertificates",
                "alb:AssociateAdditionalCertificatesWithListener",
                "alb:DissociateAdditionalCertificatesFromListener",
                "alb:DeleteListener",
                "alb:CreateRule",
                "alb:DeleteRule",
                "alb:UpdateRuleAttribute",
                "alb:CreateRules",
                "alb:UpdateRulesAttribute",
                "alb:DeleteRules",
                "alb:ListRules",
                "alb:CreateServerGroup",
                "alb:DeleteServerGroup",
                "alb:UpdateServerGroupAttribute",
                "alb:DescribeZones"
            ],
            "Resource": "*",
            "Effect": "Allow"
        },
        {
            "Action": "ram:CreateServiceLinkedRole",
            "Resource": "*",
            "Effect": "Allow",
            "Condition": {
                "StringEquals": {
                    "ram:ServiceName": [
                        "alb.aliyuncs.com",
                        "logdelivery.alb.aliyuncs.com"
                    ]
                }
            }
        },
        {
            "Action": [
                "yundun-cert:DescribeSSLCertificateList",
                "yundun-cert:DescribeSSLCertificatePublicKeyDetail"
            ],
            "Resource": "*",
            "Effect": "Allow"
        }
    ]
}
Configuring kubelet and the nodes

In the component installation step we already set the kubelet startup flags on every node. In addition, each Node object must be patched with its providerID:

NODE_NAME=$(echo `curl -s http://100.100.100.200/latest/meta-data/region-id`.`curl -s http://100.100.100.200/latest/meta-data/instance-id`)
kubectl patch node ${NODE_NAME} -p "{\"spec\":{\"providerID\": \"${NODE_NAME}\"}}"
Creating the ConfigMap

Use the following script to create the CCM's config:

#!/bin/bash
# create ConfigMap kube-system/cloud-config for CCM.

ACCESS_KEY_ID="xxxx"
ACCESS_KEY_SECRET="xxxx"

# base64 AccessKey & AccessKeySecret
accessKeyIDBase64=`echo -n "$ACCESS_KEY_ID" |base64 -w 0`
accessKeySecretBase64=`echo -n "$ACCESS_KEY_SECRET"|base64 -w 0`

cat <<EOF >cloud-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: cloud-config
  namespace: kube-system
data:
  cloud-config.conf: |-
    {
        "Global": {
            "accessKeyID": "$accessKeyIDBase64",
            "accessKeySecret": "$accessKeySecretBase64"
        }
    }
EOF

kubectl create -f cloud-config.yaml
Installing the CCM

It is recommended to install from the cloud-controller-manager.yml published on GitHub:

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: system:cloud-controller-manager
rules:
  - apiGroups:
      - coordination.k8s.io
    resources:
      - leases
    verbs:
      - get
      - list
      - update
      - create
  - apiGroups:
      - ""
    resources:
      - persistentvolumes
      - services
      - secrets
      - endpoints
      - serviceaccounts
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - list
      - watch
      - delete
      - patch
      - update
  - apiGroups:
      - ""
    resources:
      - services/status
    verbs:
      - update
      - patch
  - apiGroups:
      - ""
    resources:
      - nodes/status
    verbs:
      - patch
      - update
  - apiGroups:
      - ""
    resources:
      - events
      - endpoints
    verbs:
      - create
      - patch
      - update
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cloud-controller-manager
  namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: system:cloud-controller-manager
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:cloud-controller-manager
subjects:
  - kind: ServiceAccount
    name: cloud-controller-manager
    namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: system:shared-informers
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:cloud-controller-manager
subjects:
  - kind: ServiceAccount
    name: shared-informers
    namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: system:cloud-node-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:cloud-controller-manager
subjects:
  - kind: ServiceAccount
    name: cloud-node-controller
    namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: system:pvl-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:cloud-controller-manager
subjects:
  - kind: ServiceAccount
    name: pvl-controller
    namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: system:route-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:cloud-controller-manager
subjects:
  - kind: ServiceAccount
    name: route-controller
    namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: cloud-controller-manager
    tier: control-plane
  name: cloud-controller-manager
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: cloud-controller-manager
      tier: control-plane
  template:
    metadata:
      labels:
        app: cloud-controller-manager
        tier: control-plane
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      serviceAccountName: cloud-controller-manager
      tolerations:
        - effect: NoSchedule
          operator: Exists
          key: node-role.kubernetes.io/master
        - effect: NoSchedule
          operator: Exists
          key: node.cloudprovider.kubernetes.io/uninitialized
      nodeSelector:
        node-role.kubernetes.io/master: ""
      containers:
        - command:
          -  /cloud-controller-manager
          - --leader-elect=true
          - --cloud-provider=alicloud
          - --use-service-account-credentials=true
          - --cloud-config=/etc/kubernetes/config/cloud-config.conf
          - --route-reconciliation-period=3m
          - --leader-elect-resource-lock=endpoints
          # replace ${cluster-cidr} with your own cluster cidr
          # example: 172.16.0.0/16
          - --configure-cloud-routes=false
          - --allocate-node-cidrs=false
          - --cluster-cidr=192.168.32.0/19
          # replace ${ImageVersion} with the latest release version
          # example: v2.1.0
          image: registry.cn-hangzhou.aliyuncs.com/acs/cloud-controller-manager-amd64:v2.3.0
          livenessProbe:
            failureThreshold: 8
            httpGet:
              host: 127.0.0.1
              path: /healthz
              port: 10258
              scheme: HTTP
            initialDelaySeconds: 15
            timeoutSeconds: 15
          name: cloud-controller-manager
          resources:
            requests:
              cpu: 200m
          volumeMounts:
            - mountPath: /etc/kubernetes/
              name: k8s
            - mountPath: /etc/ssl/certs
              name: certs
            - mountPath: /etc/pki
              name: pki
            - mountPath: /etc/kubernetes/config
              name: cloud-config
      hostNetwork: true
      volumes:
        - hostPath:
            path: /etc/kubernetes
          name: k8s
        - hostPath:
            path: /etc/ssl/certs
          name: certs
        - hostPath:
            path: /etc/pki
          name: pki
        - configMap:
            defaultMode: 420
            items:
              - key: cloud-config.conf
                path: cloud-config.conf
            name: cloud-config
          name: cloud-config
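
Assuming the full manifest above is saved as cloud-controller-manager.yaml (the filename is just for illustration), apply it from a node with cluster-admin access:

kubectl apply -f cloud-controller-manager.yaml
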
Checking the CCM

We can verify that the CCM was installed successfully by checking the Pod status:

kubectl get pod -n kube-system
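
If the cloud-controller-manager Pods are not Running, their logs usually point to the cause (for example a wrong AccessKey or a bad cloud-config path). A quick check, relying on the app=cloud-controller-manager label from the DaemonSet above:

# list the CCM Pods created by the DaemonSet
kubectl -n kube-system get pod -l app=cloud-controller-manager -o wide
# tail their logs for reconcile errors
kubectl -n kube-system logs -l app=cloud-controller-manager --tail=50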
Further reading
Using a Master as an SLB backend

The notes in the Alibaba Cloud documentation state that the CCM will never add Master nodes as SLB backends, under any circumstances.

This is presumably a performance consideration. If you really do need the Masters as SLB backends, you can modify the CCM source code directly.

A GitHub issue explains how the source code filters out master nodes. To stay close to the upstream code, I achieved this by adding an extra label to the masters; see the reference code and the sketch below.
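
As an illustration only (the label key below and the way a patched CCM consumes it are assumptions, not upstream behaviour), marking the control-plane nodes for a modified build might look like this:

# hypothetical label; it must match whatever filter your patched CCM source checks
kubectl label node k8s-01 k8s-02 k8s-03 example.com/allow-as-slb-backend=true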

Installing Ingress

Ingress-controller
apiVersion: v1
kind: Namespace
metadata:
  labels:
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
  name: ingress-nginx
---
apiVersion: v1
automountServiceAccountToken: true
kind: ServiceAccount
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx
  namespace: ingress-nginx
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
  namespace: ingress-nginx
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx
  namespace: ingress-nginx
rules:
- apiGroups:
  - ""
  resources:
  - namespaces
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - configmaps
  - pods
  - secrets
  - endpoints
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - services
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - networking.k8s.io
  resources:
  - ingresses
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - networking.k8s.io
  resources:
  - ingresses/status
  verbs:
  - update
- apiGroups:
  - networking.k8s.io
  resources:
  - ingressclasses
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resourceNames:
  - ingress-controller-leader
  resources:
  - configmaps
  verbs:
  - get
  - update
- apiGroups:
  - ""
  resources:
  - configmaps
  verbs:
  - create
- apiGroups:
  - ""
  resources:
  - events
  verbs:
  - create
  - patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
  namespace: ingress-nginx
rules:
- apiGroups:
  - ""
  resources:
  - secrets
  verbs:
  - get
  - create
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx
rules:
- apiGroups:
  - ""
  resources:
  - configmaps
  - endpoints
  - nodes
  - pods
  - secrets
  - namespaces
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - services
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - networking.k8s.io
  resources:
  - ingresses
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - events
  verbs:
  - create
  - patch
- apiGroups:
  - networking.k8s.io
  resources:
  - ingresses/status
  verbs:
  - update
- apiGroups:
  - networking.k8s.io
  resources:
  - ingressclasses
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
rules:
- apiGroups:
  - admissionregistration.k8s.io
  resources:
  - validatingwebhookconfigurations
  verbs:
  - get
  - update
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx
  namespace: ingress-nginx
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: ingress-nginx
subjects:
- kind: ServiceAccount
  name: ingress-nginx
  namespace: ingress-nginx
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
  namespace: ingress-nginx
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: ingress-nginx-admission
subjects:
- kind: ServiceAccount
  name: ingress-nginx-admission
  namespace: ingress-nginx
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: ingress-nginx
subjects:
- kind: ServiceAccount
  name: ingress-nginx
  namespace: ingress-nginx
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: ingress-nginx-admission
subjects:
- kind: ServiceAccount
  name: ingress-nginx-admission
  namespace: ingress-nginx
---
apiVersion: v1
data:
  allow-snippet-annotations: "true"
kind: ConfigMap
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-controller
  namespace: ingress-nginx
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-controller-admission
  namespace: ingress-nginx
spec:
  ports:
  - appProtocol: https
    name: https-webhook
    port: 443
    targetPort: webhook
  selector:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
  type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-controller
  namespace: ingress-nginx
spec:
  minReadySeconds: 0
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app.kubernetes.io/component: controller
      app.kubernetes.io/instance: ingress-nginx
      app.kubernetes.io/name: ingress-nginx
  template:
    metadata:
      labels:
        app.kubernetes.io/component: controller
        app.kubernetes.io/instance: ingress-nginx
        app.kubernetes.io/name: ingress-nginx
    spec:
      containers:
      - args:
        - /nginx-ingress-controller
        - --publish-service=$(POD_NAMESPACE)/ingress-nginx-controller
        - --election-id=ingress-controller-leader
        - --controller-class=k8s.io/ingress-nginx
        - --ingress-class=nginx
        - --configmap=$(POD_NAMESPACE)/ingress-nginx-controller
        - --validating-webhook=:8443
        - --validating-webhook-certificate=/usr/local/certificates/cert
        - --validating-webhook-key=/usr/local/certificates/key
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: LD_PRELOAD
          value: /usr/local/lib/libmimalloc.so
        image: registry.cn-hangzhou.aliyuncs.com/google_containers/nginx-ingress-controller:v1.2.0
        imagePullPolicy: IfNotPresent
        lifecycle:
          preStop:
            exec:
              command:
              - /wait-shutdown
        livenessProbe:
          failureThreshold: 5
          httpGet:
            path: /healthz
            port: 10254
            scheme: HTTP
          initialDelaySeconds: 10
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 1
        name: controller
        ports:
        - containerPort: 80
          name: http
          protocol: TCP
        - containerPort: 443
          name: https
          protocol: TCP
        - containerPort: 8443
          name: webhook
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /healthz
            port: 10254
            scheme: HTTP
          initialDelaySeconds: 10
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 1
        resources:
          requests:
            cpu: 100m
            memory: 90Mi
        securityContext:
          allowPrivilegeEscalation: true
          capabilities:
            add:
            - NET_BIND_SERVICE
            drop:
            - ALL
          runAsUser: 101
        volumeMounts:
        - mountPath: /usr/local/certificates/
          name: webhook-cert
          readOnly: true
      dnsPolicy: ClusterFirst
      nodeSelector:
        kubernetes.io/os: linux
      serviceAccountName: ingress-nginx
      terminationGracePeriodSeconds: 300
      volumes:
      - name: webhook-cert
        secret:
          secretName: ingress-nginx-admission
---
apiVersion: batch/v1
kind: Job
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission-create
  namespace: ingress-nginx
spec:
  template:
    metadata:
      labels:
        app.kubernetes.io/component: admission-webhook
        app.kubernetes.io/instance: ingress-nginx
        app.kubernetes.io/name: ingress-nginx
        app.kubernetes.io/part-of: ingress-nginx
        app.kubernetes.io/version: 1.2.0
      name: ingress-nginx-admission-create
    spec:
      containers:
      - args:
        - create
        - --host=ingress-nginx-controller-admission,ingress-nginx-controller-admission.$(POD_NAMESPACE).svc
        - --namespace=$(POD_NAMESPACE)
        - --secret-name=ingress-nginx-admission
        env:
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        image: registry.cn-chengdu.aliyuncs.com/archly-k8s/kube-webhook-certgen:v1.1.1
        imagePullPolicy: IfNotPresent
        name: create
        securityContext:
          allowPrivilegeEscalation: false
      nodeSelector:
        kubernetes.io/os: linux
      restartPolicy: OnFailure
      securityContext:
        fsGroup: 2000
        runAsNonRoot: true
        runAsUser: 2000
      serviceAccountName: ingress-nginx-admission
---
apiVersion: batch/v1
kind: Job
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission-patch
  namespace: ingress-nginx
spec:
  template:
    metadata:
      labels:
        app.kubernetes.io/component: admission-webhook
        app.kubernetes.io/instance: ingress-nginx
        app.kubernetes.io/name: ingress-nginx
        app.kubernetes.io/part-of: ingress-nginx
        app.kubernetes.io/version: 1.2.0
      name: ingress-nginx-admission-patch
    spec:
      containers:
      - args:
        - patch
        - --webhook-name=ingress-nginx-admission
        - --namespace=$(POD_NAMESPACE)
        - --patch-mutating=false
        - --secret-name=ingress-nginx-admission
        - --patch-failure-policy=Fail
        env:
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        image: registry.cn-chengdu.aliyuncs.com/archly-k8s/kube-webhook-certgen:v1.1.1
        imagePullPolicy: IfNotPresent
        name: patch
        securityContext:
          allowPrivilegeEscalation: false
      nodeSelector:
        kubernetes.io/os: linux
      restartPolicy: OnFailure
      securityContext:
        fsGroup: 2000
        runAsNonRoot: true
        runAsUser: 2000
      serviceAccountName: ingress-nginx-admission
---
apiVersion: networking.k8s.io/v1
kind: IngressClass
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: nginx
spec:
  controller: k8s.io/ingress-nginx
---
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
webhooks:
- admissionReviewVersions:
  - v1
  clientConfig:
    service:
      name: ingress-nginx-controller-admission
      namespace: ingress-nginx
      path: /networking/v1/ingresses
  failurePolicy: Fail
  matchPolicy: Equivalent
  name: validate.nginx.ingress.kubernetes.io
  rules:
  - apiGroups:
    - networking.k8s.io
    apiVersions:
    - v1
    operations:
    - CREATE
    - UPDATE
    resources:
    - ingresses
  sideEffects: None
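
Assuming the manifest above is saved as ingress-nginx.yaml (the filename is just for illustration), apply it and wait for the controller Pod to become Ready before wiring up the SLB:

kubectl apply -f ingress-nginx.yaml
kubectl -n ingress-nginx get pods -w
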
SLB With Ingress

When installing the ingress-controller above we did not attach it to an SLB; that is configured separately here with a LoadBalancer Service:

apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  annotations:
    # 公网模式
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-address-type: "internet"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-force-override-listeners: "true"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-id: "lb-xxxx"
    # 指定虚拟交换机
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-vswitch-id: "vsw-xxxx"
    # 健康检查
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-health-check-type: "tcp"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-health-check-connect-timeout: "8"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-healthy-threshold: "4"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-unhealthy-threshold: "4"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-health-check-interval: "3"
  name: ingress-nginx-controller
  namespace: ingress-nginx
spec:
  externalTrafficPolicy: Local
  ports:
  - appProtocol: http
    name: http
    port: 80
    protocol: TCP
    targetPort: http
  - appProtocol: https
    name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
  type: LoadBalancer
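
After applying this Service, the CCM reconciles the listeners on the SLB specified by the loadbalancer-id annotation. A quick way to confirm, assuming the Service above is saved as ingress-nginx-slb.yaml (the filename is an assumption):

kubectl apply -f ingress-nginx-slb.yaml
# EXTERNAL-IP should show the SLB address once reconciliation finishes
kubectl -n ingress-nginx get svc ingress-nginx-controller
# events on the Service are the first place to look if the SLB is not attached
kubectl -n ingress-nginx describe svc ingress-nginx-controller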

For more SLB annotation parameters, please refer to the official Alibaba Cloud documentation.

Acknowledgements

Thanks to the following projects and documents for the inspiration they provided: