使用Ansible部署etcd 3.2高可用集群
📅 2017-06-05 | 🖱️
本文内容已过期
之前写过一篇手动搭建etcd 3.1集群的文章《etcd 3.1 高可用集群搭建》,最近要初始化一套新的环境,考虑用ansible自动化部署整套环境, 先从部署etcd 3.2集群开始。
需要部署etcd的主机信息如下:
node1 192.168.61.11
node2 192.168.61.12
node3 192.168.61.13
1.配置管理项目目录结构 #
├── inventories
│   ├── staging
│   │   ├── group_vars
│   │   │   ├── all.yml
│   │   │   └── etcd-nodes.yml
│   │   ├── host_vars
│   │   │   ├── node1.yml
│   │   │   ├── node2.yml
│   │   │   └── node3.yml
│   │   └── hosts
│   └── production
├── roles
│   ├── common
│   │   ├── defaults
│   │   │   └── main.yml
│   │   └── tasks
│   │       └── main.yml
│   ├── etcd3
│       ├── defaults
│       │   └── main.yml
│       ├── files
│       │   └── make-ca-cert.sh
│       ├── meta
│       │   └── main.yml
│       ├── tasks
│       │   ├── create_etcd_user.yml
│       │   ├── etcd-restart.yml
│       │   ├── etcd-start.yml
│       │   ├── etcd-stop.yml
│       │   ├── gen-etcd-certs.yml
│       │   ├── gen-etcd-systemd.yml
│       │   ├── install_etcd_bin.yml
│       │   └── main.yml
│       └── templates
│           ├── etcd.conf.j2
│           └── etcd.service.j2
├── deploy-etcd3.yml
roles/etcd3/defaults/main.yml:
1{% raw %}
---
# Default variables for the etcd3 role (roles/etcd3/defaults/main.yml).

# Release to install. Quoted so it is always handled as a string.
etcd_version: "3.2.0"

# Download location of the release tarball, derived from etcd_version.
etcd_download_url_base: "https://github.com/coreos/etcd/releases/download/v{{ etcd_version }}"
etcd_release: "etcd-v{{ etcd_version }}-linux-amd64"
etcd_download_url: "{{ etcd_download_url_base }}/{{ etcd_release }}.tar.gz"

# Where the etcd/etcdctl binaries are installed and where etcd keeps its data.
etcd_bin_path: /usr/bin
etcd_data_dir: /var/lib/etcd

# Configuration directory and the TLS files stored beneath it.
etcd_conf_dir: /etc/etcd
etcd_certs_dir: "{{ etcd_conf_dir }}/ssl"
etcd_cert_group: root
etcd_ca_file: "{{ etcd_certs_dir }}/ca.crt"
etcd_cert_file: "{{ etcd_certs_dir }}/server.crt"
etcd_key_file: "{{ etcd_certs_dir }}/server.key"
# The peer CA points at the same ca.crt as etcd_ca_file.
etcd_peer_ca_file: "{{ etcd_certs_dir }}/ca.crt"
etcd_peer_cert_file: "{{ etcd_certs_dir }}/peer.crt"
etcd_peer_key_file: "{{ etcd_certs_dir }}/peer.key"
etcd_client_cert_file: "{{ etcd_certs_dir }}/client.crt"
etcd_client_key_file: "{{ etcd_certs_dir }}/client.key"

etcd_client_cert_auth: true
etcd_peer_client_cert_auth: true

etcd_client_port: 2379
etcd_peer_port: 2380

etcd_initial_cluster_state: new
etcd_initial_cluster_token: etcd-k8s-cluster

# etcd_machine_address is not defined in this file; presumably it is set per
# host (e.g. in host_vars) -- confirm against the inventory.
etcd_initial_advertise_peer_urls: "https://{{ etcd_machine_address }}:{{ etcd_peer_port }}"
etcd_listen_peer_urls: "https://{{ etcd_machine_address }}:{{ etcd_peer_port }}"
etcd_advertise_client_urls: "https://{{ etcd_machine_address }}:{{ etcd_client_port }}"
# Uses etcd_client_port (instead of a literal 2379) so that overriding the
# port keeps the listen and advertise URLs in agreement.
etcd_listen_client_urls: "https://{{ etcd_machine_address }}:{{ etcd_client_port }},https://127.0.0.1:{{ etcd_client_port }}"
40{% endraw %}
2.创建etcd用户和数据目录 #
创建etcd用户、用户组和数据目录。
1{% raw %}
# Creates the etcd group and system account, then makes sure the data
# directory exists and is owned by etcd:etcd.
- name: create system etcd group
  group:
    state: present
    name: etcd

- name: create system etcd user
  user:
    state: present
    name: etcd
    system: true
    comment: "etcd user"
    # No interactive login for the service account.
    shell: /sbin/nologin
    # The account's home is the etcd data directory.
    home: "{{ etcd_data_dir }}"
    groups: etcd

- name: ensure etcd_data_dir exists
  file:
    state: directory
    path: "{{ etcd_data_dir }}"
    group: etcd
    owner: etcd
    # Ownership is applied recursively to the directory contents.
    recurse: true
24{% endraw %}
3.下载和解压etcd #
下载和解压缩etcd release tar包,并将可执行文件etcd, etcdctl拷贝到/usr/bin。
1{% raw %}
---
# Downloads and unpacks the etcd release on the first host of the
# 'etcd-nodes' group, fetches the etcd/etcdctl binaries to the control
# machine, and then copies them to every host.
#
# ansible_temp_dir is not defined in the role defaults; presumably it is
# provided by inventory/group vars -- confirm.

# Pins the GitHub release-asset S3 hostname to a fixed IP in /etc/hosts.
- name: set github s3 host on the first etcd server
  lineinfile:
    dest: /etc/hosts
    regexp: '.*github-production-release-asset-2e65be\.s3\.amazonaws\.com$'
    line: "219.76.4.4 github-production-release-asset-2e65be.s3.amazonaws.com"
    state: present
  delegate_to: "{{ groups['etcd-nodes'][0] }}"
  run_once: true

# The extracted release directory doubles as the "already downloaded" marker:
# the download and extract tasks below are skipped when it exists.
- name: check whether etcd release tar extracted on the first etcd server
  stat:
    path: "{{ ansible_temp_dir }}/{{ etcd_release }}"
  register: etcd_release_tar_check
  delegate_to: "{{ groups['etcd-nodes'][0] }}"
  run_once: true

- name: download etcd release tar file on first the etcd server
  get_url:
    url: "{{ etcd_download_url }}"
    dest: "{{ ansible_temp_dir }}"
    # NOTE(review): certificate validation is disabled, so the downloaded
    # tarball is not authenticated. Consider enabling validation and/or
    # pinning a checksum once the download path is trusted.
    validate_certs: false
    timeout: 20
  register: download_etcd
  delegate_to: "{{ groups['etcd-nodes'][0] }}"
  run_once: true
  when: not etcd_release_tar_check.stat.exists

# Unpacks the tarball in place on the first etcd server (remote_src).
- name: extract etcd tar file
  unarchive:
    src: "{{ download_etcd.dest }}"
    dest: "{{ ansible_temp_dir }}"
    remote_src: true
  run_once: true
  delegate_to: "{{ groups['etcd-nodes'][0] }}"
  when: not etcd_release_tar_check.stat.exists

# Pulls the binaries into tmp/etcd3/ on the control machine.
- name: fetch etcd bins from the first etcd server
  fetch:
    src: "{{ ansible_temp_dir }}/{{ etcd_release }}/{{ item }}"
    dest: "tmp/etcd3/{{ item }}"
    flat: true
  register: fetch_etcd
  run_once: true
  delegate_to: "{{ groups['etcd-nodes'][0] }}"
  with_items:
    - etcd
    - etcdctl

# Runs on every host: installs the fetched binaries into etcd_bin_path.
- name: copy etcd binary
  copy:
    src: "tmp/etcd3/{{ item }}"
    dest: "{{ etcd_bin_path }}"
    owner: etcd
    group: etcd
    # Quoted so the mode is passed as an octal string, not a YAML integer.
    mode: "0750"
  with_items:
    - etcd
    - etcdctl
63{% endraw %}
4.生成并分发etcd TLS证书 #
1{% raw %}
---
# Generates the etcd TLS certificates once, on the first host of the
# 'etcd-nodes' group, and distributes them to the remaining hosts.

- name: ensure etcd certs directory
  file:
    path: "{{ etcd_certs_dir }}"
    state: directory
    owner: etcd
    group: etcd
    mode: "0750"
    # NOTE(review): with recurse enabled, owner/group/mode are also applied to
    # files already inside the directory on every run, which overrides the
    # 0400 mode set when the certs are copied below. Kept as-is because
    # make-ca-cert.sh is not visible here and this may be what makes the
    # generated files readable by etcd on the first server -- confirm.
    recurse: true

- name: copy make-ca-cert.sh
  copy:
    src: make-ca-cert.sh
    dest: "{{ etcd_certs_dir }}"
    owner: root
    group: root
    mode: "0500"
  run_once: true
  delegate_to: "{{ groups['etcd-nodes'][0] }}"

# 'creates' skips the script once server.crt exists.
- name: gen certs on the first etcd server
  command: "{{ etcd_certs_dir }}/make-ca-cert.sh"
  args:
    creates: "{{ etcd_certs_dir }}/server.crt"
  run_once: true
  delegate_to: "{{ groups['etcd-nodes'][0] }}"
  environment:
    # Comma-separated etcd_machine_address of every host in 'etcd-nodes'.
    NODE_IPS: "{% for host in groups['etcd-nodes'] %}{{ hostvars[host]['etcd_machine_address'] }}{% if not loop.last %},{% endif %}{% endfor %}"
    # Comma-separated inventory names of every host in 'etcd-nodes'.
    NODE_DNS: "{{ groups['etcd-nodes']|join(',') }}"
    CERT_DIR: "{{ etcd_certs_dir }}"
    CERT_GROUP: "{{ etcd_cert_group }}"

# Reads every cert/key from the first etcd server into pki_certs
# (base64-encoded) so they can be written to the other hosts.
- name: slurp etcd certs
  slurp:
    src: "{{ item }}"
  register: pki_certs
  run_once: true
  delegate_to: "{{ groups['etcd-nodes'][0] }}"
  # The results contain private keys; keep them out of Ansible's output.
  no_log: true
  with_items:
    - "{{ etcd_ca_file }}"
    - "{{ etcd_cert_file }}"
    - "{{ etcd_key_file }}"
    - "{{ etcd_peer_ca_file }}"
    - "{{ etcd_peer_cert_file }}"
    - "{{ etcd_peer_key_file }}"
    - "{{ etcd_client_cert_file }}"
    - "{{ etcd_client_key_file }}"

# item.item is the original path that was slurped, so each file lands at the
# same path on the target host. The first server is skipped (it already has
# the generated files).
- name: copy etcd certs to other etcd servers
  copy:
    dest: "{{ item.item }}"
    content: "{{ item.content | b64decode }}"
    owner: etcd
    group: "{{ etcd_cert_group }}"
    mode: "0400"
  # Each loop item carries key material; do not print it.
  no_log: true
  with_items: "{{ pki_certs.results }}"
  when: inventory_hostname != groups['etcd-nodes'][0]
62
63{% endraw %}
5.systemd和配置 #
1{% raw %}
---
# Renders the systemd unit and the etcd environment/config file from the
# role's templates.

- name: create etcd systemd unit file
  template:
    src: etcd.service.j2
    dest: /etc/systemd/system/etcd.service

- name: create etcd env conf
  template:
    src: etcd.conf.j2
    # Derived from etcd_conf_dir (default /etc/etcd) instead of a literal
    # path, matching how the certs directory is derived.
    # NOTE(review): etcd.service.j2 is not visible here; confirm it does not
    # hard-code /etc/etcd/etcd.conf before overriding etcd_conf_dir.
    dest: "{{ etcd_conf_dir }}/etcd.conf"
    owner: etcd
    group: etcd
    # Quoted so the mode is passed as an octal string, not a YAML integer.
    mode: "0540"
16{% endraw %}
6.启动etcd #
1{% raw %}
---
# Service control tasks for etcd.
# NOTE(review): the role's tasks directory lists separate etcd-start.yml and
# etcd-restart.yml files, so these two tasks presumably live in different
# files rather than running back to back -- confirm.

# Reloads systemd unit definitions, enables etcd at boot and starts it.
- name: start etcd
  systemd:
    name: etcd
    state: started
    enabled: true
    daemon_reload: true

- name: restart etcd
  systemd:
    state: restarted
    name: etcd
15
16{% endraw %}
7.查看集群状态 #
检查集群是否健康,在任一节点执行:
etcdctl \
  --ca-file=/etc/etcd/ssl/ca.crt \
  --cert-file=/etc/etcd/ssl/client.crt \
  --key-file=/etc/etcd/ssl/client.key \
  --endpoints=https://node1:2379,https://node2:2379,https://node3:2379 \
  cluster-health

member 1e3da2bf674fd07 is healthy: got healthy result from https://192.168.61.11:2379
member 88548a72a2e9a749 is healthy: got healthy result from https://192.168.61.13:2379
member c3bda13bf78ed2ab is healthy: got healthy result from https://192.168.61.12:2379
cluster is healthy
etcdctl \
  --ca-file=/etc/etcd/ssl/ca.crt \
  --cert-file=/etc/etcd/ssl/client.crt \
  --key-file=/etc/etcd/ssl/client.key \
  --endpoints=https://node1:2379,https://node2:2379,https://node3:2379 \
  member list

1e3da2bf674fd07: name=node1 peerURLs=https://192.168.61.11:2380 clientURLs=https://192.168.61.11:2379 isLeader=false
88548a72a2e9a749: name=node3 peerURLs=https://192.168.61.13:2380 clientURLs=https://192.168.61.13:2379 isLeader=false
c3bda13bf78ed2ab: name=node2 peerURLs=https://192.168.61.12:2380 clientURLs=https://192.168.61.12:2379 isLeader=true
附录源码 #
- 2018/01/08 更新
- 当时写这篇文档是在初次使用ansible初始化我们的Kubernetes集群之后做的记录。这篇文档也是参考Kubernetes github库中的kubernetes/contrib/ansible/roles/etcd/,只是官方的ansible考虑的内容比较全面,而我们线上环境都是CentOS 7的主机,所以当时参考官方的ansible role,写了一个在CentOS 7上用ansible部署etcd的精简版。