之前写过一篇手动搭建etcd 3.1集群的文章《etcd 3.1 高可用集群搭建》,最近要初始化一套新的环境,考虑用ansible自动化部署整套环境, 先从部署etcd 3.2集群开始。

需要部署etcd的主机信息如下:

node1 192.168.61.11
node2 192.168.61.12
node3 192.168.61.13

1.配置管理项目目录结构

 1├── inventories
 2│   ├── staging
 3│   │   ├── group_vars
 4│   │   │   ├── all.yml
 5│   │   │   └── etcd-nodes.yml
 6│   │   ├── host_vars
 7│   │   │   ├── node1.yml
 8│   │   │   ├── node2.yml
 9│   │   │   └── node3.yml
10│   │   └── hosts
11│   └── production
12├── roles
13│   ├── common
14│   │   ├── defaults
15│   │   │   └── main.yml
16│   │   └── tasks
17│   │       └── main.yml
18│   ├── etcd3
19│       ├── defaults
20│       │   └── main.yml
21│       ├── files
22│       │   └── make-ca-cert.sh
23│       ├── meta
24│       │   └── main.yml
25│       ├── tasks
26│       │   ├── create_etcd_user.yml
27│       │   ├── etcd-restart.yml
28│       │   ├── etcd-start.yml
29│       │   ├── etcd-stop.yml
30│       │   ├── gen-etcd-certs.yml
31│       │   ├── gen-etcd-systemd.yml
32│       │   ├── install_etcd_bin.yml
33│       │   └── main.yml
34│       └── templates
35│           ├── etcd.conf.j2
36│           └── etcd.service.j2
37├── deploy-etcd3.yml

roles/etcd3/defaults/main.yml:

 1{% raw %}
 2---
 3
 4etcd_version: 3.2.0
 5
 6etcd_download_url_base: "https://github.com/coreos/etcd/releases/download/v{{ etcd_version }}"
 7etcd_release: "etcd-v{{ etcd_version }}-linux-amd64" 
 8etcd_download_url: "{{ etcd_download_url_base }}/{{ etcd_release}}.tar.gz"
 9
10etcd_bin_path: /usr/bin
11etcd_data_dir: /var/lib/etcd
12
13etcd_conf_dir: /etc/etcd
14etcd_certs_dir: "{{ etcd_conf_dir }}/ssl"
15etcd_cert_group: root
16etcd_ca_file: "{{ etcd_certs_dir }}/ca.crt"
17etcd_cert_file: "{{ etcd_certs_dir }}/server.crt"
18etcd_key_file: "{{ etcd_certs_dir }}/server.key"
19etcd_peer_ca_file: "{{ etcd_certs_dir }}/ca.crt"
20etcd_peer_cert_file: "{{ etcd_certs_dir }}/peer.crt"
21etcd_peer_key_file: "{{ etcd_certs_dir }}/peer.key"
22etcd_client_cert_file: "{{ etcd_certs_dir }}/client.crt"
23etcd_client_key_file: "{{ etcd_certs_dir }}/client.key"
24
25etcd_client_cert_auth: true
26etcd_peer_client_cert_auth: true
27
28etcd_client_port: 2379
29etcd_peer_port: 2380
30
31
32etcd_initial_cluster_state: new
33etcd_initial_cluster_token: etcd-k8s-cluster
34
35
36etcd_initial_advertise_peer_urls: "https://{{ etcd_machine_address }}:{{ etcd_peer_port }}"
37etcd_listen_peer_urls: "https://{{ etcd_machine_address }}:{{ etcd_peer_port }}"
38etcd_advertise_client_urls: "https://{{ etcd_machine_address }}:{{ etcd_client_port }}"
39etcd_listen_client_urls: "https://{{ etcd_machine_address }}:2379,https://127.0.0.1:2379"
40{% endraw %}

2.创建etcd用户和数据目录

创建etcd用户、用户组和数据目录。

 1{% raw %}
 2- name: create system etcd group
 3  group:
 4    name: etcd
 5    state: present
 6
 7- name: create system etcd user
 8  user:
 9    name: etcd
10    comment: "etcd user"
11    shell: /sbin/nologin
12    state: present
13    system: yes
14    home: "{{ etcd_data_dir }}"
15    groups: etcd
16
17- name: ensure etcd_data_dir exists
18  file:
19    path: "{{ etcd_data_dir }}"
20    recurse: yes
21    state: directory
22    owner: etcd
23    group: etcd
24{% endraw %}

3.下载和解压etcd

下载和解压缩etcd release tar包,并将可执行文件etcd, etcdctl拷贝到/usr/bin。

 1{% raw %}
 2---
 3
 4- name: set github s3 host on the first etcd server
 5  lineinfile: 
 6    dest: /etc/hosts 
 7    regexp: '.*github-production-release-asset-2e65be\.s3\.amazonaws\.com$' 
 8    line: "219.76.4.4 github-production-release-asset-2e65be.s3.amazonaws.com" 
 9    state: present
10  delegate_to: "{{ groups['etcd-nodes'][0] }}"
11  run_once: true
12  
13- name: check whether etcd release tar extracted on the first etcd server 
14  stat: 
15    path: "{{ ansible_temp_dir }}/{{ etcd_release }}"
16  register: etcd_release_tar_check
17  delegate_to: "{{ groups['etcd-nodes'][0] }}"
18  run_once: true
19  
20
21- name: download etcd release tar file on first the etcd server 
22  get_url:
23    url: "{{ etcd_download_url }}"
24    dest: "{{ ansible_temp_dir }}"
25    validate_certs: no
26    timeout: 20
27  register: download_etcd
28  delegate_to: "{{ groups['etcd-nodes'][0] }}"
29  run_once: true
30  when: not etcd_release_tar_check.stat.exists
31
32- name: extract etcd tar file
33  unarchive:
34    src: "{{ download_etcd.dest }}"
35    dest: "{{ ansible_temp_dir }}"
36    remote_src: yes
37  run_once: true
38  delegate_to: "{{ groups['etcd-nodes'][0] }}"
39  when: not etcd_release_tar_check.stat.exists
40  
41- name: fetch etcd bins from the first etcd server
42  fetch:
43    src: "{{ ansible_temp_dir }}/{{ etcd_release }}/{{ item }}"
44    dest: "tmp/etcd3/{{ item }}"
45    flat: yes
46  register: fetch_etcd
47  run_once: true
48  delegate_to: "{{ groups['etcd-nodes'][0] }}"
49  with_items:
50    - etcd
51    - etcdctl
52
53- name: copy etcd binary
54  copy:
55    src: "tmp/etcd3/{{ item }}"
56    dest: "{{ etcd_bin_path }}"
57    owner: etcd
58    group: etcd
59    mode: 0750
60  with_items:
61    - etcd
62    - etcdctl
63{% endraw %}

4.生成并分发etcd TLS证书

 1{% raw %}
 2---
 3
 4- name: ensure etcd certs directory
 5  file:
 6    path: "{{ etcd_certs_dir }}"
 7    state: directory
 8    owner: etcd
 9    group: etcd
10    mode: 0750
11    recurse: yes
12    
13- name: copy make-ca-cert.sh
14  copy:
15    src: make-ca-cert.sh
16    dest: "{{ etcd_certs_dir }}"
17    owner: root
18    group: root
19    mode: "0500"
20  run_once: true
21  delegate_to: "{{ groups['etcd-nodes'][0] }}"
22  
23  
24- name: gen certs on the first etcd server
25  command:
26    "{{ etcd_certs_dir }}/make-ca-cert.sh"
27  args:
28    creates: "{{ etcd_certs_dir }}/server.crt"
29  run_once: true
30  delegate_to: "{{ groups['etcd-nodes'][0] }}"
31  environment:
32    NODE_IPS: "{% for host in groups['etcd-nodes'] %}{{ hostvars[host]['etcd_machine_address'] }}{% if not loop.last %},{% endif %}{% endfor %}"
33    NODE_DNS: "{{ groups['etcd-nodes']|join(',') }}"
34    CERT_DIR: "{{ etcd_certs_dir }}"
35    CERT_GROUP: "{{ etcd_cert_group }}"
36    
37- name: slurp etcd certs
38  slurp:
39    src: "{{ item }}"
40  register: pki_certs
41  run_once: true
42  delegate_to: "{{ groups['etcd-nodes'][0] }}"
43  with_items:
44    - "{{ etcd_ca_file }}"
45    - "{{ etcd_cert_file }}"
46    - "{{ etcd_key_file }}"
47    - "{{ etcd_peer_ca_file }}"
48    - "{{ etcd_peer_cert_file }}"
49    - "{{ etcd_peer_key_file }}"
50    - "{{ etcd_client_cert_file }}"
51    - "{{ etcd_client_key_file }}"
52    
53- name: copy etcd certs to other etcd servers
54  copy:
55    dest: "{{ item.item }}"
56    content: "{{ item.content | b64decode }}"
57    owner: etcd
58    group: "{{ etcd_cert_group }}"
59    mode: 0400
60  with_items: "{{ pki_certs.results }}"
61  when: inventory_hostname != groups['etcd-nodes'][0]
62
63{% endraw %}

5.systemd和配置

 1{% raw %}
 2---
 3
 4- name: create etcd systemd unit file
 5  template: 
 6    src: etcd.service.j2
 7    dest: /etc/systemd/system/etcd.service
 8    
 9- name: create etcd env conf
10  template: 
11    src: etcd.conf.j2
12    dest: /etc/etcd/etcd.conf
13    owner: etcd
14    group: etcd
15    mode: 0540
16{% endraw %}

6.启动etcd

 1{% raw %}
 2---
 3
 4- name: start etcd
 5  systemd:
 6    name: etcd
 7    daemon_reload: yes
 8    state: started
 9    enabled: yes
10
11- name: restart etcd
12  systemd:
13    name: etcd
14    state: restarted
15
16{% endraw %}

7.查看集群状态

检查集群是否健康,在任一节点执行:

etcdctl \
  --ca-file=/etc/etcd/ssl/ca.crt \
  --cert-file=/etc/etcd/ssl/client.crt \
  --key-file=/etc/etcd/ssl/client.key \
  --endpoints=https://node1:2379,https://node2:2379,https://node3:2379 \
  cluster-health

member 1e3da2bf674fd07 is healthy: got healthy result from https://192.168.61.11:2379
member 88548a72a2e9a749 is healthy: got healthy result from https://192.168.61.13:2379
member c3bda13bf78ed2ab is healthy: got healthy result from https://192.168.61.12:2379
cluster is healthy
etcdctl \
  --ca-file=/etc/etcd/ssl/ca.crt \
  --cert-file=/etc/etcd/ssl/client.crt \
  --key-file=/etc/etcd/ssl/client.key \
  --endpoints=https://node1:2379,https://node2:2379,https://node3:2379 \
  member list

1e3da2bf674fd07: name=node1 peerURLs=https://192.168.61.11:2380 clientURLs=https://192.168.61.11:2379 isLeader=false
88548a72a2e9a749: name=node3 peerURLs=https://192.168.61.13:2380 clientURLs=https://192.168.61.13:2379 isLeader=false
c3bda13bf78ed2ab: name=node2 peerURLs=https://192.168.61.12:2380 clientURLs=https://192.168.61.12:2379 isLeader=true

附录源码

  • 2018/01/08 更新
    • 当时写这篇文档是在初次使用ansible初始化我们的Kubernetes集群之后做的记录。这篇文档也是参考Kubernetes github库中的kubernetes/contrib/ansible/roles/etcd/,只是官方的ansible考虑的内容比较全面,而我们线上环境都是CentOS 7的主机,所以当时参考官方的ansible role,写了一个在CentOS 7上用ansible部署etcd的精简版。
    • 好多朋友问源码在哪儿,今天把它从我们的ansible项目中剥离出来,放到了github上,地址是:https://github.com/erichll/ansible-etcd3