Skip to content
3 changes: 3 additions & 0 deletions defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,9 @@ rke2_etcd_snapshot_destination_dir: "{{ rke2_data_path }}/server/db/snapshots"
rke2_snapshotter: "{{ rke2_snapshooter }}"
rke2_snapshooter: overlayfs # legacy variable that only exists to keep backward compatibility with previous configurations

# When restoring etcd, allow cleanup of old node secrets and removal of nodes that no longer exist
rke2_cleanup_on_restore: false
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please also add this new variable to the README.md file and to argument_specs.yml.


# Deploy RKE2 with default CNI canal
rke2_cni: [canal]

Expand Down
38 changes: 0 additions & 38 deletions tasks/first_server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,44 +152,6 @@
- not ansible_check_mode
- rke2_cni != 'none'

# Post-restore cleanup. Runs only when `do_etcd_restore` or
# `do_etcd_restore_from_s3` is defined. It compares the node names kubectl
# reports against the `rke2_node_name` values found in hostvars and removes
# the node objects and `<node>.node-password.rke2` secrets of every node that
# is reported by kubectl but absent from hostvars.
- name: Restore etcd
  when: do_etcd_restore is defined or do_etcd_restore_from_s3 is defined
  block:
    # First column of `kubectl get nodes`; one node name per line in
    # `registered_node_names.stdout_lines`.
    - name: Get registered nodes
      ansible.builtin.shell:
        cmd: |
          set -o pipefail
          {{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml \
          get nodes --no-headers | awk '{print $1}'
        executable: /bin/bash
      changed_when: false
      register: registered_node_names

    # No `register:` here: registering the task result under the same name as
    # the fact being set made the value of `node_names` depend on the order in
    # which Ansible stores registered results and facts.
    # `| list` materialises the lazy `map` result into a real list.
    - name: Get all node names
      ansible.builtin.set_fact:
        node_names: "{{ hostvars | dict2items | map(attribute='value.rke2_node_name') | list }}"
      run_once: true

    # Best effort: `|| true` keeps the play going when a secret is already gone.
    - name: Remove old <node>.node-password.rke2 secrets
      ansible.builtin.shell:
        cmd: |
          {{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml \
          delete secret {{ item }}.node-password.rke2 -n kube-system 2>&1 || true
        executable: /bin/bash
      loop: "{{ registered_node_names.stdout_lines | difference(node_names) }}"
      changed_when: false

    # Best effort: `|| true` keeps the play going when a node is already gone.
    - name: Remove old nodes
      ansible.builtin.shell:
        cmd: |
          {{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml \
          delete node {{ item }} 2>&1 || true
        executable: /bin/bash
      loop: "{{ registered_node_names.stdout_lines | difference(node_names) }}"
      changed_when: false

- name: Set an Active Server variable
ansible.builtin.set_fact:
active_server: "{{ inventory_hostname }}"
Expand Down
15 changes: 12 additions & 3 deletions tasks/first_server_restore.yml
Copy link

@michalg91 michalg91 Jul 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should not delete this file; instead, put the logic for deleting secrets and diffed nodes here, as in the previously deleted parts, and execute it when specific variables are set. This will cover all the cases handled before in 1.37.0: restoring a cluster with the same node names, restoring a cluster with ephemeral node names, etc.

Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,22 @@
args:
executable: /bin/bash
changed_when: false
register: node_names
register: registered_node_names

- name: Restore etcd - remove old nodes
- name: Restore etcd - cleanup <node>.node-password.rke2 secrets
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ansible.builtin.shell: |
{{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml \
delete secret {{ item }}.node-password.rke2 -n kube-system 2>&1 || true
args:
executable: /bin/bash
with_items: "{{ registered_node_names.stdout_lines }}"
when: item != rke2_node_name

- name: Restore etcd - remove old (not existing) nodes
ansible.builtin.shell: |
{{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml \
delete node {{ item }} 2>&1 || true
args:
executable: /bin/bash
with_items: "{{ node_names.stdout_lines | difference(groups[rke2_cluster_group_name]) }}"
with_items: "{{ registered_node_names.stdout_lines | difference(groups[rke2_cluster_group_name]) }}"
changed_when: false
3 changes: 2 additions & 1 deletion tasks/main.yml
Copy link

@michalg91 michalg91 Jul 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This task should stay and should be triggered when another switch is set, e.g. rke2_cleanup_secrets. This file is only triggered on a fresh, unprovisioned and just-restored cluster.

Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,13 @@
- inventory_hostname == groups[rke2_servers_group_name].0
- active_server is not defined or groups[rke2_cluster_group_name] | length | int == 1

- name: Restore etcd specific tasks
- name: Run cleanup tasks after etcd restore
ansible.builtin.include_tasks: first_server_restore.yml
when:
- active_server is defined
- inventory_hostname == active_server or inventory_hostname == groups[rke2_servers_group_name].0
- do_etcd_restore is defined or do_etcd_restore_from_s3 is defined
- rke2_cleanup_on_restore

- name: Download kubeconfig to ansible localhost
ansible.builtin.include_tasks: download_kubeconfig.yaml
Expand Down
40 changes: 40 additions & 0 deletions tasks/remaining_nodes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,46 @@
when: (rke2_custom_registry_mirrors | length > 0 or rke2_custom_registry_configs | length > 0)
notify: "Config file changed"

# Collects, once per play and on the active server, the node names that
# already have a `<node>.node-password.rke2` secret in `kube-system`, and
# exposes them as the `nodes_with_existing_passwords` fact.
- name: Get current nodes secrets
  delegate_to: "{{ active_server }}"
  run_once: true
  block:
    # `sed -n 's/…$//p'` prints only the names that end in
    # `.node-password.rke2`, with that suffix stripped. Unlike the former
    # `grep | sed` pair it exits 0 when nothing matches, so an empty result is
    # not turned into a task failure by `pipefail`, and the anchored pattern
    # can only strip the suffix, never text in the middle of a name.
    - name: Get list of existing node secrets
      ansible.builtin.shell:
        cmd: |
          set -o pipefail
          "{{ rke2_data_path }}/bin/kubectl" --kubeconfig /etc/rancher/rke2/rke2.yaml \
            get secrets -n kube-system -o jsonpath="{.items[*].metadata.name}" \
            | tr ' ' '\n' | sed -n 's/\.node-password\.rke2$//p'
        executable: /bin/bash
      register: nodes_with_passwords  # A node name on each line
      changed_when: false

    - name: Set fact for existing node passwords
      ansible.builtin.set_fact:
        nodes_with_existing_passwords: "{{ nodes_with_passwords.stdout_lines }}"


# Guard: fail with an explanatory message when `rke2_node_name` is listed in
# `nodes_with_existing_passwords` but /etc/rancher/node/password does not
# exist on this node.
- name: Validate presence of node password file when secret exists
block:
# The stat result is read by the fail condition via `node_password_file.stat.exists`.
- name: Register if rke2 password file exists
ansible.builtin.stat:
path: /etc/rancher/node/password
register: node_password_file

- name: Fail if the cluster already has a <hostname>.node-password.rke2 secret and the node doesn't have a password file
ansible.builtin.fail:
msg: |
The node password secret already exists for node name {{ rke2_node_name }}, but no password file exists in /etc/rancher/node/password!
The node will not be able to join the cluster with this node name without a password file matching the secret.
This can happen for a few reasons:
- The node was previously part of the cluster and RKE2 was removed without running `kubectl delete node {{ rke2_node_name }}`.
- The cluster etcd was restored from a backup from before the node was correctly removed from the cluster.
To join this node, please recreate the file with the password, use a different node name (rke2_node_name), or remove the secret from the cluster using:
kubectl delete secret {{ rke2_node_name}}.node-password.rke2 -n kube-system
when:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

- rke2_node_name in nodes_with_existing_passwords
- not node_password_file.stat.exists

- name: Start RKE2 service on the rest of the nodes
ansible.builtin.systemd:
name: "{{ rke2_service_name }}"
Expand Down
Loading