# Ansible playbook: deploy a small Slurm cluster.
# Source: https://github.com/RyaxTech/ansible-hpc-cluster
#
# Three plays run in order:
#   1. All nodes     - install base packages and Munge, share one Munge key.
#   2. Control node  - install slurmctld, render slurm.conf, export it over NFS.
#   3. Compute nodes - mount the shared /etc/slurm, install and start slurmd.
#
# Variables and handlers are scoped to a single play in Ansible, so each play
# below declares the variables it reads and the handlers it notifies.
---
- name: Deploy Slurm Cluster
  hosts: all
  become: true
  vars:
    control_node: hpc1
    compute_nodes: hpc2
    # NOTE(review): not referenced by any task in this file; presumably
    # consumed by slurm.conf.j2 or kept for future use - confirm.
    slurm_version: "23.11.4"
    munge_key_path: /etc/munge/munge.key
    # Staging location on the Ansible controller for the cluster Munge key.
    # This file is a secret: keep it out of version control.
    munge_key_cache: "{{ playbook_dir }}/.munge.key.cache"
  tasks:
    - name: Install common dependencies
      ansible.builtin.apt:
        # Passing the list directly lets apt resolve everything in one
        # transaction instead of one invocation per package.
        name:
          - wget
          - build-essential
          - libmunge-dev
          - nfs-common
        state: present
        update_cache: true
        cache_valid_time: 3600

    - name: Install Munge on all nodes
      ansible.builtin.apt:
        name: munge
        state: present

    - name: Generate munge key on control node
      ansible.builtin.command:
        cmd: /usr/sbin/create-munge-key -f
        # Only generate when no key exists; regenerating on every run would
        # invalidate the key already distributed to the compute nodes.
        creates: "{{ munge_key_path }}"
      when: inventory_hostname == control_node
      notify: Restart Munge

    - name: Fetch munge key from control node
      # `copy` reads `src` from the Ansible controller, not from another
      # managed host, so the key is first pulled from the control node.
      # `fetch` is binary-safe, unlike slurp + b64decode templating.
      ansible.builtin.fetch:
        src: "{{ munge_key_path }}"
        dest: "{{ munge_key_cache }}"
        flat: true
      when: inventory_hostname == control_node

    - name: Copy munge key to compute nodes
      ansible.builtin.copy:
        src: "{{ munge_key_cache }}"
        dest: "{{ munge_key_path }}"
        owner: munge
        group: munge
        mode: "0400"
      when: inventory_hostname != control_node
      notify: Restart Munge

  handlers:
    - name: Restart Munge
      ansible.builtin.service:
        name: munge
        state: restarted

- name: Configure Control Node
  hosts: hpc1
  become: true
  vars:
    # Hosts allowed to mount /etc/slurm. "*" is permissive; tighten this to
    # the compute-node addresses or subnet where possible.
    slurm_nfs_clients: "*"
  tasks:
    - name: Install Slurm Controller
      ansible.builtin.apt:
        name: slurmctld
        state: present

    - name: Install NFS server
      ansible.builtin.apt:
        name: nfs-kernel-server
        state: present

    - name: Create Slurm config
      ansible.builtin.template:
        src: slurm.conf.j2
        dest: /etc/slurm/slurm.conf
        mode: "0644"
      notify: Restart Slurm Controller

    - name: Export Slurm config directory over NFS
      # Without an export entry the compute nodes cannot mount /etc/slurm.
      ansible.builtin.lineinfile:
        path: /etc/exports
        line: "/etc/slurm {{ slurm_nfs_clients }}(ro,sync,no_subtree_check)"
        create: true
        mode: "0644"
      notify: Reload NFS exports

    - name: Start NFS server
      ansible.builtin.service:
        name: nfs-kernel-server
        state: started
        enabled: true

  handlers:
    - name: Restart Slurm Controller
      ansible.builtin.service:
        name: slurmctld
        state: restarted

    - name: Reload NFS exports
      ansible.builtin.command:
        cmd: exportfs -ra

- name: Configure Compute Nodes
  # Every host except the control node: mounting hpc1:/etc/slurm on hpc1
  # itself would shadow the directory that is being exported.
  hosts: "all:!hpc1"
  become: true
  vars:
    control_node: hpc1
  tasks:
    - name: Mount NFS share
      ansible.posix.mount:
        path: /etc/slurm
        src: "{{ control_node }}:/etc/slurm"
        fstype: nfs
        state: mounted

    - name: Install Slurm Compute Daemon
      ansible.builtin.apt:
        name: slurmd
        state: present

    - name: Start Slurmd
      # Restarted unconditionally, as in the original: the configuration
      # arrives via the NFS mount, so no task on this host reports a change
      # that could trigger a handler.
      ansible.builtin.service:
        name: slurmd
        state: restarted
        enabled: true