# DataChannelV2 setup on new machine

## User creation

#### >> Usage: bash this_script.sh {username: str}

Run once per user:
- jobs
- health

```bash=
if [ $# -eq 0 ]; then
    echo "No arguments provided."
    exit 1
fi
username=$1

sudo useradd -m -s /bin/bash "$username"

# Grant passwordless sudo (back up the sudoers file first)
sudo cp /etc/sudoers /etc/sudoers.old
echo "" | sudo tee -a /etc/sudoers
echo "$username ALL=(ALL) NOPASSWD: ALL" | sudo tee -a /etc/sudoers

sudo apt install python3-venv gcc build-essential python3-dev -y
echo "User $username created!"
```

## SSH key add to GitHub

#### >> Usage: bash this_script.sh {username: str} {machine-dns-name: str}

Run once per user:
- jobs
- health

```bash=
if [ "$#" -lt 2 ]; then
    echo "At least two arguments are required."
    exit 1
fi
username=$1
machine_dns=$2
ssh_key_identifier="$username@$machine_dns"

echo "Running command: ssh-keygen -N '' -f /home/$username/.ssh/id_rsa -C $ssh_key_identifier"
sudo -H -u "$username" bash -c "ssh-keygen -N '' -f /home/$username/.ssh/id_rsa -C $ssh_key_identifier"
echo ""
sudo -H -u "$username" bash -c "cat /home/$username/.ssh/id_rsa.pub"
echo ""
read -p "Copy the above key and add it to the SSH keys in your GitHub account. Once this is done, press Enter."
sudo -H -u "$username" bash -c "ssh-keyscan github.com >> /home/$username/.ssh/known_hosts"
```

## Repo clone

- DataChannelV2
- DataChannelScripts (**IMPORTANT**: needed for env vars deployment)
- DataChannelHealth (**IMPORTANT**)
- DataChannelTransform
- DataChannelDBT
- DataChannelDBTCloud

#### >> Usage: bash this_script.sh {username: str} {repo_name: str}

```bash=
if [ "$#" -lt 2 ]; then
    echo "At least two arguments are required."
    exit 1
fi
username=$1
repo_name=$2

sudo -H -u "$username" bash -c "git clone git@github.com:DataChannelTechnologies/$repo_name.git /home/$username/$repo_name"
sudo -H -u "$username" bash -c "python3 -m venv /home/$username/$repo_name/.venv"
sudo -H -u "$username" bash -c "/home/$username/$repo_name/.venv/bin/pip install --upgrade pip wheel setuptools"
```

## Install initial requirements.txt

#### >> Usage: bash this_script.sh {username: str} {repo_name: str}

```bash=
if [ "$#" -lt 2 ]; then
    echo "At least two arguments are required."
    exit 1
fi
username=$1
repo_name=$2

sudo -H -u "$username" bash -c "/home/$username/$repo_name/.venv/bin/pip install -r /home/$username/$repo_name/requirements.txt"
sudo -H -u "$username" bash -c "/home/$username/$repo_name/.venv/bin/pip install -e /home/$username/$repo_name/"
```

> Health machine: machine-health.datachannel.co
> Add health's public key to the new machine's `~/.ssh/authorized_keys` (sketch below)
> `DataChannelScripts/.env` has to be procured manually
> `DataChannelHealth/.env` has to be procured manually
> `ssh gh-actions@new-machine` should work!
> Ensure CI/CD has run once
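The `authorized_keys` step in the checklist above is currently manual. Below is a minimal sketch that automates it, following the same argument convention as the scripts above; the public-key path argument is an assumption (the checklist does not say where health's key is staged), so point it at wherever you copied the key.

```bash=
# Sketch: append a public key to a user's ~/.ssh/authorized_keys.
# Usage: bash this_script.sh {username: str} {path-to-pubkey: str}
# The pubkey path is hypothetical; stage health's actual public key first.
if [ "$#" -lt 2 ]; then
    echo "At least two arguments are required."
    exit 1
fi
username=$1
pubkey_file=$2

sudo -H -u "$username" bash -c "mkdir -p /home/$username/.ssh && chmod 700 /home/$username/.ssh"
cat "$pubkey_file" | sudo -H -u "$username" bash -c "tee -a /home/$username/.ssh/authorized_keys > /dev/null"
sudo -H -u "$username" bash -c "chmod 600 /home/$username/.ssh/authorized_keys"
```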
## Setup service files

### daemonize_worker_homegenized.py

```python=
import os
import subprocess

import click


@click.command()
@click.option('--username', required=True, help='Username')
@click.option('--autoscale', required=True, help='Autoscale')
@click.option('--hostname', required=True, help='Hostname')
@click.option('--repo-name', required=True, help='Repository name')
@click.option('--worker-name', required=True, help='Worker name')
@click.option('--queue-name', required=True, help='Queue name')
@click.option('--repo-runner-name', required=True,
              help='Name of the runner in the repo, e.g. process_job')
def main(username, autoscale, hostname, repo_name, worker_name, queue_name, repo_runner_name):
    # Content for /etc/default/{worker_name}
    workerd_opts = f"""\
WORKERD_OPTS=" --pidfile=/tmp/{worker_name}.pid --logfile=/home/{username}/.logs/{repo_name}/{worker_name}.log --autoscale={autoscale} --queues={queue_name} --hostname={hostname}"
REPO_DIR="/home/{username}/{repo_name}"
"""

    # Content for /etc/systemd/system/{worker_name}.service
    worker_service_content = f"""\
[Unit]
Description=DataChannel {worker_name.title()} Service
After=network.target
Wants=network.target

[Service]
Type=forking
User={username}
Group={username}
Restart=always
EnvironmentFile=/etc/default/{worker_name}
EnvironmentFile=/home/{username}/{repo_name}/.env
WorkingDirectory=/home/{username}/{repo_name}
ExecStart=/bin/bash -c '${{REPO_DIR}}/.venv/bin/celery -A {repo_runner_name} multi start worker ${{WORKERD_OPTS}}'
ExecStop=/bin/bash -c '${{REPO_DIR}}/.venv/bin/celery -A {repo_runner_name} multi stopwait worker ${{WORKERD_OPTS}}'
ExecReload=/bin/bash -c '${{REPO_DIR}}/.venv/bin/celery -A {repo_runner_name} multi restart worker ${{WORKERD_OPTS}}'
TimeoutStopSec=300

[Install]
WantedBy=multi-user.target
"""

    # Write content to temporary files
    with open(f"/tmp/{worker_name}", "w") as f:
        f.write(workerd_opts)
    with open(f"/tmp/{worker_name}.service", "w") as f:
        f.write(worker_service_content)

    # Copy files to system directories
    _cmd = f"sudo cp /tmp/{worker_name} /etc/default/{worker_name}"
    subprocess.check_output(_cmd.split())
    _cmd = f"sudo cp /tmp/{worker_name}.service /etc/systemd/system/{worker_name}.service"
    subprocess.check_output(_cmd.split())

    # Remove temporary files
    os.unlink(f"/tmp/{worker_name}")
    os.unlink(f"/tmp/{worker_name}.service")

    # Reload systemd so it sees the new unit, then enable and start it
    subprocess.check_output(["sudo", "systemctl", "daemon-reload"])
    subprocess.check_output(["sudo", "systemctl", "enable", f"{worker_name}.service"])
    subprocess.check_output(["sudo", "systemctl", "start", f"{worker_name}.service"])
    subprocess.check_output(["systemctl", "status", f"{worker_name}.service"])


if __name__ == "__main__":
    main()
```
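An example invocation, for reference. The autoscale pair, hostname, worker name, and queue name below are placeholder values, not prescribed ones; `process_job` is taken from the option help text above. The script calls `sudo` internally, so run it as a user with passwordless sudo (e.g. `jobs`).

```bash=
# Placeholder values for illustration only; substitute real ones.
python3 daemonize_worker_homegenized.py \
    --username jobs \
    --autoscale 8,2 \
    --hostname worker01 \
    --repo-name DataChannelV2 \
    --worker-name dc_worker \
    --queue-name default \
    --repo-runner-name process_job
```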
### User migration

#### -- Also please update MetaAPI to do this automatically
#### -- Add MetaAPI's public key to the new machine's `jobs` user `authorized_keys`

```bash=
# Create a user and also add it to the jobs group
sudo useradd -u 1801 -U -d /user_home/customer_success -s /bin/bash -G jobs customer_success
```

### machineid file creation

Add the machine entry in the `dc_admin.machine` table and use that id in this script.

```bash=
#!/bin/bash
if [ "$#" -lt 2 ]; then
    echo "At least two arguments are required."
    exit 1
fi
username=$1
machine_id=$2

# Create a machine_id file recording this machine's dc_admin.machine.id
sudo -H -u "$username" bash -c "echo $machine_id | sudo tee /home/$username/machine_id"
```

## Mount network directory

```bash=
sudo apt install nfs-common
sudo mkdir /user_home
sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport cloud-path-1.amazonaws.com:/ /user_home
```

### Install machine-level dependencies: unixodbc-dev

```bash=
#!/bin/bash
curl https://packages.microsoft.com/keys/microsoft.asc | sudo tee /etc/apt/trusted.gpg.d/microsoft.asc
curl https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list
sudo apt-get update
sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18
sudo apt-get install -y unixodbc-dev
```

## /etc/fstab

```text=
LABEL=cloudimg-rootfs   /           ext4   discard,errors=remount-ro                                                            0 1
LABEL=UEFI              /boot/efi   vfat   umask=0077                                                                           0 1
foo.amazon.com:/        /user_home  nfs4   nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport,_netdev  0 0
```

## Check errors

```bash=
sudo findmnt --verify --verbose
```

## DEPRECATED make_enable_swap

```bash=
#!/bin/bash

# Get the total amount of RAM installed (in KB)
total_ram=$(grep MemTotal /proc/meminfo | awk '{print $2}')

# Size the swap file the same as RAM
swap_size=$total_ram

# Name the swap file after its size in KB (e.g., 32502856kb.swp)
swap_file_name="${swap_size}kb.swp"

# Path of the swap file
swap_file_path="/mnt/$swap_file_name"

# Create the swap file
sudo fallocate -l "${swap_size}K" "$swap_file_path"

# Set appropriate permissions
sudo chmod 600 "$swap_file_path"

# Make it a swap space
sudo mkswap "$swap_file_path"

# Activate the swap file
sudo swapon "$swap_file_path"

# Add an entry to /etc/fstab to make the swap file persistent
echo "$swap_file_path none swap sw 0 0" | sudo tee -a /etc/fstab

# Display the new swap space information
echo "Swap file of ${swap_size}K created and activated at $swap_file_path."
free -h
```

## add_aliases.py

```python=
import click


@click.command()
@click.option('--username', required=True, help='Username')
def append_to_bash_aliases(username):
    # Path to the user's .bash_aliases file
    bash_aliases_path = f"/home/{username}/.bash_aliases"

    # Lines to append to the file
    lines = """
alias gpl='git pull -v'
alias gc='git checkout'
alias gs='git status'
alias gf='git fetch -v'
alias gb='git branch -l'
alias pi='pip install'
alias gd='git diff'
alias gl='git log'
alias gr='git remote -v'
tll() { ls -ltr /user_home/*/*/ForwardETL/*/"$1"/logs/*trace*; }
tllr() { ls -ltr /user_home/*/*/ReverseETL/*/"$1"/logs/"$1"*trace*; }
tllt() { ls -ltr /user_home/*/*/DataChannelTransformation/"$1"/logs/*.log; }
tllo() { ls -ltr /user_home/*/*/Orchestration/"$1"/logs/*trace*; }
tlldbt() { ls -ltr /user_home/*/*/DBT/Run/"$1"/logs/*.log; }
"""

    # Append the lines to the file
    with open(bash_aliases_path, 'a') as file:
        file.write(lines)

    print("Lines appended to .bash_aliases successfully!")


if __name__ == "__main__":
    append_to_bash_aliases()
```

## Maintenance ops

### Crontab

```bash=
30 */4 * * * /home/jobs/DataChannelScripts/.venv/bin/python /home/jobs/DataChannelScripts/rm_tmp_older_files.py --dry-run=False --n-min=2880 >> /home/jobs/DataChannelScripts/cron_rm_tmp_older_files.out 2>&1
```
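One way to install this entry non-interactively for the `jobs` user, as a sketch; the `grep -Fv` just keeps the append idempotent if the entry is already present:

```bash=
# Install the cleanup cron entry for the jobs user (idempotent append).
# The entry itself is taken verbatim from the Crontab section above.
(
  sudo -u jobs crontab -l 2>/dev/null | grep -Fv rm_tmp_older_files.py
  echo '30 */4 * * * /home/jobs/DataChannelScripts/.venv/bin/python /home/jobs/DataChannelScripts/rm_tmp_older_files.py --dry-run=False --n-min=2880 >> /home/jobs/DataChannelScripts/cron_rm_tmp_older_files.out 2>&1'
) | sudo -u jobs crontab -
```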