#!/bin/bash

# Main entry point, to be called by SageMaker from training / processing jobs or an inference endpoint.
# It is called either automatically from setup.py, when the 'sagemaker-ssh-helper' package is installed
# from 'requirements.txt' and the 'bootstrap_on_start' parameter was passed to the wrapper, or manually
# from a training / processing / inference script, e.g. with subprocess.check_call().

# This script can be called multiple times simultaneously in a distributed training or inference job.
# To avoid race conditions, we install the helper scripts under an exclusive lock.
# "install-helper-scripts" mode: load the helper function library that sits next to this script,
# run the installer and stop with status 0. This mode is handled before 'set -e' is enabled below.
case "$1" in
  install-helper-scripts)
    dir=$(dirname "$0")
    . "${dir}"/sm-helper-functions

    _install_helper_scripts
    exit 0
    ;;
esac

# From here on, abort the script as soon as any unchecked command fails.
set -e

# "start-ssh" mode: install and configure the OpenSSH server, start it, initialize SSM and then
# stay in the foreground running the SSM agent. This branch never returns to the code below it:
# it ends with 'exit' (or is aborted earlier by 'set -e' if a step fails).
if [[ "$1" == "start-ssh" ]]; then
  dir=$(dirname "$0")
  source "$dir"/sm-helper-functions

  # Log IP addresses of the container (useful only in training in combination with VPC + VPN)
  echo "SSH Helper Log IP: $(hostname -I)"

  # Make /tmp world-writable with the sticky bit set and make sure the SSH config dir exists
  chmod 1777 /tmp
  mkdir -p ~/.ssh

  # Install SSH (if using MPI, already installed)
  if _is_centos; then
    yum install -y openssh-server
  else
    # Keep apt from prompting for input during the installation
    export DEBIAN_FRONTEND=noninteractive
    apt-get update
    apt-get install -y --no-install-recommends openssh-server
  fi

  # Save and dump SageMaker environment for SSH sessions
  sm-save-env

  # Dump container bootstrap environment (PID 1) - can be different from above, useful for debugging
  ps wwwe -p 1 | tail -1

  # For each sshd option below: comment out any existing setting, then append our own value
  sed -i -e 's~^ClientAliveInterval~#ClientAliveInterval~' /etc/ssh/sshd_config
  echo "ClientAliveInterval 15" >> /etc/ssh/sshd_config

  sed -i -e 's~^PermitRootLogin~#PermitRootLogin~' /etc/ssh/sshd_config
  echo PermitRootLogin yes >> /etc/ssh/sshd_config

  # NOTE(review): /etc/ssh/authorized_keys is not written by this script - presumably it is
  # populated by one of the helper scripts; confirm.
  sed -i -e 's~^AuthorizedKeysFile~#AuthorizedKeysFile~' /etc/ssh/sshd_config
  echo "AuthorizedKeysFile /etc/ssh/authorized_keys" >> /etc/ssh/sshd_config

  # Start SSH server
  if _is_centos; then
    # NOTE: systemctl will not work in CentOS SageMaker container (e.g. Spark processing) because lack of
    # privileges to access DBUS, so we run sshd manually. This command doesn't work:
    # # service sshd start || (echo "ERROR: Failed to start sshd service" && exit 255)
    if [[ ! -f /etc/ssh/ssh_host_rsa_key ]]; then
      echo "Generating new SSH keys"
      ssh-keygen -A
    fi
    /usr/sbin/sshd
  else
    # Exit explicitly instead of relying on 'set -e' to propagate a subshell's exit status
    if ! service ssh start; then
      echo "ERROR: Failed to start ssh service"
      exit 255
    fi
  fi

  sm-init-ssm

  # Running forever as daemon.
  # The exit status is captured with '||' so that 'set -e' does not abort the script silently
  # when the agent terminates with a non-zero status: the error below must always be logged.
  agent_exit_code=0
  amazon-ssm-agent || agent_exit_code=$?

  echo "ERROR: agent died (exit code: $agent_exit_code)"
  exit 1  # only reached if the agent has stopped, which is not expected
fi

# Re-invoke this script in "install-helper-scripts" mode while holding the lock on the install
# lock file, and prepend the setup log prefix to every line it prints to stdout.
install_lock=/tmp/sm-install-lock
flock "$install_lock" bash "$0" install-helper-scripts |
  sed -u 's/^/[sagemaker-ssh-helper][sm-setup-ssh] /'

# Launch this script in "start-ssh" mode in the background, unless the lock file already exists:
#  - nohup will detach the child process from parent and run it in background
#  - flock -n prevents from starting more than 1 process
#  - redirection to /proc/1/fd/1 will write logs to CloudWatch
#  - sed will prepend log output with SSH Helper prefix
# With SSH_LOG_TO_STDOUT=true the process is started without nohup, prefix or redirection.
start_ssh_lock=/tmp/sm-start-ssh-lock
if [[ -f "$start_ssh_lock" ]]; then
  :  # lock file is already there - nothing to launch
elif [[ "$SSH_LOG_TO_STDOUT" == "true" ]]; then
  flock -n "$start_ssh_lock" bash "$0" start-ssh &
else
  nohup flock -n "$start_ssh_lock" bash "$0" start-ssh 2>&1 |
    sed -u 's/^/[sagemaker-ssh-helper][sm-start-ssh] /' >/proc/1/fd/1 2>&1 &
fi

# Run sm-wait with the value of SSH_WAIT_TIME_SECONDS (60 when unset or empty) and prepend
# the setup log prefix to every line it prints to stdout.
wait_seconds="${SSH_WAIT_TIME_SECONDS:-60}"
sm-wait "$wait_seconds" |
  sed -u 's/^/[sagemaker-ssh-helper][sm-setup-ssh] /'