Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,6 @@ localtweak__*.tf

# tests folder log file
*.log

# Ignore generated SSH RSA private key files (e.g. storage_id_rsa, compute_id_rsa)
*id_rsa
18 changes: 18 additions & 0 deletions datasource.tf
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,22 @@ data "ibm_is_subnet" "existing_client_subnets" {
data "ibm_is_subnet" "existing_bastion_subnets" {
count = var.vpc != null && var.bastion_subnets != null ? 1 : 0
name = var.bastion_subnets[count.index]
}

# Data sources used to size the dynamic compute (LSF resource connector) configuration
# Look up the VSI profile of the dynamic worker nodes; locals.tf reads
# vcpu_count and memory from this to compute vcpus/memInMB.
# NOTE(review): "dynmaic" is a typo ("dynamic"); renaming requires updating
# the references in locals.tf in the same change, so it is left as-is here.
data "ibm_is_instance_profile" "dynmaic_worker_profile" {
  name = var.dynamic_compute_instances[0].profile
}

# Resolve the dynamic compute image name to its ID (consumed as
# local.imageID for the resource-connector template).
data "ibm_is_image" "dynamic_compute" {
  name = var.dynamic_compute_instances[0].image
}

# Resolve each compute SSH key name to its key ID; consumed by
# local.compute_ssh_keys_ids.
data "ibm_is_ssh_key" "compute_ssh_keys" {
  for_each = toset(local.compute_ssh_keys)
  name     = each.key
}

# Fetch the compute subnet by ID to expose its CRN
# (consumed as local.compute_subnet_crn).
data "ibm_is_subnet" "compute_subnet_crn" {
  identifier = local.compute_subnet_id
}
14 changes: 13 additions & 1 deletion locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ locals {
storage_private_key_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/storage_id_rsa" : "${path.root}/modules/ansible-roles/storage_id_rsa" #checkov:skip=CKV_SECRET_6
compute_playbook_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/compute_ssh.yaml" : "${path.root}/modules/ansible-roles/compute_ssh.yaml"
storage_playbook_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/storage_ssh.yaml" : "${path.root}/modules/ansible-roles/storage_ssh.yaml"
playbooks_root_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles" : "${path.root}/modules/ansible-roles"
}

# file Share OutPut
Expand All @@ -257,6 +258,17 @@ locals {
nfs_install_dir = "none"
Enable_Monitoring = false
lsf_deployer_hostname = var.deployer_hostname #data.external.get_hostname.result.name #var.enable_bastion ? "" : flatten(module.deployer.deployer_vsi_data[*].list)[0].name
vcpus = var.enable_deployer ? 0 : tonumber(data.ibm_is_instance_profile.dynmaic_worker_profile.vcpu_count[0].value)
ncores = var.enable_deployer ? 0 : tonumber(local.vcpus / 2)
ncpus = var.enable_deployer ? 0 : tonumber(var.enable_hyperthreading ? local.vcpus : local.ncores)
memInMB = var.enable_deployer ? 0 : tonumber(data.ibm_is_instance_profile.dynmaic_worker_profile.memory[0].value) * 1024
rc_maxNum = var.enable_deployer ? 0 : tonumber(var.dynamic_compute_instances[0].count)
rc_profile = var.enable_deployer ? "" : var.dynamic_compute_instances[0].profile
imageID = var.enable_deployer ? "" : data.ibm_is_image.dynamic_compute.id
compute_subnets_cidr = var.compute_subnets_cidr
dynamic_compute_instances = var.dynamic_compute_instances
compute_subnet_crn = data.ibm_is_subnet.compute_subnet_crn.crn
compute_ssh_keys_ids = [for name in local.compute_ssh_keys : data.ibm_is_ssh_key.compute_ssh_keys[name].id]
}

locals {
Expand All @@ -266,7 +278,7 @@ locals {
remote_terraform_path = format("%s/terraform-ibm-hpc", local.deployer_path)
remote_ansible_path = format("%s/terraform-ibm-hpc", local.deployer_path)
da_hpc_repo_url = "https://github.com/terraform-ibm-modules/terraform-ibm-hpc.git"
da_hpc_repo_tag = "develop" ###### change it to main in future
da_hpc_repo_tag = "latest_code_anand" ###### FIXME(review): temporary personal development branch — must be changed back to "main" before merge/release
zones = jsonencode(var.zones)
list_compute_ssh_keys = jsonencode(local.compute_ssh_keys)
list_storage_ssh_keys = jsonencode(local.storage_ssh_keys)
Expand Down
58 changes: 49 additions & 9 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,22 @@ resource "local_sensitive_file" "prepare_tf_input" {
"compute_private_key_content": ${local.compute_private_key_content},
"bastion_security_group_id": "${local.bastion_security_group_id}",
"deployer_hostname": "${local.deployer_hostname}",
"deployer_ip": "${local.deployer_ip}"
"deployer_ip": "${local.deployer_ip}",
"enable_hyperthreading": ${var.enable_hyperthreading},
"vcpus": ${local.vcpus},
"ncores": ${local.ncores},
"ncpus": ${local.ncpus},
"memInMB": ${local.memInMB},
"rc_maxNum": ${local.rc_maxNum},
"rc_profile": "${local.rc_profile}",
"imageID": "${local.imageID}",
"compute_subnet_id": "${local.compute_subnet_id}",
"region": "${local.region}",
"resource_group_id": "${local.resource_group_ids["service_rg"]}",
"compute_subnets_cidr": ${jsonencode(local.compute_subnets_cidr)},
"dynamic_compute_instances": ${jsonencode(local.dynamic_compute_instances)},
"compute_ssh_keys_ids": ${jsonencode(local.compute_ssh_keys_ids)},
"compute_subnet_crn": ${jsonencode(local.compute_subnet_crn)}
}
EOT
filename = local.schematics_inputs_path
Expand Down Expand Up @@ -274,6 +289,30 @@ module "write_compute_cluster_inventory" {
nfs_install_dir = local.nfs_install_dir
Enable_Monitoring = local.Enable_Monitoring
lsf_deployer_hostname = local.lsf_deployer_hostname
# Compute DNS domain and SSH key material inputs
dns_domain_names = var.dns_domain_names["compute"]
compute_public_key_content = var.compute_public_key_content
compute_private_key_content = var.compute_private_key_content
# Dynamic compute sizing and resource-connector inputs
enable_hyperthreading = var.enable_hyperthreading
ibmcloud_api_key = var.ibmcloud_api_key
vpc_id = local.vpc_id
vcpus = local.vcpus
ncores = local.ncores
ncpus = local.ncpus
memInMB = local.memInMB
rc_maxNum = local.rc_maxNum
rc_profile = local.rc_profile
imageID = local.imageID
compute_subnet_id = local.compute_subnet_id
region = local.region
resource_group_id = local.resource_group_ids["service_rg"]
zones = var.zones
compute_subnets_cidr = local.compute_subnets_cidr
dynamic_compute_instances = local.dynamic_compute_instances
compute_security_group_id = local.compute_security_group_id
compute_ssh_keys_ids = local.compute_ssh_keys_ids
compute_subnet_crn = local.compute_subnet_crn
depends_on = [ time_sleep.wait_60_seconds ]
}

Expand Down Expand Up @@ -313,14 +352,15 @@ module "storage_inventory" {
}

module "compute_playbook" {
count = var.enable_deployer == false ? 1 : 0
source = "./modules/playbook"
bastion_fip = local.bastion_fip
private_key_path = local.compute_private_key_path
inventory_path = local.compute_inventory_path
playbook_path = local.compute_playbook_path
enable_bastion = var.enable_bastion
depends_on = [ module.compute_inventory ]
count = var.enable_deployer == false ? 1 : 0
source = "./modules/playbook"
bastion_fip = local.bastion_fip
private_key_path = local.compute_private_key_path
inventory_path = local.compute_inventory_path
playbook_path = local.compute_playbook_path
playbooks_root_path = local.playbooks_root_path
enable_bastion = var.enable_bastion
depends_on = [ module.compute_inventory ]
}

# module "storage_playbook" {
Expand Down
10 changes: 10 additions & 0 deletions modules/ansible-roles/roles/lsf_mgmt_config/handlers/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# Handlers for the lsf_mgmt_config role: restart daemons after
# configuration/template changes. Module references normalized to FQCN
# (the file previously mixed bare `service` with `ansible.builtin.systemd`).

# Restart the LSF daemon so updated resource-connector configuration
# takes effect.
- name: Restart lsfd service
  ansible.builtin.service:
    name: lsfd
    state: restarted

# Restart NetworkManager; notified together with the lsfd restart after
# user_data.sh is deployed.
- name: Restart NetworkManager
  ansible.builtin.systemd:
    name: NetworkManager
    state: restarted
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
# Deploy IBM Cloud Gen2 resource-connector configuration files onto the
# LSF management node. Every task uses run_once so each file is written by
# a single host in the play. Module references use the FQCN
# (ansible.builtin.template) per current Ansible best practice.

- name: Management Config Templates | Copy credentials
  ansible.builtin.template:
    src: "templates/credentials.j2"
    dest: "{{ LSF_RC_IC_CONF }}/credentials"
    mode: '0644'
  run_once: true

- name: Management Config Templates | Copy ibmcloudgen2_config.json
  ansible.builtin.template:
    src: "templates/ibmcloudgen2_config.json.j2"
    dest: "{{ LSF_RC_IC_CONF }}/ibmcloudgen2_config.json"
    mode: '0644'
  run_once: true

- name: Management Config Templates | Copy ibmcloudgen2_templates.json
  ansible.builtin.template:
    src: "templates/ibmcloudgen2_templates.json.j2"
    dest: "{{ LSF_RC_IC_CONF }}/ibmcloudgen2_templates.json"
    mode: '0644'
  run_once: true

- name: Management Config Templates | Copy hostProviders.json
  ansible.builtin.template:
    src: "templates/hostProviders.json.j2"
    dest: "{{ LSF_CONF_FILE_PATH }}/resource_connector/hostProviders.json"
    mode: '0644'
  run_once: true

# NOTE(review): user_data.sh is rendered through the template module but
# lacks a .j2 suffix, and a script installed with mode 0644 is not
# executable — confirm the resource connector invokes it via an interpreter
# rather than executing it directly.
- name: Management Config Templates | Copy user_data.sh
  ansible.builtin.template:
    src: "templates/user_data.sh"
    dest: "{{ LSF_RC_IC_CONF }}/user_data.sh"
    mode: '0644'
  run_once: true
  notify:
    - Restart lsfd service
    - Restart NetworkManager
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
---
# Configure LSF management nodes for the IBM Cloud Gen2 resource connector:
# tune lsf.conf, define the das_q data-transfer queue, enable the demand
# scheduler module, register management hosts, and restrict the dynamic
# host address range.

# Idempotent via lineinfile: each setting is appended only when an
# identical line is not already present in LSF_CONF_FILE.
- name: Management Config | Append LSF configuration settings
  lineinfile:
    path: "{{ LSF_CONF_FILE }}"
    line: "{{ item }}"
    create: yes
  loop:
    - "LSB_RC_EXTERNAL_HOST_IDLE_TIME=10"
    - "LSF_DYNAMIC_HOST_WAIT_TIME=60"
    - "LSF_DYNAMIC_HOST_TIMEOUT=\"EXPIRY[10m] THRESHOLD[250] INTERVAL[60m]\""
    - "LSB_RC_EXTERNAL_HOST_FLAG=\"icgen2host\""
    - "LSB_RC_UPDATE_INTERVAL=15"
    - "LSB_RC_MAX_NEWDEMAND=50"
    - "LSF_UDP_TO_TCP_THRESHOLD=9000"
    - "LSF_CALL_LIM_WITH_TCP=Y"
    - "LSF_ANNOUNCE_MASTER_TCP_WAITTIME=600"
    - "LSF_RSH=\"ssh -o 'PasswordAuthentication no' -o 'StrictHostKeyChecking no'\""
  run_once: true

# The marker comment written by the blockinfile below makes the queue
# append idempotent across reruns.
- name: Management Config | Check if queue configuration already exists
  shell: "grep -q '# ANSIBLE MANAGED: QUEUE_NAME added' '{{ LSF_LSBATCH_CONF }}/lsb.queues'"
  register: queue_check
  changed_when: false
  failed_when: false

- name: Management Config | Append LSF queue configuration to lsb.queues
  blockinfile:
    path: "{{ LSF_LSBATCH_CONF }}/lsb.queues"
    insertafter: EOF
    block: |
      # ANSIBLE MANAGED: QUEUE_NAME added
      Begin Queue
      QUEUE_NAME=das_q
      DATA_TRANSFER=Y
      RC_HOSTS=all
      HOSTS=all
      RES_REQ=type==any
      End Queue
    marker: ""
  when: queue_check.rc != 0
  run_once: true

- name: Management Config | Update LSF configuration files
  block:
    # The leading space in the replacement keeps the column layout of
    # lsf.shared intact when uncommenting the entry.
    - name: Management Config | Uncomment "icgen2host" in lsf.shared
      replace:
        path: "{{ LSF_CONF_FILE_PATH }}/lsf.shared"
        regexp: '^#\s*(icgen2host)'
        replace: ' \1'

    - name: Management Config | Uncomment "schmod_demand" in lsb.modules
      replace:
        path: "{{ LSF_LSBATCH_CONF }}/lsb.modules"
        regexp: '^#\s*(schmod_demand)'
        replace: '\1'

# NOTE(review): the /tmp marker file guards the sed edit below, but /tmp
# may be cleared on reboot, which would re-run the sed and insert a
# duplicate RC_HOSTS line — consider a marker inside lsb.queues instead.
- name: Check if RC_HOSTS modification was already done
  stat:
    path: "/tmp/rc_hosts_added"
  register: rc_hosts_marker

# Inserts "RC_HOSTS = all" on the line after each QUEUE_NAME inside every
# Begin Queue/End Queue section.
- name: Management Config | Add "RC_HOSTS = all" after QUEUE_NAME in lsb.queues using sed
  shell: |
    sed -i '/^Begin Queue$/,/^End Queue$/{/QUEUE_NAME/{N;s/\(QUEUE_NAME\s*=[^\n]*\)\n/\1\nRC_HOSTS = all\n/}}' "{{ LSF_LSBATCH_CONF }}/lsb.queues"
    touch /tmp/rc_hosts_added
  when: not rc_hosts_marker.stat.exists
  run_once: true

# NOTE(review): management_hostnames (derived from lsf_masters_list) is
# defined but never used — the loop iterates lsf_masters instead. Confirm
# which variable is the intended source of truth and remove the other.
- name: Management Config | Append management hostnames to lsb.hosts
  vars:
    management_hostnames: "{{ lsf_masters_list.split() }}"
  lineinfile:
    path: "{{ LSF_LSBATCH_CONF }}/lsb.hosts"
    insertafter: "^default !.*"
    line: "{{ item }} 0 () () () () () (Y)"
    state: present
  loop: "{{ lsf_masters }}"
  run_once: true

# Marker comment makes the address-range append idempotent.
- name: Management Config | Check if LSF_HOST_ADDR_RANGE is already set
  shell: "grep -q '# ANSIBLE MANAGED: LSF_HOST_ADDR_RANGE added' '{{ LSF_CONF_FILE_PATH }}/lsf.cluster.{{ my_cluster_name }}'"
  register: lsf_host_addr_range_marker_check
  changed_when: false
  failed_when: false

# Restrict dynamic host registration to 10.*.*.* addresses.
- name: Management Config | Append LSF_HOST_ADDR_RANGE to lsf.cluster
  blockinfile:
    path: "{{ LSF_CONF_FILE_PATH }}/lsf.cluster.{{ my_cluster_name }}"
    block: |
      # ANSIBLE MANAGED: LSF_HOST_ADDR_RANGE added
      Begin Parameters
      LSF_HOST_ADDR_RANGE=10.*.*.*
      End Parameters
    marker: ""
  when: lsf_host_addr_range_marker_check.rc != 0
  run_once: true

# NOTE(review): python3.11 paths are hard-coded; on hosts with a different
# Python version this silently does nothing (ignore_errors) — confirm the
# intent and consider discovering the interpreter path instead.
- name: Set permissions for Python directories
  file:
    path: "{{ item }}"
    mode: "0755"
    recurse: yes
  loop:
    - /usr/local/lib/python3.11
    - /usr/local/lib64/python3.11
  ignore_errors: yes
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
---
# Maintain the shared LSF hosts file: pre-generate IP-to-hostname mappings
# for the compute subnet, remove entries for hosts that already resolve via
# DNS, then sync the result to /etc/hosts.

- name: Management Config | Check if IP-to-host mapping already exists
  shell: "grep -q '# ANSIBLE MANAGED: IP mapping added' '{{ LSF_HOSTS_FILE }}'"
  register: ip_mapping_check
  changed_when: false
  failed_when: false
  run_once: true

# Emits one "<ip> <cluster>-<ip-with-dashes>" line per address of the first
# compute subnet CIDR. NOTE(review): IPv4Network(...) iteration includes the
# network and broadcast addresses — confirm those extra entries are harmless.
- name: Management Config | Generate and append IP-to-host mapping to LSF hosts file
  shell: |
    echo "# ANSIBLE MANAGED: IP mapping added" >> '{{ LSF_HOSTS_FILE }}'
    python3 -c "import ipaddress; \
    print('\\n'.join([str(ip) + ' {{ my_cluster_name }}-' + str(ip).replace('.', '-') \
    for ip in ipaddress.IPv4Network('{{ compute_subnets_cidr | first }}')]))" >> '{{ LSF_HOSTS_FILE }}'
  args:
    executable: /bin/bash
  run_once: true
  when: ip_mapping_check.rc != 0

# assumes nslookup prints "Address: <ip>" lines and the last one is the
# host's address — TODO confirm on resolvers returning multiple records.
- name: Get IP addresses using nslookup
  shell: "nslookup {{ inventory_hostname }} | awk '/Address: / { print $2 }' | tail -n 1"
  register: dns_ip
  changed_when: false

- name: Store IPs for each host
  set_fact:
    host_ip: "{{ dns_ip.stdout }}"

# Gather every host's host_ip fact into a single list on one host.
- name: Aggregate all IPs from all hosts
  set_fact:
    all_ips: "{{ groups['all'] | map('extract', hostvars, 'host_ip') | list }}"
  run_once: true

# "|| true" keeps grep's no-match exit code from failing the task.
- name: Check if each IP exists in LSF hosts file
  shell: "grep -w '{{ item }}' {{ LSF_HOSTS_FILE }} || true"
  register: ip_check
  loop: "{{ all_ips }}"
  changed_when: false
  run_once: true

# DNS-resolvable hosts get their generated static entries removed so stale
# duplicates do not shadow live records.
- name: Remove matched IPs from LSF hosts file if they exist
  lineinfile:
    path: "{{ LSF_HOSTS_FILE }}"
    state: absent
    regexp: "^{{ item.item }}\\s"
  loop: "{{ ip_check.results }}"
  when: item.stdout | length > 0
  run_once: true

# NOTE(review): this overwrites /etc/hosts entirely with LSF_HOSTS_FILE;
# failures are ignored — confirm that is acceptable.
- name: Copy the Hosts file to /etc/hosts
  copy:
    src: "{{ LSF_HOSTS_FILE }}"
    dest: /etc/hosts
    remote_src: yes
  ignore_errors: yes
Loading