Skip to content

Commit 050e8cd

Browse files
hanwen-clusterhanwen-pcluste
authored and committed
Simplify logic around udev rules
ParallelCluster uses udev to trigger a Python script upon EBS attachment. Specifically, when it detects the attachment of a device (e.g. /dev/xvdb), the ParallelCluster udev rule creates a symbolic link under `/dev/disk/by-ebs-volumeid` (e.g. `/dev/disk/by-ebs-volumeid/vol-123456`) that points to the device (/dev/xvdb). Then, our cookbook checks that the device under `/dev/disk/by-ebs-volumeid` is ready. Prior to this commit, the ParallelCluster udev script used boto3 calls to retrieve the volume id from the device name. It seems that, starting from RHEL 9, scripts triggered by udev no longer have network access for security reasons. Therefore, this commit removes the boto3 calls and gets the volume id from `/dev/disk/by-ebs-volumeid/parallelcluster_dev_id_mapping`, which is a file the ParallelCluster cookbook writes to. Although the logic could be further simplified by not using the udev rule at all, this commit takes a first step of improvement without spending too much time pursuing absolute simplicity. Moreover, prior to this commit, a service was created to run `udevadm trigger`. `udevadm trigger` is used to refresh the udev system after the udev rules change. It is unnecessary to have a service run the command. Therefore, this commit deletes the service and runs the command once after the rules change. Signed-off-by: Hanwen <[email protected]>
1 parent d586522 commit 050e8cd

File tree

13 files changed

+28
-376
lines changed

13 files changed

+28
-376
lines changed

cookbooks/aws-parallelcluster-environment/files/default/ec2_udev_rules/ec2_dev_2_volid.py

Lines changed: 8 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,11 @@
11
# FIXME: Fix Code Duplication
22
# pylint: disable=R0801
33

4-
import configparser
4+
import json
55
import os
66
import re
77
import sys
88
import syslog
9-
import time
10-
11-
import boto3
12-
import requests
13-
from botocore.config import Config
14-
15-
METADATA_REQUEST_TIMEOUT = 60
16-
17-
18-
def get_imdsv2_token():
19-
# Try with getting IMDSv2 token, fall back to IMDSv1 if can not get the token
20-
token = requests.put(
21-
"http://169.254.169.254/latest/api/token",
22-
headers={"X-aws-ec2-metadata-token-ttl-seconds": "300"},
23-
timeout=METADATA_REQUEST_TIMEOUT,
24-
)
25-
headers = {}
26-
if token.status_code == requests.codes.ok:
27-
headers["X-aws-ec2-metadata-token"] = token.content
28-
return headers
299

3010

3111
def validate_device_name(device_name):
@@ -66,47 +46,6 @@ def adapt_device_name(dev):
6646
return dev
6747

6848

69-
def parse_proxy_config():
70-
config = configparser.RawConfigParser()
71-
config.read("/etc/boto.cfg")
72-
proxy_config = Config()
73-
if config.has_option("Boto", "proxy") and config.has_option("Boto", "proxy_port"):
74-
proxy = config.get("Boto", "proxy")
75-
proxy_port = config.get("Boto", "proxy_port")
76-
proxy_config = Config(proxies={"https": f"{proxy}:{proxy_port}"})
77-
return proxy_config
78-
79-
80-
def get_device_volume_id(ec2, dev, instance_id):
81-
# Poll for blockdevicemapping
82-
devices = ec2.describe_instance_attribute(InstanceId=instance_id, Attribute="blockDeviceMapping").get(
83-
"BlockDeviceMappings"
84-
)
85-
dev_map = dict((d.get("DeviceName"), d) for d in devices)
86-
loop_count = 0
87-
while dev not in dev_map:
88-
if loop_count == 36:
89-
syslog.syslog(f"Dev {dev} did not appears in 180 seconds.")
90-
sys.exit(1)
91-
syslog.syslog(f"Looking for dev {dev} in dev_map {dev_map}")
92-
time.sleep(5)
93-
devices = ec2.describe_instance_attribute(InstanceId=instance_id, Attribute="blockDeviceMapping").get(
94-
"BlockDeviceMappings"
95-
)
96-
dev_map = dict((d.get("DeviceName"), d) for d in devices)
97-
loop_count += 1
98-
99-
return dev_map.get(dev).get("Ebs").get("VolumeId")
100-
101-
102-
def get_metadata_value(token, metadata_path):
103-
return requests.get(
104-
metadata_path,
105-
headers=token,
106-
timeout=METADATA_REQUEST_TIMEOUT,
107-
).text
108-
109-
11049
def main():
11150
syslog.syslog("Starting ec2_dev_2_volid.py script")
11251
try:
@@ -115,29 +54,14 @@ def main():
11554
syslog.syslog(f"Input block device is {dev}")
11655
except IndexError:
11756
syslog.syslog(syslog.LOG_ERR, "Provide block device i.e. xvdf")
118-
11957
dev = adapt_device_name(dev)
120-
121-
token = get_imdsv2_token()
122-
123-
instance_id = get_metadata_value(token, "http://169.254.169.254/latest/meta-data/instance-id")
124-
125-
region = get_metadata_value(token, "http://169.254.169.254/latest/meta-data/placement/availability-zone")
126-
region = region[:-1]
127-
128-
proxy_config = parse_proxy_config()
129-
130-
# Configure the AWS CA bundle.
131-
# In US isolated regions the dedicated CA bundle will be used.
132-
# In any other region, the default bundle will be used (None stands for the default settings).
133-
# Note: We want to apply a more general solution that applies to every region,
134-
# but for the time being this is enough to support US isolated regions without
135-
# impacting the other ones.
136-
ca_bundle = f"/etc/pki/{region}/certs/ca-bundle.pem" if region.startswith("us-iso") else None
137-
138-
ec2 = boto3.client("ec2", region_name=region, config=proxy_config, verify=ca_bundle)
139-
140-
volume_id = get_device_volume_id(ec2, dev, instance_id)
58+
mapping_file_path = "/dev/disk/by-ebs-volumeid/parallelcluster_dev_id_mapping"
59+
if os.path.isfile(mapping_file_path):
60+
with open(mapping_file_path, "r", encoding="utf-8") as mapping_file:
61+
mapping = json.load(mapping_file)
62+
else:
63+
mapping = {}
64+
volume_id = mapping.get(dev)
14165
print(volume_id)
14266

14367

cookbooks/aws-parallelcluster-environment/files/default/ec2_udev_rules/ec2blkdev-init

Lines changed: 0 additions & 46 deletions
This file was deleted.

cookbooks/aws-parallelcluster-environment/files/default/ec2_udev_rules/manageVolume.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import argparse
99
import configparser
10+
import json
1011
import os
1112
import re
1213
import subprocess # nosec B404
@@ -133,6 +134,17 @@ def attach_volume(volume_id, instance_id, ec2):
133134
dev = available_devices[0]
134135
response = ec2.attach_volume(VolumeId=volume_id, InstanceId=instance_id, Device=dev)
135136

137+
mapping_file_path = "/dev/disk/by-ebs-volumeid/parallelcluster_dev_id_mapping"
138+
if os.path.isfile(mapping_file_path):
139+
with open(mapping_file_path, "r", encoding="utf-8") as mapping_file:
140+
mapping = json.load(mapping_file)
141+
else:
142+
mapping = {}
143+
mapping[dev] = volume_id
144+
os.makedirs(os.path.dirname(mapping_file_path), exist_ok=True)
145+
with open(mapping_file_path, "w", encoding="utf-8") as mapping_file:
146+
json.dump(mapping, mapping_file)
147+
136148
# Poll for volume to attach
137149
state = response.get("State")
138150
delay = 5 # seconds

cookbooks/aws-parallelcluster-environment/files/ubuntu/ec2_udev_rules/ec2blkdev-init

Lines changed: 0 additions & 46 deletions
This file was deleted.

cookbooks/aws-parallelcluster-environment/files/ubuntu/ec2_udev_rules/udev-override.conf

Lines changed: 0 additions & 2 deletions
This file was deleted.

cookbooks/aws-parallelcluster-environment/resources/ec2_udev_rules/ec2_udev_rules_ubuntu20+.rb

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,10 @@
1818

1919
unified_mode true
2020
use 'partial/_common_udev_configuration'
21-
use 'partial/_debian_udev_configuration'
2221

2322
default_action :setup
2423

2524
action :setup do
2625
action_create_common_udev_files
27-
action_set_udev_autoreload
2826
action_start_ec2blk
2927
end

cookbooks/aws-parallelcluster-environment/resources/ec2_udev_rules/partial/_common_udev_configuration.rb

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,6 @@
4747
mode '0744'
4848
end
4949

50-
cookbook_file 'ec2blkdev-init' do
51-
source 'ec2_udev_rules/ec2blkdev-init'
52-
cookbook 'aws-parallelcluster-environment'
53-
path '/etc/init.d/ec2blkdev'
54-
user 'root'
55-
group 'root'
56-
mode '0744'
57-
end
58-
5950
cookbook_file 'manageVolume.py' do
6051
source 'ec2_udev_rules/manageVolume.py'
6152
cookbook 'aws-parallelcluster-environment'
@@ -67,8 +58,7 @@
6758
end
6859

6960
action :start_ec2blk do
70-
service "ec2blkdev" do
71-
supports restart: true
72-
action %i(enable start)
61+
execute "Refresh UdevAdmin" do
62+
command "udevadm trigger --action=change --subsystem-match=block"
7363
end unless on_docker?
7464
end

cookbooks/aws-parallelcluster-environment/resources/ec2_udev_rules/partial/_debian_udev_configuration.rb

Lines changed: 0 additions & 37 deletions
This file was deleted.

cookbooks/aws-parallelcluster-environment/spec/unit/resources/ec2_udev_rules_spec.rb

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -46,45 +46,13 @@ def self.setup(chef_run)
4646
.with(group: 'root')
4747
.with(mode: '0744')
4848

49-
is_expected.to create_cookbook_file('ec2blkdev-init')
50-
.with(source: 'ec2_udev_rules/ec2blkdev-init')
51-
.with(path: '/etc/init.d/ec2blkdev')
52-
.with(user: 'root')
53-
.with(group: 'root')
54-
.with(mode: '0744')
55-
5649
is_expected.to create_cookbook_file('manageVolume.py')
5750
.with(source: 'ec2_udev_rules/manageVolume.py')
5851
.with(path: '/usr/local/sbin/manageVolume.py')
5952
.with(user: 'root')
6053
.with(group: 'root')
6154
.with(mode: '0755')
6255
end
63-
64-
if platform == 'ubuntu'
65-
it 'sets udev autoreload' do
66-
is_expected.to nothing_execute('udev-daemon-reload')
67-
.with(command: 'udevadm control --reload')
68-
69-
is_expected.to create_directory('/etc/systemd/system/systemd-udevd.service.d')
70-
71-
is_expected.to create_cookbook_file('udev-override.conf')
72-
.with(source: 'ec2_udev_rules/udev-override.conf')
73-
.with(path: '/etc/systemd/system/systemd-udevd.service.d/override.conf')
74-
.with(user: 'root')
75-
.with(group: 'root')
76-
.with(mode: '0644')
77-
78-
expect(chef_run.cookbook_file('udev-override.conf')).to notify('execute[udev-daemon-reload]').to(:run).immediately
79-
end
80-
end
81-
82-
it 'enables and starts ec2blk service' do
83-
is_expected.to enable_service('ec2blkdev')
84-
.with(supports: { restart: true })
85-
is_expected.to start_service('ec2blkdev')
86-
.with(supports: { restart: true })
87-
end
8856
end
8957
end
9058
end
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
KERNEL=="xvd*", ENV{DEVTYPE}=="disk", PROGRAM="<%= @cookbook_virtualenv_path %>/bin/python /sbin/ec2_dev_2_volid.py %k", SYMLINK+="disk/by-ebs-volumeid/%c"
2-
KERNEL=="xvd*", ENV{DEVTYPE}=="partition", PROGRAM="<%= @cookbook_virtualenv_path %>/bin/python /sbin/ec2_dev_2_volid.py %k", SYMLINK+="disk/by-ebs-volumeid/%c-p%n"
1+
KERNEL=="xvd*", KERNEL!="xvda*", ENV{DEVTYPE}=="disk", PROGRAM="<%= @cookbook_virtualenv_path %>/bin/python /sbin/ec2_dev_2_volid.py %k", SYMLINK+="disk/by-ebs-volumeid/%c"
2+
KERNEL=="xvd*", KERNEL!="xvda*", ENV{DEVTYPE}=="partition", PROGRAM="<%= @cookbook_virtualenv_path %>/bin/python /sbin/ec2_dev_2_volid.py %k", SYMLINK+="disk/by-ebs-volumeid/%c-p%n"
33
KERNEL=="nvme*", ENV{DEVTYPE}=="disk", PROGRAM="<%= @cookbook_virtualenv_path %>/bin/python /sbin/ec2_dev_2_volid.py %k", SYMLINK+="disk/by-ebs-volumeid/%c"
44
KERNEL=="nvme*", ENV{DEVTYPE}=="partition", PROGRAM="<%= @cookbook_virtualenv_path %>/bin/python /sbin/ec2_dev_2_volid.py %k", SYMLINK+="disk/by-ebs-volumeid/%c-p%n"

0 commit comments

Comments
 (0)