Skip to content

Commit 8641895

Browse files
committed
Merge branch 'pub/q1_dev' of https://github.com/priti-parate/omnia into pub/q1_dev
2 parents 3adb3ff + 6265308 commit 8641895

File tree

32 files changed

+798
-331
lines changed

32 files changed

+798
-331
lines changed

common/library/module_utils/input_validation/common_utils/slurm_conf_utils.py

Lines changed: 88 additions & 84 deletions
Large diffs are not rendered by default.

common/library/modules/slurm_conf.py

Lines changed: 87 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -19,28 +19,28 @@
1919
version_added: "1.0.0"
2020
description:
2121
- This module provides utilities for working with Slurm configuration files.
22-
- It can parse a Slurm conf file into a dictionary (f2d).
23-
- It can convert a dictionary back to Slurm conf INI format (d2f).
22+
- It can parse a Slurm conf file into a dictionary (parse).
23+
- It can convert a dictionary back to Slurm conf INI format (render).
2424
- It can merge multiple configuration sources (files and/or dicts) into one (merge).
2525
options:
2626
op:
2727
description:
2828
- The operation to perform.
29-
- C(f2d) - File to dict. Parse a Slurm conf file and return as dictionary.
30-
- C(d2f) - Dict to file. Convert a dictionary to Slurm conf INI lines.
29+
- C(parse) - File to dict. Parse a Slurm conf file and return as dictionary.
30+
- C(render) - Dict to file. Convert a dictionary to Slurm conf INI lines.
3131
- C(merge) - Merge multiple configuration sources into one.
3232
required: true
3333
type: str
34-
choices: ['f2d', 'd2f', 'merge']
34+
choices: ['parse', 'render', 'merge']
3535
path:
3636
description:
3737
- Path to the Slurm configuration file.
38-
- Required when I(op=f2d).
38+
- Required when I(op=parse).
3939
type: str
4040
conf_map:
4141
description:
4242
- Dictionary of configuration key-value pairs.
43-
- Required when I(op=d2f).
43+
- Required when I(op=render).
4444
type: dict
4545
default: {}
4646
conf_sources:
@@ -58,33 +58,28 @@
5858
- Used for validation of configuration keys.
5959
type: str
6060
default: slurm
61-
choices: ['slurm', 'cgroup', 'gres', 'mpi', 'slurmdbd']
6261
author:
63-
- Jagadeesh N V (jagadeesh.n.v@dell.com)
64-
notes:
65-
- Requires Python 3.7+ for ordered dict behavior.
66-
- Array-type parameters (NodeName, PartitionName, SlurmctldHost, etc.) are handled specially.
62+
- Jagadeesh N V (@jagadeeshnv)
6763
'''
6864

6965
EXAMPLES = r'''
7066
# Parse a slurm.conf file into a dictionary
7167
- name: Read slurm.conf
7268
slurm_conf:
73-
op: f2d
69+
op: parse
7470
path: /etc/slurm/slurm.conf
7571
conf_name: slurm
7672
register: slurm_config
7773
7874
# Convert a dictionary to slurm.conf INI lines
7975
- name: Generate slurm.conf lines
8076
slurm_conf:
81-
op: d2f
77+
op: render
8278
conf_map:
8379
ClusterName: mycluster
8480
SlurmctldPort: 6817
8581
SlurmctldHost:
86-
- SlurmctldHost: controller1
87-
- SlurmctldHost: controller2
82+
- controller2
8883
NodeName:
8984
- NodeName: node[1-10]
9085
CPUs: 16
@@ -118,125 +113,127 @@
118113
'''
119114

120115
RETURN = r'''
121-
slurm_dict:
122-
description: Parsed configuration as a dictionary (when op=f2d).
123-
type: dict
124-
returned: when op=f2d
125-
sample: {"ClusterName": "mycluster", "SlurmctldPort": "6817"}
126-
slurm_conf:
127-
description: Configuration as INI-format lines (when op=d2f).
128-
type: list
129-
returned: when op=d2f
130-
sample: ["ClusterName=mycluster", "SlurmctldPort=6817"]
131116
conf_dict:
132-
description: Merged configuration as a dictionary (when op=merge).
117+
description: Parsed (op=parse) or merged (op=merge) configuration as a dictionary.
133118
type: dict
134-
returned: when op=merge
119+
returned: when op=merge or op=parse
135120
sample: {"ClusterName": "mycluster", "SlurmctldTimeout": 120}
136121
ini_lines:
137-
description: Merged configuration as INI-format lines (when op=merge).
122+
description: Rendered (op=render) or merged (op=merge) configuration as INI-format lines.
138123
type: list
139-
returned: when op=merge
124+
returned: when op=merge or op=render
140125
sample: ["ClusterName=mycluster", "SlurmctldTimeout=120"]
141126
'''
142127

128+
# TODO:
129+
# - Module is not case sensitive for conf keys
130+
# - Support for validation of S_P_<data> types
131+
# - Validation for choices for each type
132+
# - Choices types for each type
133+
# - Merge of sub options
134+
# - Hostlist expressions, split and merge computations
135+
136+
143137
from collections import OrderedDict
144138
from ansible.module_utils.basic import AnsibleModule
145139
from ansible.module_utils.input_validation.common_utils.slurm_conf_utils import SlurmParserEnum, all_confs
146140
import os
147141

148-
# NOTE: depends on python3.7+ where dict order is maintained
149-
150142

151143
def read_dict2ini(conf_dict):
    """Convert a configuration dictionary to INI-style lines for slurm.conf.

    Each top-level entry of ``conf_dict`` is rendered as follows:

    * scalar value -> one ``key=value`` line;
    * list of scalars -> one ``key=item`` line per item;
    * list of dicts -> one line per dict, made of space-separated
      ``subkey=subvalue`` pairs, with the pair named after the parent key
      (e.g. ``NodeName``) placed first when the dict contains it.

    Args:
        conf_dict: Mapping of configuration keys to scalars or lists.

    Returns:
        list: The rendered lines, in the iteration order of ``conf_dict``.
    """
    data = []
    for k, v in conf_dict.items():
        if not isinstance(v, list):
            data.append(f"{k}={v}")
            continue
        for entry in v:
            if not isinstance(entry, dict):
                data.append(f"{k}={entry}")
                continue
            od = OrderedDict(entry)
            # The identifying key must lead the line. Guard the move:
            # move_to_end() raises KeyError when the key is absent, and an
            # entry without it is rendered in its own order instead.
            if k in od:
                od.move_to_end(k, last=False)
            data.append(
                " ".join(f"{key}={value}" for key, value in od.items()))
    return data
167160

168161

169-
def parse_slurm_conf(file_path, conf_name, validate):
    """Parse a Slurm configuration file and return it as an ordered dictionary.

    Everything after the first ``#`` on a line is discarded and blank lines
    are skipped. Each remaining line is split on whitespace into
    ``key=value`` tokens; the first key on the line decides how the line is
    stored, based on its type in ``all_confs[conf_name]``:

    * ``S_P_ARRAY`` - the whole line (all its tokens) is appended, as an
      OrderedDict, to a list stored under the first key;
    * ``S_P_CSV``   - the comma-separated value is merged with any value
      already stored for the key, dropping duplicates and keeping order;
    * ``S_P_LIST``  - the values of the line are appended to a list stored
      under the first key;
    * anything else - every token of the line is stored as a plain entry,
      overwriting earlier occurrences.

    Args:
        file_path: Path of the configuration file to read.
        conf_name: Name of the key table to look up in ``all_confs``; an
            unknown name yields an empty table.
        validate: When true, a line whose first key is not in the key table
            is rejected.

    Returns:
        OrderedDict: The parsed configuration.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If a token is not of the form ``key=value``, or if
            ``validate`` is true and a line starts with an unknown key.
    """
    current_conf = all_confs.get(conf_name, {})
    slurm_dict = OrderedDict()

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"{file_path} not found.")

    with open(file_path, 'r', encoding='utf-8') as f:
        for line_no, raw_line in enumerate(f, start=1):
            # Drop any trailing comment, then surrounding whitespace.
            line = raw_line.split('#')[0].strip()
            if not line:
                continue

            tmp_dict = OrderedDict()
            for item in line.split():
                # Split only on the first '=' to allow '=' inside the value.
                key, sep, value = item.partition('=')
                if not sep:
                    # Report the offending token instead of an opaque
                    # tuple-unpacking error.
                    raise ValueError(
                        f"Invalid entry while parsing {file_path} "
                        f"(line {line_no}): {item!r} is not in key=value form")
                tmp_dict[key.strip()] = value.strip()

            skey = next(iter(tmp_dict))
            if validate and skey not in current_conf:
                raise ValueError(f"Invalid key while parsing {file_path}: {skey}")

            key_type = current_conf.get(skey)
            if key_type == SlurmParserEnum.S_P_ARRAY:
                slurm_dict[skey] = list(slurm_dict.get(skey, [])) + [tmp_dict]
            elif key_type == SlurmParserEnum.S_P_CSV:
                existing_values = [v.strip() for v in slurm_dict.get(skey, "").split(',') if v.strip()]
                new_values = [v.strip() for v in tmp_dict[skey].split(',') if v.strip()]
                # dict.fromkeys de-duplicates while preserving first-seen order.
                slurm_dict[skey] = ",".join(dict.fromkeys(existing_values + new_values))
            elif key_type == SlurmParserEnum.S_P_LIST:
                slurm_dict[skey] = list(slurm_dict.get(skey, [])) + list(tmp_dict.values())
            else:
                slurm_dict.update(tmp_dict)

    return slurm_dict

215200

216-
def slurm_conf_dict_merge(conf_dict_list, module):
201+
def slurm_conf_dict_merge(conf_dict_list, conf_name):
202+
"""Merge multiple Slurm configuration dictionaries into a single dictionary."""
217203
merged_dict = OrderedDict()
204+
current_conf = all_confs.get(conf_name, {})
218205
for conf_dict in conf_dict_list:
219206
for ky, vl in conf_dict.items():
220-
if isinstance(vl, list):
207+
if current_conf.get(ky) == SlurmParserEnum.S_P_ARRAY:
221208
for item in vl:
222209
if isinstance(item, dict):
223-
# module.warn(f"DICT Key: {ky}, Value: {vl}")
224210
existing_dict = merged_dict.get(ky, {})
225211
inner_dict = existing_dict.get(item.get(ky), {})
226-
inner_dict.update(item)
227-
# TODO Partition node combiner logic
212+
# Get the sub-options for this array type (e.g., nodename_options, partition_options)
213+
sub_options = all_confs.get(ky, {})
214+
# Merge item into inner_dict, handling CSV fields specially
215+
for k, v in item.items():
216+
if sub_options.get(k) == SlurmParserEnum.S_P_CSV and k in inner_dict:
217+
# Merge CSV values
218+
existing_values = [val.strip() for val in inner_dict[k].split(',') if val.strip()]
219+
new_values = [val.strip() for val in v.split(',') if val.strip()]
220+
inner_dict[k] = ",".join(list(dict.fromkeys(existing_values + new_values)))
221+
else:
222+
# Regular update for non-CSV fields
223+
inner_dict[k] = v
228224
existing_dict[item.get(ky)] = inner_dict
229225
merged_dict[ky] = existing_dict
230-
else:
231-
# module.warn(f"LIST Key: {ky}, Value: {vl}")
232-
existing_list = merged_dict.get(ky, [])
233-
# module.warn(f"Existing list: {existing_list}")
234-
# module.warn(f"Item: {item}")
235-
if item not in existing_list:
236-
# existing_list.append(item)
237-
existing_list.update(item)
238-
# module.warn(f"Updated list: {existing_list}")
239-
merged_dict[ky] = existing_list
226+
elif current_conf.get(ky) == SlurmParserEnum.S_P_LIST:
227+
existing_list = merged_dict.get(ky, [])
228+
if isinstance(vl, list):
229+
new_items = vl
230+
else:
231+
new_items = [vl]
232+
merged_dict[ky] = list(dict.fromkeys(existing_list + new_items))
233+
elif current_conf.get(ky) == SlurmParserEnum.S_P_CSV:
234+
existing_values = [v.strip() for v in merged_dict.get(ky, "").split(',') if v.strip()]
235+
new_values = [v.strip() for v in vl.split(',') if v.strip()]
236+
merged_dict[ky] = ",".join(list(dict.fromkeys(existing_values + new_values)))
240237
else:
241238
merged_dict[ky] = vl
242239
# flatten the dict
@@ -248,50 +245,51 @@ def slurm_conf_dict_merge(conf_dict_list, module):
248245

249246

250247
def run_module():
248+
"""Entry point for the Ansible module handling slurm.conf operations."""
251249
module_args = {
252250
"path": {'type': 'str'},
253-
"op": {'type': 'str', 'required': True, 'choices': ['f2d', 'd2f', 'merge']},
251+
"op": {'type': 'str', 'required': True, 'choices': ['parse', 'render', 'merge']},
254252
"conf_map": {'type': 'dict', 'default': {}},
255253
"conf_sources": {'type': 'list', 'elements': 'raw', 'default': []},
256-
"conf_name": {'type': 'str', 'default': 'slurm'}
254+
"conf_name": {'type': 'str', 'default': 'slurm'},
255+
"validate": {'type': 'bool', 'default': False}
257256
}
258257

259-
result = {"changed": False, "slurm_dict": {}, "failed": False}
258+
result = {"changed": False, "failed": False}
260259

261260
# Create the AnsibleModule object
262261
module = AnsibleModule(argument_spec=module_args,
263262
required_if=[
264-
('op', 'd2f', ('conf_map',)),
263+
('op', 'render', ('conf_map',)),
265264
('op', 'merge', ('conf_sources',))
266265
],
267266
supports_check_mode=True)
268267
try:
268+
conf_name = module.params['conf_name']
269+
validate = module.params['validate']
269270
# Parse the slurm.conf file
270-
if module.params['op'] == 'f2d':
271-
s_dict = parse_slurm_conf(module.params['path'], module)
272-
result['slurm_dict'] = s_dict
273-
elif module.params['op'] == 'd2f':
271+
if module.params['op'] == 'parse':
272+
s_dict = parse_slurm_conf(module.params['path'], conf_name, validate)
273+
result['conf_dict'] = s_dict
274+
elif module.params['op'] == 'render':
274275
s_list = read_dict2ini(module.params['conf_map'])
275-
result['slurm_conf'] = s_list
276+
result['ini_lines'] = s_list
276277
elif module.params['op'] == 'merge':
277278
conf_dict_list = []
278279
for conf_source in module.params['conf_sources']:
279280
if isinstance(conf_source, dict):
280-
conf_dict_list.append(conf_source)
281+
conf_dict_list.append(OrderedDict(conf_source))
281282
elif isinstance(conf_source, str):
282283
if not os.path.exists(conf_source):
283-
raise Exception(f"File {conf_source} does not exist")
284-
s_dict = parse_slurm_conf(conf_source, module)
285-
# module.warn(f"Conf dict: {s_dict}")
286-
conf_dict_list.append(s_dict)
287-
# module.warn("After append")
284+
raise FileNotFoundError(f"File {conf_source} does not exist")
285+
s_dict = parse_slurm_conf(conf_source, conf_name, validate)
286+
conf_dict_list.append(OrderedDict(s_dict))
288287
else:
289-
raise Exception(f"Invalid type for conf_source: {type(conf_source)}")
290-
# module.exit_json(changed=False, conf_dict=conf_dict_list)
291-
merged_dict = slurm_conf_dict_merge(conf_dict_list, module)
288+
raise TypeError(f"Invalid type for conf_source: {type(conf_source)}")
289+
merged_dict = slurm_conf_dict_merge(conf_dict_list, conf_name)
292290
result['conf_dict'] = merged_dict
293291
result['ini_lines'] = read_dict2ini(merged_dict)
294-
except Exception as e:
292+
except (FileNotFoundError, ValueError, TypeError, AttributeError) as e:
295293
result['failed'] = True
296294
result['msg'] = str(e)
297295
module.fail_json(msg=str(e))

discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_node_x86_64.yaml.j2

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,8 @@
126126
- mkdir -p /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts
127127
- echo "{{ cloud_init_nfs_path }}/cert /cert nfs defaults,_netdev 0 0" >> /etc/fstab
128128
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab
129-
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab
129+
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd /var/spool/slurmd nfs defaults,_netdev 0 0" >> /etc/fstab
130130
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab
131-
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab
132131
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab
133132
- echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab
134133
- echo "{{ cloud_init_nfs_path}}/hpc_tools/container_images /hpc_tools/container_images nfs defaults,_netdev 0 0" >> /etc/fstab
@@ -153,6 +152,7 @@
153152
- chmod {{ file_mode_755 }} /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm
154153
- chmod {{ file_mode_400 }} /etc/munge/munge.key
155154
- chmod {{ file_mode_755 }} /etc/slurm/epilog.d/
155+
- chmod {{ file_mode_755 }} /etc/slurm/epilog.d/logout_user.sh
156156
- mkdir -p /var/spool/slurmd
157157
- chmod {{ file_mode_755 }} /var/spool/slurmd
158158
- chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool/slurmd

discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_control_node_x86_64.yaml.j2

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -307,10 +307,10 @@
307307
content: |
308308
SELECT VERSION();
309309
SHOW DATABASES;
310-
311-
CREATE DATABASE slurm_acct_db;
312-
CREATE USER 'slurm'@'%' IDENTIFIED BY '{{ hostvars['localhost']['slurm_db_password'] }}';
313-
GRANT ALL PRIVILEGES ON slurm_acct_db.* TO 'slurm'@'%';
310+
CREATE DATABASE IF NOT EXISTS {{ apply_config['slurmdbd']['StorageLoc'] }};
311+
CREATE USER IF NOT EXISTS '{{ apply_config['slurmdbd']['SlurmUser'] }}'@'%' IDENTIFIED BY '{{ hostvars['localhost']['slurm_db_password'] }}';
312+
ALTER USER '{{ apply_config['slurmdbd']['SlurmUser'] }}'@'%' IDENTIFIED BY '{{ hostvars['localhost']['slurm_db_password'] }}';
313+
GRANT ALL PRIVILEGES ON {{ apply_config['slurmdbd']['StorageLoc'] }}.* TO '{{ apply_config['slurmdbd']['SlurmUser'] }}'@'%';
314314
FLUSH PRIVILEGES;
315315

316316
- path: /root/omnia_slurm_scripts/00_munge_setup.sh
@@ -405,7 +405,7 @@
405405
echo "${value:-$default}"
406406
}
407407
#dir StateSaveLocation
408-
StateSaveLocation=$(get_value_slurm_conf "StateSaveLocation" "/var/spool")
408+
StateSaveLocation=$(get_value_slurm_conf "StateSaveLocation" "/var/spool/slurmctld")
409409
mkdir -pv $StateSaveLocation
410410
chown -v "$SLURM_USER:$SLURM_GROUP" $StateSaveLocation
411411
chmod -v 0744 $StateSaveLocation
@@ -477,7 +477,7 @@
477477
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab
478478
{% if powervault_config is not defined %}
479479
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/lib/mysql /var/lib/mysql nfs defaults,_netdev 0 0" >> /etc/fstab
480-
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab
480+
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmctld /var/spool/slurmctld nfs defaults,_netdev 0 0" >> /etc/fstab
481481
{% endif %}
482482
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab
483483
- echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab

discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@
261261
echo "[INFO] Updating /etc/fstab with NFS entries for Pulp cert, Slurm and Munge paths"
262262
echo "{{ cloud_init_nfs_path }}/cert /cert nfs defaults,_netdev 0 0" >> /etc/fstab
263263
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab
264-
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab
264+
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd /var/spool/slurmd nfs defaults,_netdev 0 0" >> /etc/fstab
265265
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab
266266
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab
267267
echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab

0 commit comments

Comments
 (0)