Skip to content

Commit 4bbaaa0

Browse files
committed
Added test for spark-run with pod identity
1 parent 47fbb39 commit 4bbaaa0

File tree

2 files changed

+188
-1
lines changed

2 files changed

+188
-1
lines changed

paasta_tools/cli/cmds/spark_run.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,7 @@ def add_subparser(subparsers):
431431
" However, it must already be set for a different instance of "
432432
"the service."
433433
),
434+
default=None,
434435
)
435436

436437
jupyter_group = list_parser.add_argument_group(
@@ -1237,7 +1238,6 @@ def paasta_spark_run(args: argparse.Namespace) -> int:
12371238
if iam_role or args.force_pod_identity:
12381239
if args.force_pod_identity:
12391240
if args.yelpsoa_config_root != DEFAULT_SOA_DIR:
1240-
# TODO
12411241
print(
12421242
"--force-pod-identity cannot be used with --yelpsoa-config-root",
12431243
file=sys.stderr,

tests/cli/test_cmds_spark_run.py

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,7 @@ def test_paasta_spark_run_bash(
10311031
job_id=None,
10321032
use_web_identity=False,
10331033
uses_bulkdata=True,
1034+
force_pod_identity=False,
10341035
)
10351036
mock_load_system_paasta_config_utils.return_value.get_kube_clusters.return_value = (
10361037
{}
@@ -1048,6 +1049,7 @@ def test_paasta_spark_run_bash(
10481049
mock_spark_conf_builder.return_value.get_spark_conf.return_value = {
10491050
"spark.kubernetes.executor.podTemplateFile": "/test/pod-template.yaml",
10501051
}
1052+
mock_get_instance_config.return_value.get_iam_role.return_value = None
10511053
spark_run.paasta_spark_run(args)
10521054
mock_validate_work_dir.assert_called_once_with("/tmp/local")
10531055
assert args.cmd == "/bin/bash"
@@ -1088,6 +1090,7 @@ def test_paasta_spark_run_bash(
10881090
force_spark_resource_configs=False,
10891091
use_eks=True,
10901092
k8s_server_address=None,
1093+
service_account_name=None,
10911094
)
10921095
mock_spark_conf = mock_spark_conf_builder.return_value.get_spark_conf.return_value
10931096
mock_configure_and_run_docker_container.assert_called_once_with(
@@ -1158,6 +1161,7 @@ def test_paasta_spark_run(
11581161
job_id=None,
11591162
use_web_identity=False,
11601163
uses_bulkdata=True,
1164+
force_pod_identity=False,
11611165
)
11621166
mock_load_system_paasta_config_utils.return_value.get_kube_clusters.return_value = (
11631167
{}
@@ -1178,6 +1182,7 @@ def test_paasta_spark_run(
11781182
mock_spark_conf_builder.return_value.get_spark_conf.return_value = {
11791183
"spark.kubernetes.executor.podTemplateFile": "/test/pod-template.yaml",
11801184
}
1185+
mock_get_instance_config.return_value.get_iam_role.return_value = None
11811186
spark_run.paasta_spark_run(args)
11821187
mock_validate_work_dir.assert_called_once_with("/tmp/local")
11831188
assert args.cmd == "USER=test timeout 1m spark-submit test.py"
@@ -1218,6 +1223,7 @@ def test_paasta_spark_run(
12181223
force_spark_resource_configs=False,
12191224
use_eks=True,
12201225
k8s_server_address=None,
1226+
service_account_name=None,
12211227
)
12221228
mock_configure_and_run_docker_container.assert_called_once_with(
12231229
args,
@@ -1285,6 +1291,7 @@ def test_paasta_spark_run_pyspark(
12851291
job_id=None,
12861292
use_web_identity=False,
12871293
uses_bulkdata=True,
1294+
force_pod_identity=False,
12881295
)
12891296
mock_load_system_paasta_config_utils.return_value.get_kube_clusters.return_value = (
12901297
{}
@@ -1306,6 +1313,7 @@ def test_paasta_spark_run_pyspark(
13061313
mock_spark_conf_builder.return_value.get_spark_conf.return_value = {
13071314
"spark.kubernetes.executor.podTemplateFile": "/test/pod-template.yaml",
13081315
}
1316+
mock_get_instance_config.return_value.get_iam_role.return_value = None
13091317

13101318
spark_run.paasta_spark_run(args)
13111319
mock_validate_work_dir.assert_called_once_with("/tmp/local")
@@ -1347,6 +1355,7 @@ def test_paasta_spark_run_pyspark(
13471355
force_spark_resource_configs=False,
13481356
use_eks=True,
13491357
k8s_server_address=None,
1358+
service_account_name=None,
13501359
)
13511360
mock_configure_and_run_docker_container.assert_called_once_with(
13521361
args,
@@ -1428,6 +1437,7 @@ def test_paasta_spark_run_uses_bulkdata(
14281437
job_id=None,
14291438
use_web_identity=False,
14301439
uses_bulkdata=spark_run_arg_uses_bulkdata,
1440+
force_pod_identity=False,
14311441
)
14321442
mock_load_system_paasta_config_spark_run.return_value.get_pools_for_cluster.return_value = [
14331443
"test-pool"
@@ -1436,6 +1446,7 @@ def test_paasta_spark_run_uses_bulkdata(
14361446
mock_get_instance_config.return_value.config_dict = {
14371447
"uses_bulkdata": instance_config_uses_bulkdata
14381448
}
1449+
mock_get_instance_config.return_value.get_iam_role.return_value = None
14391450

14401451
spark_run.paasta_spark_run(args)
14411452

@@ -1644,3 +1655,179 @@ def test_get_aws_credentials_default_profile(mock_boto3_session, mock_use_aws_pr
16441655
)
16451656

16461657
mock_use_aws_profile.assert_called_once_with()
1658+
1659+
1660+
@mock.patch.object(spark_run, "get_all_iam_roles_for_service", autospec=True)
1661+
@mock.patch.object(spark_run, "validate_work_dir", autospec=True)
1662+
@mock.patch.object(utils, "load_system_paasta_config", autospec=True)
1663+
@mock.patch.object(spark_run, "load_system_paasta_config", autospec=True)
1664+
@mock.patch.object(spark_run, "get_instance_config", autospec=True)
1665+
@mock.patch.object(spark_run, "get_aws_credentials", autospec=True)
1666+
@mock.patch.object(spark_run, "get_docker_image", autospec=True)
1667+
@mock.patch.object(spark_run, "get_spark_app_name", autospec=True)
1668+
@mock.patch.object(spark_run, "auto_add_timeout_for_spark_job", autospec=True)
1669+
@mock.patch.object(spark_run, "_parse_user_spark_args", autospec=True)
1670+
@mock.patch(
1671+
"paasta_tools.cli.cmds.spark_run.spark_config.SparkConfBuilder", autospec=True
1672+
)
1673+
@mock.patch.object(spark_run, "configure_and_run_docker_container", autospec=True)
1674+
@mock.patch.object(spark_run, "get_smart_paasta_instance_name", autospec=True)
1675+
@pytest.mark.parametrize(
1676+
"iam_role_set,force_pod_identity,aws_creds_provided,custom_yelpsoa,should_exit_early",
1677+
[
1678+
# Service has iam_role set, no overrides
1679+
("arn:aws:iam::123456789:role/test", False, None, None, 0),
1680+
# Service has iam_role set, you can override aws_creds_yamls
1681+
("arn:aws:iam::123456789:role/test", False, True, None, 0),
1682+
# Force-pod-identity set, should fail as it's not in the (empty) allowed set
1683+
(None, "arn:aws:iam::123456789:role/test", None, None, 1),
1684+
# Force-pod-identity set, succeeds because test adds it to allowed set
1685+
(None, "arn:aws:iam::123456789:role/test", None, None, 0),
1686+
# Force-pod-identity set, different from set iam_role, fails as it's not in allowed set
1687+
(
1688+
"arn:aws:iam::123456789:role/test",
1689+
"arn:aws:iam::123456789:role/test2",
1690+
None,
1691+
None,
1692+
1,
1693+
),
1694+
# Force-pod-identity set, but fails because custom soa dir provided
1695+
(None, "arn:aws:iam::123456789:role/test", None, "/custom/soa", 1),
1696+
],
1697+
)
1698+
def test_paasta_spark_run_with_pod_identity(
1699+
mock_get_smart_paasta_instance_name,
1700+
mock_configure_and_run_docker_container,
1701+
mock_spark_conf_builder,
1702+
mock_parse_user_spark_args,
1703+
mock_auto_add_timeout_for_spark_job,
1704+
mock_get_spark_app_name,
1705+
mock_get_docker_image,
1706+
mock_get_aws_credentials,
1707+
mock_get_instance_config,
1708+
mock_load_system_paasta_config_spark_run,
1709+
mock_load_system_paasta_config_utils,
1710+
mock_validate_work_dir,
1711+
mock_get_all_iam_roles_for_service,
1712+
iam_role_set,
1713+
force_pod_identity,
1714+
aws_creds_provided,
1715+
custom_yelpsoa,
1716+
should_exit_early,
1717+
):
1718+
args = argparse.Namespace(
1719+
work_dir="/tmp/local",
1720+
cmd="USER=test spark-submit test.py",
1721+
build=True,
1722+
image=None,
1723+
enable_compact_bin_packing=False,
1724+
disable_compact_bin_packing=False,
1725+
service="test-service",
1726+
instance="test-instance",
1727+
cluster="test-cluster",
1728+
pool="test-pool",
1729+
yelpsoa_config_root=custom_yelpsoa or spark_run.DEFAULT_SOA_DIR,
1730+
aws_credentials_yaml="/path/to/creds" if aws_creds_provided else None,
1731+
aws_profile=None,
1732+
spark_args="spark.cores.max=100 spark.executor.cores=10",
1733+
cluster_manager=spark_run.CLUSTER_MANAGER_K8S,
1734+
timeout_job_runtime="1m",
1735+
enable_dra=True,
1736+
aws_region="test-region",
1737+
force_spark_resource_configs=False,
1738+
assume_aws_role=None,
1739+
aws_role_duration=3600,
1740+
k8s_server_address=None,
1741+
tronfig=None,
1742+
job_id=None,
1743+
use_web_identity=False,
1744+
uses_bulkdata=True,
1745+
force_pod_identity=force_pod_identity,
1746+
)
1747+
# Use the expected return code to set the allowed iam roles
1748+
if force_pod_identity and not should_exit_early:
1749+
mock_get_all_iam_roles_for_service.return_value = (force_pod_identity,)
1750+
else:
1751+
mock_get_all_iam_roles_for_service.return_value = (
1752+
"arn:aws:iam::123456789:role/unreferenced-role",
1753+
)
1754+
mock_load_system_paasta_config_utils.return_value.get_kube_clusters.return_value = (
1755+
{}
1756+
)
1757+
mock_load_system_paasta_config_spark_run.return_value.get_cluster_aliases.return_value = (
1758+
{}
1759+
)
1760+
mock_load_system_paasta_config_spark_run.return_value.get_pools_for_cluster.return_value = [
1761+
"test-pool"
1762+
]
1763+
mock_load_system_paasta_config_spark_run.return_value.get_eks_cluster_aliases.return_value = {
1764+
"test-cluster": "test-cluster"
1765+
}
1766+
mock_get_docker_image.return_value = DUMMY_DOCKER_IMAGE_DIGEST
1767+
mock_auto_add_timeout_for_spark_job.return_value = (
1768+
"USER=test timeout 1m spark-submit test.py"
1769+
)
1770+
mock_spark_conf_builder.return_value.get_spark_conf.return_value = {
1771+
"spark.kubernetes.executor.podTemplateFile": "/test/pod-template.yaml",
1772+
}
1773+
mock_get_instance_config.return_value.get_iam_role.return_value = (
1774+
"arn:aws:iam::123456789:role/test"
1775+
)
1776+
ret = spark_run.paasta_spark_run(args)
1777+
if should_exit_early:
1778+
assert ret != 0
1779+
return
1780+
mock_validate_work_dir.assert_called_once_with("/tmp/local")
1781+
assert args.cmd == "USER=test timeout 1m spark-submit test.py"
1782+
mock_get_instance_config.assert_called_once_with(
1783+
service="test-service",
1784+
instance="test-instance",
1785+
cluster="test-cluster",
1786+
load_deployments=False,
1787+
soa_dir=spark_run.DEFAULT_SOA_DIR,
1788+
)
1789+
mock_get_aws_credentials.assert_called_once_with(
1790+
service="test-service",
1791+
aws_credentials_yaml="/path/to/creds"
1792+
if aws_creds_provided
1793+
else "/etc/boto_cfg/mrjob.yaml",
1794+
profile_name=None,
1795+
assume_aws_role_arn=None,
1796+
session_duration=3600,
1797+
use_web_identity=False,
1798+
)
1799+
mock_get_docker_image.assert_called_once_with(
1800+
args, mock_get_instance_config.return_value
1801+
)
1802+
mock_get_spark_app_name.assert_called_once_with("USER=test spark-submit test.py")
1803+
mock_parse_user_spark_args.assert_called_once_with(
1804+
"spark.cores.max=100 spark.executor.cores=10"
1805+
)
1806+
mock_spark_conf_builder.return_value.get_spark_conf.assert_called_once_with(
1807+
cluster_manager=spark_run.CLUSTER_MANAGER_K8S,
1808+
spark_app_base_name=mock_get_spark_app_name.return_value,
1809+
docker_img=DUMMY_DOCKER_IMAGE_DIGEST,
1810+
user_spark_opts=mock_parse_user_spark_args.return_value,
1811+
paasta_cluster="test-cluster",
1812+
paasta_pool="test-pool",
1813+
paasta_service="test-service",
1814+
paasta_instance=mock_get_smart_paasta_instance_name.return_value,
1815+
extra_volumes=mock_get_instance_config.return_value.get_volumes.return_value,
1816+
aws_creds=None,
1817+
aws_region="test-region",
1818+
force_spark_resource_configs=False,
1819+
use_eks=True,
1820+
k8s_server_address=None,
1821+
service_account_name="paasta--arn-aws-iam-123456789-role-test",
1822+
)
1823+
mock_configure_and_run_docker_container.assert_called_once_with(
1824+
args,
1825+
docker_img=DUMMY_DOCKER_IMAGE_DIGEST,
1826+
instance_config=mock_get_instance_config.return_value,
1827+
system_paasta_config=mock_load_system_paasta_config_spark_run.return_value,
1828+
spark_conf=mock_spark_conf_builder.return_value.get_spark_conf.return_value,
1829+
aws_creds=mock_get_aws_credentials.return_value,
1830+
cluster_manager=spark_run.CLUSTER_MANAGER_K8S,
1831+
pod_template_path="/test/pod-template.yaml",
1832+
extra_driver_envs=dict(),
1833+
)

0 commit comments

Comments (0)