@@ -1031,6 +1031,7 @@ def test_paasta_spark_run_bash(
10311031 job_id = None ,
10321032 use_web_identity = False ,
10331033 uses_bulkdata = True ,
1034+ force_pod_identity = False ,
10341035 )
10351036 mock_load_system_paasta_config_utils .return_value .get_kube_clusters .return_value = (
10361037 {}
@@ -1048,6 +1049,7 @@ def test_paasta_spark_run_bash(
10481049 mock_spark_conf_builder .return_value .get_spark_conf .return_value = {
10491050 "spark.kubernetes.executor.podTemplateFile" : "/test/pod-template.yaml" ,
10501051 }
1052+ mock_get_instance_config .return_value .get_iam_role .return_value = None
10511053 spark_run .paasta_spark_run (args )
10521054 mock_validate_work_dir .assert_called_once_with ("/tmp/local" )
10531055 assert args .cmd == "/bin/bash"
@@ -1088,6 +1090,7 @@ def test_paasta_spark_run_bash(
10881090 force_spark_resource_configs = False ,
10891091 use_eks = True ,
10901092 k8s_server_address = None ,
1093+ service_account_name = None ,
10911094 )
10921095 mock_spark_conf = mock_spark_conf_builder .return_value .get_spark_conf .return_value
10931096 mock_configure_and_run_docker_container .assert_called_once_with (
@@ -1158,6 +1161,7 @@ def test_paasta_spark_run(
11581161 job_id = None ,
11591162 use_web_identity = False ,
11601163 uses_bulkdata = True ,
1164+ force_pod_identity = False ,
11611165 )
11621166 mock_load_system_paasta_config_utils .return_value .get_kube_clusters .return_value = (
11631167 {}
@@ -1178,6 +1182,7 @@ def test_paasta_spark_run(
11781182 mock_spark_conf_builder .return_value .get_spark_conf .return_value = {
11791183 "spark.kubernetes.executor.podTemplateFile" : "/test/pod-template.yaml" ,
11801184 }
1185+ mock_get_instance_config .return_value .get_iam_role .return_value = None
11811186 spark_run .paasta_spark_run (args )
11821187 mock_validate_work_dir .assert_called_once_with ("/tmp/local" )
11831188 assert args .cmd == "USER=test timeout 1m spark-submit test.py"
@@ -1218,6 +1223,7 @@ def test_paasta_spark_run(
12181223 force_spark_resource_configs = False ,
12191224 use_eks = True ,
12201225 k8s_server_address = None ,
1226+ service_account_name = None ,
12211227 )
12221228 mock_configure_and_run_docker_container .assert_called_once_with (
12231229 args ,
@@ -1285,6 +1291,7 @@ def test_paasta_spark_run_pyspark(
12851291 job_id = None ,
12861292 use_web_identity = False ,
12871293 uses_bulkdata = True ,
1294+ force_pod_identity = False ,
12881295 )
12891296 mock_load_system_paasta_config_utils .return_value .get_kube_clusters .return_value = (
12901297 {}
@@ -1306,6 +1313,7 @@ def test_paasta_spark_run_pyspark(
13061313 mock_spark_conf_builder .return_value .get_spark_conf .return_value = {
13071314 "spark.kubernetes.executor.podTemplateFile" : "/test/pod-template.yaml" ,
13081315 }
1316+ mock_get_instance_config .return_value .get_iam_role .return_value = None
13091317
13101318 spark_run .paasta_spark_run (args )
13111319 mock_validate_work_dir .assert_called_once_with ("/tmp/local" )
@@ -1347,6 +1355,7 @@ def test_paasta_spark_run_pyspark(
13471355 force_spark_resource_configs = False ,
13481356 use_eks = True ,
13491357 k8s_server_address = None ,
1358+ service_account_name = None ,
13501359 )
13511360 mock_configure_and_run_docker_container .assert_called_once_with (
13521361 args ,
@@ -1428,6 +1437,7 @@ def test_paasta_spark_run_uses_bulkdata(
14281437 job_id = None ,
14291438 use_web_identity = False ,
14301439 uses_bulkdata = spark_run_arg_uses_bulkdata ,
1440+ force_pod_identity = False ,
14311441 )
14321442 mock_load_system_paasta_config_spark_run .return_value .get_pools_for_cluster .return_value = [
14331443 "test-pool"
@@ -1436,6 +1446,7 @@ def test_paasta_spark_run_uses_bulkdata(
14361446 mock_get_instance_config .return_value .config_dict = {
14371447 "uses_bulkdata" : instance_config_uses_bulkdata
14381448 }
1449+ mock_get_instance_config .return_value .get_iam_role .return_value = None
14391450
14401451 spark_run .paasta_spark_run (args )
14411452
@@ -1644,3 +1655,179 @@ def test_get_aws_credentials_default_profile(mock_boto3_session, mock_use_aws_pr
16441655 )
16451656
16461657 mock_use_aws_profile .assert_called_once_with ()
1658+
1659+
1660+ @mock .patch .object (spark_run , "get_all_iam_roles_for_service" , autospec = True )
1661+ @mock .patch .object (spark_run , "validate_work_dir" , autospec = True )
1662+ @mock .patch .object (utils , "load_system_paasta_config" , autospec = True )
1663+ @mock .patch .object (spark_run , "load_system_paasta_config" , autospec = True )
1664+ @mock .patch .object (spark_run , "get_instance_config" , autospec = True )
1665+ @mock .patch .object (spark_run , "get_aws_credentials" , autospec = True )
1666+ @mock .patch .object (spark_run , "get_docker_image" , autospec = True )
1667+ @mock .patch .object (spark_run , "get_spark_app_name" , autospec = True )
1668+ @mock .patch .object (spark_run , "auto_add_timeout_for_spark_job" , autospec = True )
1669+ @mock .patch .object (spark_run , "_parse_user_spark_args" , autospec = True )
1670+ @mock .patch (
1671+ "paasta_tools.cli.cmds.spark_run.spark_config.SparkConfBuilder" , autospec = True
1672+ )
1673+ @mock .patch .object (spark_run , "configure_and_run_docker_container" , autospec = True )
1674+ @mock .patch .object (spark_run , "get_smart_paasta_instance_name" , autospec = True )
1675+ @pytest .mark .parametrize (
1676+ "iam_role_set,force_pod_identity,aws_creds_provided,custom_yelpsoa,should_exit_early" ,
1677+ [
1678+ # Service has iam_role set, no overrides
1679+ ("arn:aws:iam::123456789:role/test" , False , None , None , 0 ),
1680+ # Service has iam_role set, you can override aws_creds_yamls
1681+ ("arn:aws:iam::123456789:role/test" , False , True , None , 0 ),
1682+ # Force-pod-identity set, should fail as it's not in the (empty) allowed set
1683+ (None , "arn:aws:iam::123456789:role/test" , None , None , 1 ),
1684+ # Force-pod-identity set, succeeds because test adds it to allowed set
1685+ (None , "arn:aws:iam::123456789:role/test" , None , None , 0 ),
1686+ # Force-pod-identity set, different from set iam_role, fails as it's not in allowed set
1687+ (
1688+ "arn:aws:iam::123456789:role/test" ,
1689+ "arn:aws:iam::123456789:role/test2" ,
1690+ None ,
1691+ None ,
1692+ 1 ,
1693+ ),
1694+ # Force-pod-identity set, but fails because custom soa dir provided
1695+ (None , "arn:aws:iam::123456789:role/test" , None , "/custom/soa" , 1 ),
1696+ ],
1697+ )
1698+ def test_paasta_spark_run_with_pod_identity (
1699+ mock_get_smart_paasta_instance_name ,
1700+ mock_configure_and_run_docker_container ,
1701+ mock_spark_conf_builder ,
1702+ mock_parse_user_spark_args ,
1703+ mock_auto_add_timeout_for_spark_job ,
1704+ mock_get_spark_app_name ,
1705+ mock_get_docker_image ,
1706+ mock_get_aws_credentials ,
1707+ mock_get_instance_config ,
1708+ mock_load_system_paasta_config_spark_run ,
1709+ mock_load_system_paasta_config_utils ,
1710+ mock_validate_work_dir ,
1711+ mock_get_all_iam_roles_for_service ,
1712+ iam_role_set ,
1713+ force_pod_identity ,
1714+ aws_creds_provided ,
1715+ custom_yelpsoa ,
1716+ should_exit_early ,
1717+ ):
1718+ args = argparse .Namespace (
1719+ work_dir = "/tmp/local" ,
1720+ cmd = "USER=test spark-submit test.py" ,
1721+ build = True ,
1722+ image = None ,
1723+ enable_compact_bin_packing = False ,
1724+ disable_compact_bin_packing = False ,
1725+ service = "test-service" ,
1726+ instance = "test-instance" ,
1727+ cluster = "test-cluster" ,
1728+ pool = "test-pool" ,
1729+ yelpsoa_config_root = custom_yelpsoa or spark_run .DEFAULT_SOA_DIR ,
1730+ aws_credentials_yaml = "/path/to/creds" if aws_creds_provided else None ,
1731+ aws_profile = None ,
1732+ spark_args = "spark.cores.max=100 spark.executor.cores=10" ,
1733+ cluster_manager = spark_run .CLUSTER_MANAGER_K8S ,
1734+ timeout_job_runtime = "1m" ,
1735+ enable_dra = True ,
1736+ aws_region = "test-region" ,
1737+ force_spark_resource_configs = False ,
1738+ assume_aws_role = None ,
1739+ aws_role_duration = 3600 ,
1740+ k8s_server_address = None ,
1741+ tronfig = None ,
1742+ job_id = None ,
1743+ use_web_identity = False ,
1744+ uses_bulkdata = True ,
1745+ force_pod_identity = force_pod_identity ,
1746+ )
1747+ # Use the expected return code to set the allowed iam roles
1748+ if force_pod_identity and not should_exit_early :
1749+ mock_get_all_iam_roles_for_service .return_value = (force_pod_identity ,)
1750+ else :
1751+ mock_get_all_iam_roles_for_service .return_value = (
1752+ "arn:aws:iam::123456789:role/unreferenced-role" ,
1753+ )
1754+ mock_load_system_paasta_config_utils .return_value .get_kube_clusters .return_value = (
1755+ {}
1756+ )
1757+ mock_load_system_paasta_config_spark_run .return_value .get_cluster_aliases .return_value = (
1758+ {}
1759+ )
1760+ mock_load_system_paasta_config_spark_run .return_value .get_pools_for_cluster .return_value = [
1761+ "test-pool"
1762+ ]
1763+ mock_load_system_paasta_config_spark_run .return_value .get_eks_cluster_aliases .return_value = {
1764+ "test-cluster" : "test-cluster"
1765+ }
1766+ mock_get_docker_image .return_value = DUMMY_DOCKER_IMAGE_DIGEST
1767+ mock_auto_add_timeout_for_spark_job .return_value = (
1768+ "USER=test timeout 1m spark-submit test.py"
1769+ )
1770+ mock_spark_conf_builder .return_value .get_spark_conf .return_value = {
1771+ "spark.kubernetes.executor.podTemplateFile" : "/test/pod-template.yaml" ,
1772+ }
1773+ mock_get_instance_config .return_value .get_iam_role .return_value = (
1774+ "arn:aws:iam::123456789:role/test"
1775+ )
1776+ ret = spark_run .paasta_spark_run (args )
1777+ if should_exit_early :
1778+ assert ret != 0
1779+ return
1780+ mock_validate_work_dir .assert_called_once_with ("/tmp/local" )
1781+ assert args .cmd == "USER=test timeout 1m spark-submit test.py"
1782+ mock_get_instance_config .assert_called_once_with (
1783+ service = "test-service" ,
1784+ instance = "test-instance" ,
1785+ cluster = "test-cluster" ,
1786+ load_deployments = False ,
1787+ soa_dir = spark_run .DEFAULT_SOA_DIR ,
1788+ )
1789+ mock_get_aws_credentials .assert_called_once_with (
1790+ service = "test-service" ,
1791+ aws_credentials_yaml = "/path/to/creds"
1792+ if aws_creds_provided
1793+ else "/etc/boto_cfg/mrjob.yaml" ,
1794+ profile_name = None ,
1795+ assume_aws_role_arn = None ,
1796+ session_duration = 3600 ,
1797+ use_web_identity = False ,
1798+ )
1799+ mock_get_docker_image .assert_called_once_with (
1800+ args , mock_get_instance_config .return_value
1801+ )
1802+ mock_get_spark_app_name .assert_called_once_with ("USER=test spark-submit test.py" )
1803+ mock_parse_user_spark_args .assert_called_once_with (
1804+ "spark.cores.max=100 spark.executor.cores=10"
1805+ )
1806+ mock_spark_conf_builder .return_value .get_spark_conf .assert_called_once_with (
1807+ cluster_manager = spark_run .CLUSTER_MANAGER_K8S ,
1808+ spark_app_base_name = mock_get_spark_app_name .return_value ,
1809+ docker_img = DUMMY_DOCKER_IMAGE_DIGEST ,
1810+ user_spark_opts = mock_parse_user_spark_args .return_value ,
1811+ paasta_cluster = "test-cluster" ,
1812+ paasta_pool = "test-pool" ,
1813+ paasta_service = "test-service" ,
1814+ paasta_instance = mock_get_smart_paasta_instance_name .return_value ,
1815+ extra_volumes = mock_get_instance_config .return_value .get_volumes .return_value ,
1816+ aws_creds = None ,
1817+ aws_region = "test-region" ,
1818+ force_spark_resource_configs = False ,
1819+ use_eks = True ,
1820+ k8s_server_address = None ,
1821+ service_account_name = "paasta--arn-aws-iam-123456789-role-test" ,
1822+ )
1823+ mock_configure_and_run_docker_container .assert_called_once_with (
1824+ args ,
1825+ docker_img = DUMMY_DOCKER_IMAGE_DIGEST ,
1826+ instance_config = mock_get_instance_config .return_value ,
1827+ system_paasta_config = mock_load_system_paasta_config_spark_run .return_value ,
1828+ spark_conf = mock_spark_conf_builder .return_value .get_spark_conf .return_value ,
1829+ aws_creds = mock_get_aws_credentials .return_value ,
1830+ cluster_manager = spark_run .CLUSTER_MANAGER_K8S ,
1831+ pod_template_path = "/test/pod-template.yaml" ,
1832+ extra_driver_envs = dict (),
1833+ )
0 commit comments