petuum · sangkeun00 · Aug 31, 2020 · Sep 9, 2020 · Sep 9, 2020 · Sep 9, 2020
diff --git a/autodist/resource_spec.py b/autodist/resource_spec.py
@@ -100,6 +100,13 @@ def cpu_devices(self):
             self.__cpu_devices = {k: v for k, v in self.__devices.items() if v.device_type is DeviceType.CPU}
         return self.__cpu_devices.items()
 
+    @property
+    def cpu_only_devices(self):
+        """String-to-device_spec mapping of all cpu ONLY devices."""
+        gpu_addresses = set([k.split(':')[0] for k, _ in self.gpu_devices])
+        cpu_only_devices = {k: v for k, v in self.cpu_devices if k.split(':')[0] not in gpu_addresses}
+        return cpu_only_devices.items()
+
     @property
     def num_cpus(self):
         """Number of all cpu devices."""
@@ -124,7 +131,7 @@ def node_gpu_devices(self):
 
     @property
     def node_cpu_devices(self):
-        """Node_address-to-device_string mapping of all cpu devices."""        
+        """Node_address-to-device_string mapping of all cpu devices."""
         _cpu_devices = dict()
         for device in self.cpu_devices:
             _cpu_devices.setdefault(device[0].split(':')[0], []).append(device[0])

diff --git a/autodist/strategy/__init__.py b/autodist/strategy/__init__.py
@@ -25,3 +25,4 @@
 from .partitioned_all_reduce_strategy import PartitionedAR
 from .random_axis_partition_all_reduce_strategy import RandomAxisPartitionAR
 from .uneven_partition_ps_strategy import UnevenPartitionedPS
+from .byte_ps_strategy import BytePS
diff --git a/autodist/strategy/byte_ps_strategy.py b/autodist/strategy/byte_ps_strategy.py
@@ -0,0 +1,39 @@
+"""BytePS StrategyBuilder."""
+from tensorflow.python.framework import ops
+
+from autodist.strategy.base import Strategy
+from autodist.strategy.ps_lb_strategy import PSLoadBalancing
+from autodist.strategy.all_reduce_strategy import AllReduce
+from autodist.kernel.common.utils import get_op_name
+
+
+class BytePS(PSLoadBalancing):
+    """
+    Generates the BytePS Strategy from https://github.com/bytedance/byteps.
+
+    The BytePS strategy exploits CPU-only nodes for communication while GPU nodes
+    for computatoin.
+    """
+
+    def __init__(self, local_proxy_variable=False, sync=True, staleness=0):
+        PSLoadBalancing.__init__(self, local_proxy_variable, sync, staleness)
+
+    # pylint: disable=attribute-defined-outside-init
+    def build(self, graph_item, resource_spec):
+        """Generate the strategy."""
+        expr = Strategy()
+
+        # get each variable, generate variable synchronizer config
+        expr.graph_config.replicas.extend([k for k, v in resource_spec.gpu_devices])
+
+        # find all variables
+        variables = graph_item.get_trainable_variables()
+        reduction_device_names = [k for k, _ in resource_spec.cpu_only_devices]
+        self.loads = {ps: 0.0 for ps in reduction_device_names}
+
+        # Mark each variable to be synchronized with a Parameter Server
+        node_config = [self._gen_ps_node_config(var, self._local_proxy_variable, self._sync, self._staleness)
+                       for var in variables]
+        expr.node_config.extend(node_config)
+
+        return expr