Replace thrust::tuple with cuda::std::tuple (#2928)

miscco · web-flow · commit d79c88fc7b3a · 2026-01-20T22:56:39.000Z
It's an alias anyway, and we should use the standard type. Authors: - Michael Schellenberger Costa (https://github.com/miscco) Approvers: - Dante Gama Dessavre (https://github.com/dantegd) URL: #2928
diff --git a/cpp/include/raft/linalg/detail/map.cuh b/cpp/include/raft/linalg/detail/map.cuh
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -16,7 +16,7 @@
 
 #include <rmm/cuda_stream_view.hpp>
 
-#include <thrust/tuple.h>
+#include <cuda/std/tuple>
 
 namespace raft::linalg::detail {
 
@@ -41,13 +41,13 @@ __device__ __forceinline__ void map_kernel_mainloop(
   OutT* out_ptr, IdxT offset, IdxT len, Func f, const InTs*... in_ptrs, std::index_sequence<Is...>)
 {
   TxN_t<OutT, R> wide;
-  thrust::tuple<TxN_t<InTs, R>...> wide_args;
+  cuda::std::tuple<TxN_t<InTs, R>...> wide_args;
   if (offset + R <= len) {
-    (thrust::get<Is>(wide_args).load(in_ptrs, offset), ...);
+    (cuda::std::get<Is>(wide_args).load(in_ptrs, offset), ...);
 #pragma unroll
     for (int j = 0; j < R; ++j) {
       wide.val.data[j] = map_apply<PassOffset, OutT, IdxT, Func, InTs...>(
-        f, offset + j, thrust::get<Is>(wide_args).val.data[j]...);
+        f, offset + j, cuda::std::get<Is>(wide_args).val.data[j]...);
     }
     wide.store(out_ptr, offset);
   }
diff --git a/cpp/include/raft/matrix/detail/linewise_op.cuh b/cpp/include/raft/matrix/detail/linewise_op.cuh
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -11,7 +11,7 @@
 #include <raft/util/pow2_utils.cuh>
 #include <raft/util/vectorized.cuh>
 
-#include <thrust/tuple.h>
+#include <cuda/std/tuple>
 
 #include <algorithm>
 
@@ -32,7 +32,7 @@ template <typename MatT, typename Lambda, class Tuple, size_t... Is>
 __device__ __forceinline__ MatT
 RunMatVecOp(Lambda op, MatT mat, Tuple&& args, std::index_sequence<Is...>)
 {
-  return op(mat, (thrust::get<Is>(args))...);
+  return op(mat, (cuda::std::get<Is>(args))...);
 }
 
 template <typename Type, typename IdxType, std::size_t VecBytes, int BlockSize>
@@ -89,9 +89,7 @@ struct Linewise {
   {
     constexpr IdxType warpPad = (AlignWarp::Value - 1) * VecElems;
     constexpr auto index      = std::index_sequence_for<Vecs...>();
-    // todo(lsugy): switch to cuda::std::tuple from libcudacxx if we add it as a required
-    // dependency. Note that thrust::tuple is limited to 10 elements.
-    thrust::tuple<Vecs...> args;
+    cuda::std::tuple<Vecs...> args;
     Vec v, w;
     bool update = true;
     for (; in < in_end; in += AlignWarp::Value, out += AlignWarp::Value, rowMod += warpPad) {
@@ -102,15 +100,15 @@ struct Linewise {
         update = true;
       }
       if (update) {
-        args   = thrust::make_tuple((vecs[rowDiv])...);
+        args   = cuda::std::make_tuple((vecs[rowDiv])...);
         update = false;
       }
 #pragma unroll VecElems
       for (int k = 0; k < VecElems; k++, rowMod++) {
         if (rowMod == rowLen) {
           rowMod = 0;
           rowDiv++;
-          args = thrust::make_tuple((vecs[rowDiv])...);
+          args = cuda::std::make_tuple((vecs[rowDiv])...);
         }
         w.val.data[k] = RunMatVecOp(op, v.val.data[k], args, index);
       }
diff --git a/cpp/include/raft/sparse/op/detail/sort.h b/cpp/include/raft/sparse/op/detail/sort.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -13,12 +13,12 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/tuple>
 #include <cuda_runtime.h>
 #include <thrust/device_ptr.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/scan.h>
 #include <thrust/sort.h>
-#include <thrust/tuple.h>
 
 #include <cusparse_v2.h>
 
@@ -37,11 +37,11 @@ struct TupleComp {
     operator()(const one& t1, const two& t2)
   {
     // sort first by each sample's color,
-    if (thrust::get<0>(t1) < thrust::get<0>(t2)) return true;
-    if (thrust::get<0>(t1) > thrust::get<0>(t2)) return false;
+    if (cuda::std::get<0>(t1) < cuda::std::get<0>(t2)) return true;
+    if (cuda::std::get<0>(t1) > cuda::std::get<0>(t2)) return false;
 
     // then sort by value in descending order
-    return thrust::get<1>(t1) < thrust::get<1>(t2);
+    return cuda::std::get<1>(t1) < cuda::std::get<1>(t2);
   }
 };
 
@@ -60,7 +60,7 @@ struct TupleComp {
 template <typename T, typename IdxT = int, typename nnz_t>
 void coo_sort(IdxT m, IdxT n, nnz_t nnz, IdxT* rows, IdxT* cols, T* vals, cudaStream_t stream)
 {
-  auto coo_indices = thrust::make_zip_iterator(thrust::make_tuple(rows, cols));
+  auto coo_indices = thrust::make_zip_iterator(cuda::std::make_tuple(rows, cols));
 
   // get all the colors in contiguous locations so we can map them to warps.
   thrust::sort_by_key(rmm::exec_policy(stream), coo_indices, coo_indices + nnz, vals, TupleComp());
@@ -95,7 +95,7 @@ void coo_sort_by_weight(
 {
   thrust::device_ptr<value_t> t_data = thrust::device_pointer_cast(data);
 
-  auto first = thrust::make_zip_iterator(thrust::make_tuple(rows, cols));
+  auto first = thrust::make_zip_iterator(cuda::std::make_tuple(rows, cols));
 
   thrust::sort_by_key(rmm::exec_policy(stream), t_data, t_data + nnz, first);
 }
diff --git a/cpp/include/raft/sparse/solver/detail/mst_solver_inl.cuh b/cpp/include/raft/sparse/solver/detail/mst_solver_inl.cuh
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -14,6 +14,7 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/std/tuple>
 #include <thrust/copy.h>
 #include <thrust/device_ptr.h>
 #include <thrust/execution_policy.h>
@@ -26,7 +27,6 @@
 #include <thrust/sort.h>
 #include <thrust/transform.h>
 #include <thrust/transform_reduce.h>
-#include <thrust/tuple.h>
 #include <thrust/unique.h>
 
 #include <curand.h>
@@ -169,10 +169,10 @@ Graph_COO<vertex_t, edge_t, weight_t> MST_solver<vertex_t, edge_t, weight_t, alt
 // ||y|-|x||
 template <typename weight_t>
 struct alteration_functor {
-  __host__ __device__ weight_t operator()(const thrust::tuple<weight_t, weight_t>& t)
+  __host__ __device__ weight_t operator()(const cuda::std::tuple<weight_t, weight_t>& t)
   {
-    auto x = thrust::get<0>(t);
-    auto y = thrust::get<1>(t);
+    auto x = cuda::std::get<0>(t);
+    auto y = cuda::std::get<1>(t);
     x      = x < 0 ? -x : x;
     y      = y < 0 ? -y : y;
     return x < y ? y - x : x - y;
@@ -194,8 +194,8 @@ alteration_t MST_solver<vertex_t, edge_t, weight_t, alteration_t>::alteration_ma
   auto new_end = thrust::unique(policy, tmp.begin(), tmp.end());
 
   // min(a[i+1]-a[i])/2
-  auto begin = thrust::make_zip_iterator(thrust::make_tuple(tmp.begin(), tmp.begin() + 1));
-  auto end   = thrust::make_zip_iterator(thrust::make_tuple(new_end - 1, new_end));
+  auto begin = thrust::make_zip_iterator(cuda::std::make_tuple(tmp.begin(), tmp.begin() + 1));
+  auto end   = thrust::make_zip_iterator(cuda::std::make_tuple(new_end - 1, new_end));
   auto init  = tmp.element(1, stream) - tmp.element(0, stream);
   auto max   = thrust::transform_reduce(
     policy, begin, end, alteration_functor<weight_t>(), init, thrust::minimum<weight_t>());
@@ -365,9 +365,9 @@ void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::check_termination()
 
 template <typename vertex_t, typename weight_t>
 struct new_edges_functor {
-  __host__ __device__ bool operator()(const thrust::tuple<vertex_t, vertex_t, weight_t>& t)
+  __host__ __device__ bool operator()(const cuda::std::tuple<vertex_t, vertex_t, weight_t>& t)
   {
-    auto src = thrust::get<0>(t);
+    auto src = cuda::std::get<0>(t);
 
     return src != std::numeric_limits<vertex_t>::max() ? true : false;
   }
@@ -383,15 +383,15 @@ void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::append_src_dst_pair(
 
   // iterator to end of mst edges added to final output in previous iteration
   auto src_dst_zip_end =
-    thrust::make_zip_iterator(thrust::make_tuple(mst_src + curr_mst_edge_count,
-                                                 mst_dst + curr_mst_edge_count,
-                                                 mst_weights + curr_mst_edge_count));
+    thrust::make_zip_iterator(cuda::std::make_tuple(mst_src + curr_mst_edge_count,
+                                                    mst_dst + curr_mst_edge_count,
+                                                    mst_weights + curr_mst_edge_count));
 
   // iterator to new mst edges found
   auto temp_src_dst_zip_begin = thrust::make_zip_iterator(
-    thrust::make_tuple(temp_src.begin(), temp_dst.begin(), temp_weights.begin()));
+    cuda::std::make_tuple(temp_src.begin(), temp_dst.begin(), temp_weights.begin()));
   auto temp_src_dst_zip_end = thrust::make_zip_iterator(
-    thrust::make_tuple(temp_src.end(), temp_dst.end(), temp_weights.end()));
+    cuda::std::make_tuple(temp_src.end(), temp_dst.end(), temp_weights.end()));
 
   // copy new mst edges to final output
   thrust::copy_if(policy,
diff --git a/cpp/include/raft/spectral/detail/spectral_util.cuh b/cpp/include/raft/spectral/detail/spectral_util.cuh
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -13,6 +13,7 @@
 #include <raft/spectral/matrix_wrappers.hpp>
 #include <raft/util/cudart_utils.hpp>
 
+#include <cuda/std/tuple>
 #include <thrust/device_ptr.h>
 #include <thrust/fill.h>
 #include <thrust/for_each.h>
@@ -21,7 +22,6 @@
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/reduce.h>
 #include <thrust/transform.h>
-#include <thrust/tuple.h>
 
 #include <algorithm>
 
@@ -115,7 +115,7 @@ struct equal_to_i_op {
   template <typename Tuple_>
   __host__ __device__ void operator()(Tuple_ t)
   {
-    thrust::get<1>(t) = (thrust::get<0>(t) == i) ? (value_type_t)1.0 : (value_type_t)0.0;
+    cuda::std::get<1>(t) = (cuda::std::get<0>(t) == i) ? (value_type_t)1.0 : (value_type_t)0.0;
   }
 };
 }  // namespace
@@ -140,10 +140,10 @@ bool construct_indicator(
 
   thrust::for_each(
     thrust_exec_policy,
-    thrust::make_zip_iterator(thrust::make_tuple(thrust::device_pointer_cast(clusters),
-                                                 thrust::device_pointer_cast(part_i.raw()))),
-    thrust::make_zip_iterator(thrust::make_tuple(thrust::device_pointer_cast(clusters + n),
-                                                 thrust::device_pointer_cast(part_i.raw() + n))),
+    thrust::make_zip_iterator(cuda::std::make_tuple(thrust::device_pointer_cast(clusters),
+                                                    thrust::device_pointer_cast(part_i.raw()))),
+    thrust::make_zip_iterator(cuda::std::make_tuple(thrust::device_pointer_cast(clusters + n),
+                                                    thrust::device_pointer_cast(part_i.raw() + n))),
     equal_to_i_op<vertex_t, weight_t>(index));
   RAFT_CHECK_CUDA(stream);