Skip to content

Commit 6fb522b

Browse files
committed
Add energy profiling components: kernel timer and utility functions
1 parent 1285969 commit 6fb522b

File tree

6 files changed

+485
-0
lines changed

6 files changed

+485
-0
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ if(NOT WIN32)
150150
add_subdirectory(profiling/chrome-tracing)
151151
add_subdirectory(profiling/space-time-stack)
152152
add_subdirectory(profiling/perfetto-connector)
153+
add_subdirectory(profiling/energy-profiler)
153154
endif()
154155

155156
# External lib connectors
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Descend into timing/ and process the build rules defined there.
add_subdirectory(timing)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Basic kernel timer tool (no energy monitoring dependencies).
# Built from the timer implementation plus the shared utility source.
kp_add_library(kp_energy_kernel_timer kp_energy_kernel_timer.cpp utils.cpp)

# Add this directory to the target's private include path
# (presumably so that "utils.hpp" resolves -- confirm against the tree).
target_include_directories(
  kp_energy_kernel_timer PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
Lines changed: 376 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,376 @@
1+
//@HEADER
2+
// ************************************************************************
3+
//
4+
// Kokkos v. 4.0
5+
// Copyright (2022) National Technology & Engineering
6+
// Solutions of Sandia, LLC (NTESS).
7+
//
8+
// Under the terms of Contract DE-NA0003525 with NTESS,
9+
// the U.S. Government retains certain rights in this software.
10+
//
11+
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12+
// See https://kokkos.org/LICENSE for license information.
13+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14+
//
15+
//@HEADER
16+
17+
// Kokkos Energy Profiler - Unified Kernel Timer
18+
// Unified kernel timing tool for the energy profiling infrastructure
19+
20+
#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>

#include "kp_core.hpp"
#include "utils.hpp"
32+
33+
namespace KokkosTools {
34+
namespace EnergyProfiler {
35+
36+
// === TimingInfo Structure ===
37+
struct TimingInfo {
38+
std::string name;
39+
RegionType type;
40+
std::chrono::high_resolution_clock::time_point start_time;
41+
std::chrono::high_resolution_clock::time_point end_time;
42+
uint64_t id = 0;
43+
};
44+
45+
// === Export and Display Functions ===
46+
47+
void export_all_timings_csv(const std::deque<TimingInfo>& all_timings,
48+
const std::string& filename) {
49+
FILE* file = fopen(filename.c_str(), "w");
50+
if (!file) {
51+
std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n";
52+
return;
53+
}
54+
55+
// Write CSV header
56+
fprintf(file,
57+
"name,type,start_time_epoch_ms,end_time_epoch_ms,duration_ms\n");
58+
59+
// Write each timing entry
60+
for (const auto& timing : all_timings) {
61+
auto start_ms = get_epoch_ms(timing.start_time);
62+
auto end_ms = get_epoch_ms(timing.end_time);
63+
auto duration_ms = get_duration_ms(timing.start_time, timing.end_time);
64+
std::string type = region_type_to_string(timing.type);
65+
66+
fprintf(file, "%s,%s,%ld,%ld,%ld\n", timing.name.c_str(), type.c_str(),
67+
start_ms, end_ms, duration_ms);
68+
}
69+
70+
fclose(file);
71+
std::cout << "All timing data exported to " << filename << '\n';
72+
}
73+
74+
void print_all_timings_summary(const std::deque<TimingInfo>& kernels,
75+
const std::deque<TimingInfo>& regions,
76+
const std::deque<TimingInfo>& deepcopies) {
77+
std::cout << "\n==== TIMING SUMMARY ====\n";
78+
std::cout
79+
<< "| Category | Name | Type | "
80+
"Start(ms) | End(ms) | Duration (ms) |\n";
81+
std::cout << "|----------|----------------------------------|----------------"
82+
"|------"
83+
"-------------|-------------------|---------------|\n";
84+
85+
// Print kernels
86+
for (const auto& timing_info : kernels) {
87+
auto start_ms = get_epoch_ms(timing_info.start_time);
88+
auto end_ms = get_epoch_ms(timing_info.end_time);
89+
auto duration_ms =
90+
get_duration_ms(timing_info.start_time, timing_info.end_time);
91+
std::string type = region_type_to_string(timing_info.type);
92+
93+
std::cout << "| " << std::setw(8) << std::left << "KERNEL" << " | "
94+
<< std::setw(32) << std::left << timing_info.name << " | "
95+
<< std::setw(14) << std::left << type << " | " << std::setw(17)
96+
<< std::right << start_ms << " | " << std::setw(17) << std::right
97+
<< end_ms << " | " << std::setw(13) << std::right << duration_ms
98+
<< " |\n";
99+
}
100+
101+
// Print regions
102+
for (const auto& timing_info : regions) {
103+
auto start_ms = get_epoch_ms(timing_info.start_time);
104+
auto end_ms = get_epoch_ms(timing_info.end_time);
105+
auto duration_ms =
106+
get_duration_ms(timing_info.start_time, timing_info.end_time);
107+
108+
std::cout << "| " << std::setw(8) << std::left << "REGION" << " | "
109+
<< std::setw(32) << std::left << timing_info.name << " | "
110+
<< std::setw(14) << std::left << "user_region" << " | "
111+
<< std::setw(17) << std::right << start_ms << " | "
112+
<< std::setw(17) << std::right << end_ms << " | " << std::setw(13)
113+
<< std::right << duration_ms << " |\n";
114+
}
115+
116+
// Print deepcopies
117+
for (const auto& timing_info : deepcopies) {
118+
auto start_ms = get_epoch_ms(timing_info.start_time);
119+
auto end_ms = get_epoch_ms(timing_info.end_time);
120+
auto duration_ms =
121+
get_duration_ms(timing_info.start_time, timing_info.end_time);
122+
123+
std::cout << "| " << std::setw(8) << std::left << "DEEPCOPY" << " | "
124+
<< std::setw(32) << std::left << timing_info.name << " | "
125+
<< std::setw(14) << std::left << "deep_copy" << " | "
126+
<< std::setw(17) << std::right << start_ms << " | "
127+
<< std::setw(17) << std::right << end_ms << " | " << std::setw(13)
128+
<< std::right << duration_ms << " |\n";
129+
}
130+
}
131+
132+
// === Global State Variables ===
133+
// Intervals that have been opened but not yet closed. New entries are
// appended at the back.
static std::deque<TimingInfo> g_active_regions{};
// Closed intervals, bucketed by category.
static std::deque<TimingInfo> g_completed_kernels{};
static std::deque<TimingInfo> g_completed_regions{};
static std::deque<TimingInfo> g_completed_deepcopies{};
// Next identifier to hand out; the first one issued is 1.
static uint64_t g_next_region_id{1};
// When true, the profiling callbacks log each event to stdout.
static bool g_verbose_enabled{false};
139+
140+
// === Helper Functions ===
141+
static void start_region(const std::string& name, RegionType type,
142+
uint64_t id = 0) {
143+
TimingInfo region;
144+
region.name = name;
145+
region.type = type;
146+
region.start_time = std::chrono::high_resolution_clock::now();
147+
region.id = id;
148+
g_active_regions.push_back(region);
149+
}
150+
151+
static void end_region() {
152+
if (!g_active_regions.empty()) {
153+
auto region = g_active_regions.back();
154+
g_active_regions.pop_back();
155+
region.end_time = std::chrono::high_resolution_clock::now();
156+
157+
// Categorize based on type
158+
switch (region.type) {
159+
case RegionType::UserRegion: g_completed_regions.push_back(region); break;
160+
case RegionType::DeepCopy:
161+
g_completed_deepcopies.push_back(region);
162+
break;
163+
default: g_completed_kernels.push_back(region); break;
164+
}
165+
}
166+
}
167+
168+
static void end_region_with_id(uint64_t expected_id) {
169+
if (g_active_regions.empty()) {
170+
std::cerr << "Warning: Attempting to end region with ID " << expected_id
171+
<< " but no active regions found\n";
172+
return;
173+
}
174+
175+
// Find the region with the matching ID
176+
auto it = std::find_if(g_active_regions.rbegin(), g_active_regions.rend(),
177+
[expected_id](const TimingInfo& region) {
178+
return region.id == expected_id;
179+
});
180+
181+
if (it != g_active_regions.rend()) {
182+
// Found the region with matching ID
183+
auto region = *it;
184+
185+
// Remove the region from active list
186+
g_active_regions.erase(std::next(it).base());
187+
188+
region.end_time = std::chrono::high_resolution_clock::now();
189+
190+
// Categorize based on type
191+
switch (region.type) {
192+
case RegionType::UserRegion: g_completed_regions.push_back(region); break;
193+
case RegionType::DeepCopy:
194+
g_completed_deepcopies.push_back(region);
195+
break;
196+
default: g_completed_kernels.push_back(region); break;
197+
}
198+
} else {
199+
std::cerr << "Warning: No active region found with ID " << expected_id
200+
<< "\n";
201+
}
202+
}
203+
204+
static std::deque<TimingInfo> get_all_timings() {
205+
std::deque<TimingInfo> all_timings;
206+
207+
// Combine all timings into a single deque
208+
for (const auto& timing : g_completed_kernels) {
209+
all_timings.push_back(timing);
210+
}
211+
for (const auto& timing : g_completed_regions) {
212+
all_timings.push_back(timing);
213+
}
214+
for (const auto& timing : g_completed_deepcopies) {
215+
all_timings.push_back(timing);
216+
}
217+
218+
// Sort by start time to ensure chronological order
219+
std::sort(all_timings.begin(), all_timings.end(),
220+
[](const TimingInfo& a, const TimingInfo& b) {
221+
return a.start_time < b.start_time;
222+
});
223+
224+
return all_timings;
225+
}
226+
227+
} // namespace EnergyProfiler
228+
} // namespace KokkosTools
229+
230+
extern "C" {
231+
232+
// Tool Description
233+
void kokkosp_request_tool_settings(const uint32_t,
234+
Kokkos_Tools_ToolSettings* settings) {
235+
settings->requires_global_fencing = false;
236+
settings->padding[0] = 0;
237+
}
238+
239+
// Functions to be added to Kokkos library
240+
void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
241+
const uint32_t devInfoCount,
242+
Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) {
243+
(void)devInfoCount;
244+
(void)deviceInfo;
245+
246+
// Enable verbose mode if environment variable is set
247+
if (std::getenv("KOKKOS_TOOLS_ENERGY_VERBOSE")) {
248+
KokkosTools::EnergyProfiler::g_verbose_enabled = true;
249+
}
250+
251+
std::cout << "Kokkos Energy Profiler: Initializing with load sequence "
252+
<< loadSeq << " and interface version " << interfaceVer << '\n';
253+
std::cout << "Kokkos Energy Profiler: Library initialized" << '\n';
254+
}
255+
256+
void kokkosp_finalize_library() {
257+
std::cout << "Kokkos Energy Profiler: Finalizing library\n";
258+
std::cout << "Kokkos Energy Profiler: Library finalized\n";
259+
260+
std::string prefix = KokkosTools::EnergyProfiler::generate_prefix();
261+
262+
// Print unified summary
263+
KokkosTools::EnergyProfiler::print_all_timings_summary(
264+
KokkosTools::EnergyProfiler::g_completed_kernels,
265+
KokkosTools::EnergyProfiler::g_completed_regions,
266+
KokkosTools::EnergyProfiler::g_completed_deepcopies);
267+
268+
// Export unified CSV with all timings in chronological order
269+
auto all_timings = KokkosTools::EnergyProfiler::get_all_timings();
270+
KokkosTools::EnergyProfiler::export_all_timings_csv(
271+
all_timings, prefix + "_timing_data.csv");
272+
}
273+
274+
void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
275+
uint64_t* kID) {
276+
(void)devID;
277+
*kID = KokkosTools::EnergyProfiler::g_next_region_id++;
278+
KokkosTools::EnergyProfiler::start_region(
279+
name, KokkosTools::EnergyProfiler::RegionType::ParallelFor, *kID);
280+
if (KokkosTools::EnergyProfiler::g_verbose_enabled) {
281+
std::cout << "Kokkos Energy Profiler: Started parallel_for '" << name
282+
<< "' on device " << devID << " with ID " << *kID << "\n";
283+
}
284+
}
285+
286+
void kokkosp_end_parallel_for(const uint64_t kID) {
287+
KokkosTools::EnergyProfiler::end_region_with_id(kID);
288+
if (KokkosTools::EnergyProfiler::g_verbose_enabled) {
289+
std::cout << "Kokkos Energy Profiler: Ended parallel_for with ID " << kID
290+
<< "\n";
291+
}
292+
}
293+
294+
void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
295+
uint64_t* kID) {
296+
(void)devID;
297+
*kID = KokkosTools::EnergyProfiler::g_next_region_id++;
298+
KokkosTools::EnergyProfiler::start_region(
299+
name, KokkosTools::EnergyProfiler::RegionType::ParallelScan, *kID);
300+
if (KokkosTools::EnergyProfiler::g_verbose_enabled) {
301+
std::cout << "Kokkos Energy Profiler: Started parallel_scan '" << name
302+
<< "' on device " << devID << " with ID " << *kID << "\n";
303+
}
304+
}
305+
306+
void kokkosp_end_parallel_scan(const uint64_t kID) {
307+
KokkosTools::EnergyProfiler::end_region_with_id(kID);
308+
if (KokkosTools::EnergyProfiler::g_verbose_enabled) {
309+
std::cout << "Kokkos Energy Profiler: Ended parallel_scan with ID " << kID
310+
<< "\n";
311+
}
312+
}
313+
314+
void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,
315+
uint64_t* kID) {
316+
(void)devID;
317+
*kID = KokkosTools::EnergyProfiler::g_next_region_id++;
318+
KokkosTools::EnergyProfiler::start_region(
319+
name, KokkosTools::EnergyProfiler::RegionType::ParallelReduce, *kID);
320+
if (KokkosTools::EnergyProfiler::g_verbose_enabled) {
321+
std::cout << "Kokkos Energy Profiler: Started parallel_reduce '" << name
322+
<< "' on device " << devID << " with ID " << *kID << "\n";
323+
}
324+
}
325+
326+
void kokkosp_end_parallel_reduce(const uint64_t kID) {
327+
KokkosTools::EnergyProfiler::end_region_with_id(kID);
328+
if (KokkosTools::EnergyProfiler::g_verbose_enabled) {
329+
std::cout << "Kokkos Energy Profiler: Ended parallel_reduce with ID " << kID
330+
<< "\n";
331+
}
332+
}
333+
334+
void kokkosp_push_profile_region(char const* regionName) {
335+
KokkosTools::EnergyProfiler::start_region(
336+
regionName, KokkosTools::EnergyProfiler::RegionType::UserRegion,
337+
KokkosTools::EnergyProfiler::g_next_region_id++);
338+
if (KokkosTools::EnergyProfiler::g_verbose_enabled) {
339+
std::cout << "Kokkos Energy Profiler: Pushed profile region '" << regionName
340+
<< "'\n";
341+
}
342+
}
343+
344+
void kokkosp_pop_profile_region() {
345+
KokkosTools::EnergyProfiler::end_region();
346+
if (KokkosTools::EnergyProfiler::g_verbose_enabled) {
347+
std::cout << "Kokkos Energy Profiler: Popped profile region\n";
348+
}
349+
}
350+
351+
// Begin callback for deep copies: opens a DeepCopy interval named after the
// destination and logs source, destination, and size in verbose mode.
//
// The interval's ID is the destination address. The matching end callback
// closes the most recently opened interval and does not look this ID up.
void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle,
                             const char* dst_name, const void* dst_ptr,
                             Kokkos::Tools::SpaceHandle src_handle,
                             const char* src_name, const void* src_ptr,
                             uint64_t size) {
  // Arguments this tool does not use.
  static_cast<void>(dst_handle);
  static_cast<void>(src_handle);
  static_cast<void>(src_ptr);

  namespace EP = KokkosTools::EnergyProfiler;

  const uint64_t destination_address = reinterpret_cast<uint64_t>(dst_ptr);
  EP::start_region(dst_name, EP::RegionType::DeepCopy, destination_address);

  if (!EP::g_verbose_enabled) return;
  std::cout << "Kokkos Energy Profiler: Started deep copy from '" << src_name
            << "' to '" << dst_name << "' (size: " << size << " bytes)\n";
}
369+
370+
void kokkosp_end_deep_copy() {
371+
KokkosTools::EnergyProfiler::end_region();
372+
if (KokkosTools::EnergyProfiler::g_verbose_enabled) {
373+
std::cout << "Kokkos Energy Profiler: Ended deep copy\n";
374+
}
375+
}
376+
}

0 commit comments

Comments
 (0)