HEBench
hebench_report_stats.cpp
Go to the documentation of this file.
1 
2 // Copyright (C) 2021 Intel Corporation
3 // SPDX-License-Identifier: Apache-2.0
4 
5 #include <algorithm>
6 #include <cassert>
7 #include <cmath>
8 #include <cstring>
9 #include <limits>
10 #include <sstream>
11 #include <unordered_set>
12 
13 #include "hebench/modules/general/include/hebench_math_utils.h"
14 #include "hebench/modules/general/include/hebench_utilities.h"
15 #include "hebench_report_stats.h"
16 
17 namespace hebench {
18 namespace ReportGen {
19 
20 void computeStats(StatisticsResult &result, const double *data, std::size_t count)
21 {
22  std::vector<double> sorted_data(data, data + count);
23  std::sort(sorted_data.begin(), sorted_data.end());
24  std::size_t trim_start = sorted_data.size() / 10;
25  //std::size_t trim_count = sorted_data.size() - 2 * trim_start;
26 
27  hebench::Utilities::Math::EventStats basic_stats;
28  for (std::size_t i = 0; i < count; ++i)
29  basic_stats.newEvent(data[i]);
30  hebench::Utilities::Math::EventStats trimmed_stats;
31  for (std::size_t i = trim_start; i < sorted_data.size() - trim_start; ++i)
32  trimmed_stats.newEvent(sorted_data[i]);
33 
34  std::memset(&result, 0, sizeof(StatisticsResult));
35  result.total = basic_stats.getTotal();
36  result.ave = basic_stats.getMean();
37  result.variance = basic_stats.getVariance();
38  result.min = basic_stats.getMin();
39  result.max = basic_stats.getMax();
40 
41  result.median = hebench::Utilities::Math::computePercentile(sorted_data.data(), sorted_data.size(), 0.5);
42  result.pct_1 = hebench::Utilities::Math::computePercentile(sorted_data.data(), sorted_data.size(), 0.01); // 1-th percentile
43  result.pct_10 = hebench::Utilities::Math::computePercentile(sorted_data.data(), sorted_data.size(), 0.1); // 10-th percentile
44  result.pct_90 = hebench::Utilities::Math::computePercentile(sorted_data.data(), sorted_data.size(), 0.9); // 90-th percentile
45  result.pct_99 = hebench::Utilities::Math::computePercentile(sorted_data.data(), sorted_data.size(), 0.95); // 99-th percentile
46 
47  result.ave_trim = trimmed_stats.getMean(); // trimmed by 10% on each side
48  result.variance_trim = trimmed_stats.getVariance();
49  result.samples_per_unit = (basic_stats.getTotal() == 0.0 ? 0.0 : basic_stats.getCount() / basic_stats.getTotal()); // = total / iterations
50  result.samples_per_unit_trim = (trimmed_stats.getTotal() == 0.0 ? 0.0 : trimmed_stats.getCount() / trimmed_stats.getTotal()); // = total_trim / iterations_trim
51  result.input_sample_count = basic_stats.getCount();
52 }
53 
// Extracts and maintains the timing report events of the same type.
// Holds the event type ID, its name, and two parallel collections of
// per-sample timings (CPU and wall).
60 class EventType
61 {
62 public:
// Constructs an event type from a report, keeping only the events whose type
// matches `event_id` (defined out of line).
68  EventType(const cpp::TimingReport &report, std::uint32_t event_id);
// Constructs an event type from already-extracted CPU and wall timings, an ID
// and a name (defined out of line).
69  EventType(const std::vector<double> &cpu_events, const std::vector<double> &wall_events,
70  std::uint32_t event_id, const std::string_view &event_name);
71 
// ID of the event type.
75  std::uint32_t getID() const { return m_id; }
// Name of the event type as per the report.
79  const std::string &getName() const { return m_name; }
// Collection of contained CPU-timed events of the same type.
83  const std::vector<double> &getCPUEvents() const { return m_cpu_events; }
// Collection of contained wall-timed events of the same type.
87  const std::vector<double> &getWallEvents() const { return m_wall_events; }
88 
// Computes statistics for this event type based on the contained events and
// stores them in `result` (defined out of line).
95  void computeStats(ReportEventTypeStats &result) const;
// NOTE(review): the declarator for the inline body below (listing line 102) is
// absent from this listing. The symbol index at the end of the file lists
// `ReportEventTypeStats computeStats() const` -- confirm against the real source.
// The body forwards to the overload above and returns the result by value.
103  {
104  ReportEventTypeStats retval;
105  computeStats(retval);
106  return retval;
107  }
108 
109 private:
110  std::uint32_t m_id; // event type ID
111  std::string m_name; // event type name
112  std::vector<double> m_cpu_events; // per-sample CPU timings
113  std::vector<double> m_wall_events; // per-sample wall timings
114 };
115 
116 EventType::EventType(const cpp::TimingReport &report, std::uint32_t event_id)
117 {
118  m_id = event_id;
119  m_name = report.getEventTypeHeader(event_id);
120 
121  std::exception_ptr p_ex;
122 
123  //#pragma omp parallel for
124  for (std::uint64_t event_i = 0; event_i < report.getEventCount(); ++event_i)
125  {
126  if (!p_ex)
127  {
128  try
129  {
130  TimingReportEventC event;
131  report.getEvent(event, event_i);
132  if (!p_ex && event.event_type_id == event_id)
133  {
134  double wall_time = cpp::TimingReport::computeElapsedWallTime(event) / event.input_sample_count;
135  double cpu_time = cpp::TimingReport::computeElapsedCPUTime(event) / event.input_sample_count;
136  //#pragma omp critical
137  {
138  try
139  {
140  m_wall_events.insert(m_wall_events.end(), event.input_sample_count, wall_time);
141  m_cpu_events.insert(m_cpu_events.end(), event.input_sample_count, cpu_time);
142  }
143  catch (...)
144  {
145  p_ex = std::current_exception();
146  }
147  }
148  } // end if
149  }
150  catch (...)
151  {
152  p_ex = std::current_exception();
153  }
154  } // end if
155  } // end for
156 
157  if (p_ex)
158  std::rethrow_exception(p_ex);
159 }
160 
161 EventType::EventType(const std::vector<double> &cpu_events, const std::vector<double> &wall_events,
162  std::uint32_t event_id, const std::string_view &event_name)
163 {
164  if (cpu_events.size() != wall_events.size())
165  throw std::invalid_argument("Number of CPU events and Wall events cannot differ.");
166 
167  m_id = event_id;
168  m_name = std::string(event_name.begin(), event_name.end());
169  m_cpu_events = cpu_events;
170  m_wall_events = wall_events;
171 }
172 
// Fills `result` with CPU-time and wall-time statistics for this event type,
// computed with hebench::ReportGen::computeStats() over getCPUEvents() and
// getWallEvents() respectively.
// NOTE(review): the definition header for this body (listing line 173) is absent
// from this listing; presumably
// `void EventType::computeStats(ReportEventTypeStats &result) const` as declared
// in the class -- confirm against the real source.
174 {
175  StatisticsResult stats;
176 
// CPU timings first.
177  hebench::ReportGen::computeStats(stats, this->getCPUEvents().data(), this->getCPUEvents().size());
178  result.cpu_time_ave = stats.ave;
179  result.cpu_time_variance = stats.variance;
180  result.cpu_time_min = stats.min;
181  result.cpu_time_max = stats.max;
182  result.cpu_time_median = stats.median;
183  result.cpu_time_1 = stats.pct_1;
184  result.cpu_time_10 = stats.pct_10;
185  result.cpu_time_90 = stats.pct_90;
186  result.cpu_time_99 = stats.pct_99;
187  result.cpu_time_ave_trim = stats.ave_trim;
188  result.cpu_time_variance_trim = stats.variance_trim;
189 
// `stats` is reused: from here on it holds the wall-time statistics, so
// ops_per_sec, total_time and input_sample_count below come from wall timings.
190  hebench::ReportGen::computeStats(stats, this->getWallEvents().data(), this->getWallEvents().size());
191  result.wall_time_ave = stats.ave;
192  result.wall_time_variance = stats.variance;
193  result.wall_time_min = stats.min;
194  result.wall_time_max = stats.max;
195  result.wall_time_median = stats.median;
196  result.wall_time_1 = stats.pct_1;
197  result.wall_time_10 = stats.pct_10;
198  result.wall_time_90 = stats.pct_90;
199  result.wall_time_99 = stats.pct_99;
200  result.wall_time_ave_trim = stats.ave_trim;
201  result.wall_time_variance_trim = stats.variance_trim;
202  result.ops_per_sec = stats.samples_per_unit;
// NOTE(review): listing line 203 is absent from this listing (gutter jumps from
// 202 to 204). `result.ops_per_sec_trim` is read by generateCSV() but is not
// assigned in the visible lines -- presumably it is set here from
// stats.samples_per_unit_trim; confirm against the real source.
204  result.total_time = stats.total;
205  result.input_sample_count = stats.input_sample_count;
206 
207  result.event_id = this->getID();
208  result.name = this->getName();
// No description is available at this point; it is set to an empty string.
209  result.description = std::string();
210 }
211 
// Builds the per-event-type statistics for a timing report: copies the report
// header, footer and main event type ID, groups the per-sample timings by
// event type in a single pass, then computes one ReportEventTypeStats per
// event type, ordered by ascending event type ID.
// Throws std::invalid_argument if the report contains no events.
// NOTE(review): the definition header for this body (listing line 212) is absent
// from this listing; the symbol index at the end of the file lists
// `ReportStats(const cpp::TimingReport &report)` -- confirm against the real source.
213 {
214  if (report.getEventCount() <= 0)
215  throw std::invalid_argument("Report belongs to a failed benchmark.");
216 
217  m_header = report.getHeader();
218  m_footer = report.getFooter();
219  m_main_event_type_id = report.getMainEventType();
220  m_event_stats.reserve(report.getEventTypeCount());
221 
222  std::vector<std::uint32_t> event_ids; // used to keep track of all event ids
223  event_ids.reserve(report.getEventTypeCount());
224 
225  // map event ID to the event timings
226  std::unordered_map<std::uint32_t, std::vector<double>> cpu_events;
227  std::unordered_map<std::uint32_t, std::vector<double>> wall_events;
228 
229  // retrieve the timings and group by event (use a single pass over the report)
230  bool b_added;
231  for (std::uint64_t event_i = 0; event_i < report.getEventCount(); ++event_i)
232  {
// NOTE(review): listing line 233 is absent from this listing; `event` is used
// below but not declared in the visible lines -- presumably
// `TimingReportEventC event;` as in EventType's constructor; confirm.
234  report.getEvent(event, event_i);
// Per-sample time: elapsed time of the event divided by the number of input
// samples it covers.
235  double cpu_time = cpp::TimingReport::computeElapsedCPUTime(event) / event.input_sample_count;
236  double wall_time = cpp::TimingReport::computeElapsedWallTime(event) / event.input_sample_count;
// b_added becomes true the first time this event type is seen in either map,
// so each event type ID is appended to event_ids exactly once.
237  b_added = false;
238  if (cpu_events.count(event.event_type_id) <= 0)
239  {
240  cpu_events[event.event_type_id] = std::vector<double>();
241  b_added = true;
242  } // end if
243  if (wall_events.count(event.event_type_id) <= 0)
244  {
245  wall_events[event.event_type_id] = std::vector<double>();
246  b_added = true;
247  } // end if
248  if (b_added)
249  event_ids.push_back(event.event_type_id);
// One entry per input sample covered by this event.
250  for (std::uint64_t i = 0; i < event.input_sample_count; ++i)
251  {
252  cpu_events[event.event_type_id].push_back(cpu_time);
253  wall_events[event.event_type_id].push_back(wall_time);
254  } // end for
255  } // end for
256 
257  // sort by event ID
258  std::sort(event_ids.begin(), event_ids.end());
259 
260  // compute the stats for each event timing
261  for (std::size_t event_id_i = 0; event_id_i < event_ids.size(); ++event_id_i)
262  {
263  std::uint32_t event_id = event_ids[event_id_i];
264  EventType event_type(cpu_events[event_id], wall_events[event_id], event_id, report.getEventTypeHeader(event_id));
// The stats are pushed in the same order as event_ids, so event_id_i is also
// the index of this event type's stats inside m_event_stats.
265  m_event_types_2_stat_idx[event_type.getID()] = event_id_i; // record the event type ID and match it to the index of the stats
266  std::shared_ptr<ReportEventTypeStats> p_stats = std::make_shared<ReportEventTypeStats>();
267  event_type.computeStats(*p_stats);
268  m_event_stats.push_back(p_stats);
269  } // end for
270 }
271 
272 const ReportEventTypeStats &ReportStats::getEventTypeStats(std::uint64_t index) const
273 {
274  if (index >= m_event_stats.size())
275  {
276  std::stringstream ss;
277  ss << "Out of range `index`. Received " << index << ", but expected less than " << m_event_stats.size() << ".";
278  throw std::out_of_range(ss.str());
279  } // end if
280  if (!m_event_stats[index])
281  throw std::runtime_error("Unexpected empty stats.");
282  return *m_event_stats[index];
283 }
284 
// Looks up the stats index registered for event type `id` in
// m_event_types_2_stat_idx and returns the statistics stored at that index.
// `.at()` is called on the lookup table, so the lookup is bounds/key checked.
// NOTE(review): the definition header for this body (listing line 285) is absent
// from this listing; presumably
// `const ReportEventTypeStats &ReportStats::getEventTypeStatsByID(std::uint32_t id) const`
// as listed in the symbol index at the end of the file -- confirm.
286 {
287  return getEventTypeStats(m_event_types_2_stat_idx.at(id));
288 }
289 
// Returns the stats index registered for the main event type
// (m_main_event_type_id) in m_event_types_2_stat_idx.
// NOTE(review): the definition header for this body (listing line 290) is absent
// from this listing; presumably
// `std::uint64_t ReportStats::getMainEventTypeStatsIndex() const`
// as listed in the symbol index at the end of the file -- confirm.
291 {
292  return m_event_types_2_stat_idx.at(m_main_event_type_id);
293 }
294 
// Returns the statistics of the main event type by forwarding
// m_main_event_type_id to getEventTypeStatsByID().
// NOTE(review): the definition header for this body (listing line 295) is absent
// from this listing; presumably
// `const ReportEventTypeStats &ReportStats::getMainEventTypeStats() const`
// as listed in the symbol index at the end of the file -- confirm.
296 {
297  return getEventTypeStatsByID(m_main_event_type_id);
298 }
299 
300 void ReportStats::generateCSV(std::ostream &os, char ch_prefix)
301 {
302  if (!os)
303  throw std::ios_base::failure("Output stream is in an invalid state.");
304 
305  os << this->getHeader() << std::endl
306  << std::endl
307  << "Notes" << std::endl
308  << this->getFooter() << std::endl
309  << std::endl
310  << "Main event," << this->getMainEventTypeStats().event_id << "," << this->getMainEventTypeStats().name << std::endl
311  << std::endl
312  << ",,,,,Wall Time,,,,,,,,,,,,,CPU Time" << std::endl
313  << "ID,Event,Total Wall Time,Samples per sec,Samples per sec trimmed,"
314  // wall
315  << "Average,Standard Deviation,Time Unit,Time Factor,Min,Max,Median,Trimmed Average,Trimmed Standard Deviation,1-th percentile,10-th percentile,90-th percentile,99-th percentile,"
316  // cpu
317  << "Average,Standard Deviation,Time Unit,Time Factor,Min,Max,Median,Trimmed Average,Trimmed Standard Deviation,1-th percentile,10-th percentile,90-th percentile,99-th percentile,Input Samples" << std::endl;
318  if (!os)
319  throw std::ios_base::failure("Error writing statistics report header to stream.");
320  for (std::uint64_t event_stats_i = 0; event_stats_i < m_event_stats.size(); ++event_stats_i)
321  if (m_event_stats[event_stats_i])
322  generateCSV(os, *m_event_stats[event_stats_i], ch_prefix);
323 }
324 
// Writes one CSV row with the complete statistics of a single event type.
//   os        - stream that receives the row.
//   stats     - statistics to write.
//   ch_prefix - time unit prefix; not referenced in the visible lines (see note below).
//   new_line  - when true, the row is terminated with std::endl.
// Throws std::ios_base::failure if `os` is in a failed state on entry or after
// the row has been written.
325 void ReportStats::generateCSV(std::ostream &os, const ReportEventTypeStats &stats, char ch_prefix, bool new_line)
326 {
327  if (!os)
328  throw std::ios_base::failure("Output stream is in an invalid state.");
329 
// NOTE(review): listing lines 330-333 are absent from this listing. `prefix_wall`
// and `prefix_cpu` are used below but never declared in the visible lines --
// presumably they are TimingPrefixedSeconds objects initialized through
// setTimingPrefix(..., ch_prefix); confirm against the real source.
334 
// Leading columns: ID, name, total wall time, samples per second (plain and trimmed).
335  os << stats.event_id << "," << stats.name << ","
336  << hebench::Utilities::convertDoubleToStr(stats.total_time * prefix_wall.time_interval_ratio_den) << ","
337  << hebench::Utilities::convertDoubleToStr(stats.ops_per_sec) << ","
338  << hebench::Utilities::convertDoubleToStr(stats.ops_per_sec_trim) << ","
// Wall-time columns: every time value is multiplied by
// prefix_wall.time_interval_ratio_den; standard deviations are the square root
// of the stored variances.
339  << hebench::Utilities::convertDoubleToStr(stats.wall_time_ave * prefix_wall.time_interval_ratio_den) << ","
340  << hebench::Utilities::convertDoubleToStr(std::sqrt(stats.wall_time_variance) * prefix_wall.time_interval_ratio_den) << ","
// Time unit (prefix symbol followed by "s") and time factor (1 / ratio denominator).
341  << prefix_wall.symbol << "s," << hebench::Utilities::convertDoubleToStr(1.0 / prefix_wall.time_interval_ratio_den) << ","
342  << hebench::Utilities::convertDoubleToStr(stats.wall_time_min * prefix_wall.time_interval_ratio_den) << ","
343  << hebench::Utilities::convertDoubleToStr(stats.wall_time_max * prefix_wall.time_interval_ratio_den) << ","
344  << hebench::Utilities::convertDoubleToStr(stats.wall_time_median * prefix_wall.time_interval_ratio_den) << ","
345  << hebench::Utilities::convertDoubleToStr(stats.wall_time_ave_trim * prefix_wall.time_interval_ratio_den) << ","
346  << hebench::Utilities::convertDoubleToStr(std::sqrt(stats.wall_time_variance_trim) * prefix_wall.time_interval_ratio_den) << ","
347  << hebench::Utilities::convertDoubleToStr(stats.wall_time_1 * prefix_wall.time_interval_ratio_den) << ","
348  << hebench::Utilities::convertDoubleToStr(stats.wall_time_10 * prefix_wall.time_interval_ratio_den) << ","
349  << hebench::Utilities::convertDoubleToStr(stats.wall_time_90 * prefix_wall.time_interval_ratio_den) << ","
350  << hebench::Utilities::convertDoubleToStr(stats.wall_time_99 * prefix_wall.time_interval_ratio_den) << ","
// CPU-time columns: same layout as the wall-time columns, scaled with prefix_cpu.
351  << hebench::Utilities::convertDoubleToStr(stats.cpu_time_ave * prefix_cpu.time_interval_ratio_den) << ","
352  << hebench::Utilities::convertDoubleToStr(std::sqrt(stats.cpu_time_variance) * prefix_cpu.time_interval_ratio_den) << ","
353  << prefix_cpu.symbol << "s," << hebench::Utilities::convertDoubleToStr(1.0 / prefix_cpu.time_interval_ratio_den) << ","
354  << hebench::Utilities::convertDoubleToStr(stats.cpu_time_min * prefix_cpu.time_interval_ratio_den) << ","
355  << hebench::Utilities::convertDoubleToStr(stats.cpu_time_max * prefix_cpu.time_interval_ratio_den) << ","
356  << hebench::Utilities::convertDoubleToStr(stats.cpu_time_median * prefix_cpu.time_interval_ratio_den) << ","
357  << hebench::Utilities::convertDoubleToStr(stats.cpu_time_ave_trim * prefix_cpu.time_interval_ratio_den) << ","
358  << hebench::Utilities::convertDoubleToStr(std::sqrt(stats.cpu_time_variance_trim) * prefix_cpu.time_interval_ratio_den) << ","
359  << hebench::Utilities::convertDoubleToStr(stats.cpu_time_1 * prefix_cpu.time_interval_ratio_den) << ","
360  << hebench::Utilities::convertDoubleToStr(stats.cpu_time_10 * prefix_cpu.time_interval_ratio_den) << ","
361  << hebench::Utilities::convertDoubleToStr(stats.cpu_time_90 * prefix_cpu.time_interval_ratio_den) << ","
362  << hebench::Utilities::convertDoubleToStr(stats.cpu_time_99 * prefix_cpu.time_interval_ratio_den) << ","
// Last column: number of input samples.
363  << stats.input_sample_count;
364  if (new_line)
365  os << std::endl;
366 
367  if (!os)
368  throw std::ios_base::failure("Error writing statistics row to stream.");
369 }
370 
371 void ReportStats::generateSummaryCSV(std::ostream &os, char ch_prefix)
372 {
373  if (!os)
374  throw std::ios_base::failure("Output stream is in an invalid state.");
375 
376  os << this->getHeader() << std::endl
377  << std::endl
378  << "Notes" << std::endl
379  << this->getFooter() << std::endl
380  << std::endl
381  << "Main event," << this->getMainEventTypeStats().event_id << "," << this->getMainEventTypeStats().name << std::endl
382  << std::endl
383  << ",,,Wall Time,,,,CPU Time" << std::endl
384  << "ID,Event,Samples per sec,"
385  << "Average,Standard Deviation,Time Unit,Time Factor,"
386  << "Average,Standard Deviation,Time Unit,Time Factor,Input Samples" << std::endl;
387  if (!os)
388  throw std::ios_base::failure("Error writing summary report header to stream.");
389  for (std::uint64_t event_stats_i = 0; event_stats_i < m_event_stats.size(); ++event_stats_i)
390  if (m_event_stats[event_stats_i])
391  generateSummaryCSV(os, *m_event_stats[event_stats_i], ch_prefix);
392 }
393 
// Writes one summary CSV row for a single event type: ID, name, samples per
// second, then average / standard deviation / time unit / time factor for wall
// time and for CPU time, and finally the number of input samples.
//   os        - stream that receives the row.
//   stats     - statistics to write.
//   ch_prefix - time unit prefix; not referenced in the visible lines (see note below).
//   new_line  - when true, the row is terminated with std::endl.
// Throws std::ios_base::failure if `os` is in a failed state on entry or after
// the row has been written.
394 void ReportStats::generateSummaryCSV(std::ostream &os, const ReportEventTypeStats &stats, char ch_prefix, bool new_line)
395 {
396  if (!os)
397  throw std::ios_base::failure("Output stream is in an invalid state.");
398 
// NOTE(review): listing lines 399-402 are absent from this listing. `prefix_wall`
// and `prefix_cpu` are used below but never declared in the visible lines --
// presumably they are TimingPrefixedSeconds objects initialized through
// setTimingPrefix(..., ch_prefix); confirm against the real source.
403  os << stats.event_id << "," << stats.name << ","
404  << hebench::Utilities::convertDoubleToStr(stats.ops_per_sec) << ","
// Wall time: average and standard deviation (square root of the variance), both
// multiplied by prefix_wall.time_interval_ratio_den, then unit and factor.
405  << hebench::Utilities::convertDoubleToStr(stats.wall_time_ave * prefix_wall.time_interval_ratio_den) << ","
406  << hebench::Utilities::convertDoubleToStr(std::sqrt(stats.wall_time_variance) * prefix_wall.time_interval_ratio_den) << ","
407  << prefix_wall.symbol << "s," << hebench::Utilities::convertDoubleToStr(1.0 / prefix_wall.time_interval_ratio_den) << ","
// CPU time: same four columns, scaled with prefix_cpu.
408  << hebench::Utilities::convertDoubleToStr(stats.cpu_time_ave * prefix_cpu.time_interval_ratio_den) << ","
409  << hebench::Utilities::convertDoubleToStr(std::sqrt(stats.cpu_time_variance) * prefix_cpu.time_interval_ratio_den) << ","
410  << prefix_cpu.symbol << "s," << hebench::Utilities::convertDoubleToStr(1.0 / prefix_cpu.time_interval_ratio_den) << ","
411  << stats.input_sample_count;
412  if (new_line)
413  os << std::endl;
414 
415  if (!os)
416  throw std::ios_base::failure("Error writing summary row to stream.");
417 }
418 
419 } // namespace ReportGen
420 } // namespace hebench
Extracts and maintains the timing report events of the same type.
std::uint32_t getID() const
ID of the event type.
const std::vector< double > & getWallEvents() const
Collection of contained wall-timed events of the same type extracted from the report.
void computeStats(ReportEventTypeStats &result) const
Computes statistics for this event type based on the contained events.
ReportEventTypeStats computeStats() const
Computes statistics for this event type based on the contained events.
EventType(const cpp::TimingReport &report, std::uint32_t event_id)
Constructs an event type from a report.
const std::vector< double > & getCPUEvents() const
Collection of contained CPU-timed events of the same type extracted from the report.
const std::string & getName() const
Name of the event type as per the report.
std::uint64_t getMainEventTypeStatsIndex() const
const std::string & getFooter() const
const ReportEventTypeStats & getEventTypeStats(std::uint64_t index) const
void generateCSV(std::ostream &os, char ch_prefix)
Generates complete CSV stats for this report.
const ReportEventTypeStats & getEventTypeStatsByID(std::uint32_t id) const
void generateSummaryCSV(std::ostream &os, char ch_prefix)
Generates summary CSV for this report.
const std::string & getHeader() const
const ReportEventTypeStats & getMainEventTypeStats() const
ReportStats(const cpp::TimingReport &report)
static void setTimingPrefix(TimingPrefixedSeconds &prefix, double seconds, char ch_prefix)
Converts the time in seconds to the specified time unit.
static double computeElapsedWallTime(const TimingReportEventC &event)
void getEvent(TimingReportEventC &p_event, uint64_t index) const
std::string getEventTypeHeader(uint32_t event_type_id) const
static double computeElapsedCPUTime(const TimingReportEventC &event)
int64_t time_interval_ratio_den
Denominator of timing scale ratio with respect to a unit.
uint64_t input_sample_count
Number of input samples used.
void computeStats(StatisticsResult &result, const double *data, std::size_t count)
char symbol[MAX_SYMBOL_BUFFER_SIZE]
Symbol for the prefix.
uint32_t event_type_id
ID specifying the event type.
uint32_t event_id
ID specifying the event type.