HEBench
hebench_logreg.cpp
Go to the documentation of this file.
1 
2 // Copyright (C) 2021 Intel Corporation
3 // SPDX-License-Identifier: Apache-2.0
4 
5 #include <array>
6 #include <cassert>
7 #include <cmath>
8 #include <iterator>
9 #include <random>
10 #include <sstream>
11 #include <type_traits>
12 #include <utility>
13 #include <vector>
14 
15 #include "../include/hebench_logreg.h"
16 #include "hebench/modules/general/include/hebench_math_utils.h"
17 
18 namespace hebench {
19 namespace TestHarness {
20 namespace LogisticRegression {
21 
22 //------------------------------------
23 // class BenchmarkDescriptorCategory
24 //------------------------------------
25 
29  };
30 
31 std::uint64_t BenchmarkDescriptorCategory::fetchVectorSize(const std::vector<hebench::APIBridge::WorkloadParam> &w_params)
32 {
33  assert(WorkloadParameterCount == 1);
34  assert(OpParameterCount == 3);
35  assert(OpResultCount == 1);
36 
37  std::uint64_t retval;
38 
39  if (w_params.size() < WorkloadParameterCount)
40  {
41  std::stringstream ss;
42  ss << "Insufficient workload parameters in 'w_params'. Expected " << WorkloadParameterCount
43  << ", but " << w_params.size() << "received.";
44  throw std::invalid_argument(IL_LOG_MSG_CLASS(ss.str()));
45  } // end if
46 
47  for (std::size_t i = 0; i < WorkloadParameterCount; ++i)
48  if (w_params[i].data_type != WorkloadParameterType[i])
49  {
50  std::stringstream ss;
51  ss << "Invalid type for workload parameter " << i
52  << ". Expected type ID " << WorkloadParameterType[i] << ", but " << w_params[i].data_type << " received.";
53  throw std::invalid_argument(IL_LOG_MSG_CLASS(ss.str()));
54  } // end if
55  else if (w_params[i].u_param <= 0)
56  {
57  std::stringstream ss;
58  ss << "Invalid number of elements for vector in workload parameter " << i
59  << ". Expected positive integer, but " << w_params[i].u_param << " received.";
60  throw std::invalid_argument(IL_LOG_MSG_CLASS(ss.str()));
61  } // end if
62 
63  retval = w_params.at(0).u_param;
64 
65  return retval;
66 }
67 
69  const std::vector<hebench::APIBridge::WorkloadParam> &w_params) const
70 {
71  bool retval = false;
72 
73  // return true if benchmark is supported
78  {
79  try
80  {
81  fetchVectorSize(w_params);
82  retval = true;
83  }
84  catch (...)
85  {
86  // workload not supported
87  retval = false;
88  }
89  } // end if
90 
91  return retval;
92 }
93 
95  const Engine &engine,
96  const BenchmarkDescription::Backend &backend_desc,
97  const BenchmarkDescription::Configuration &config) const
98 {
99  (void)engine;
100  std::stringstream ss;
101 
102  output.concrete_descriptor = backend_desc.descriptor;
103  if (getForceConfigValues())
104  {
106  config.default_min_test_time_ms == 0 ?
107  backend_desc.descriptor.cat_params.min_test_time_ms :
109  } // end if
110  else
111  {
113  backend_desc.descriptor.cat_params.min_test_time_ms != 0 ?
114  backend_desc.descriptor.cat_params.min_test_time_ms :
116  } // end else
117 
118  // workload name
119 
120  std::uint64_t vector_size = fetchVectorSize(config.w_params);
121  ss << BaseWorkloadName;
122  switch (backend_desc.descriptor.workload)
123  {
125  ss << " PolyD3";
126  break;
128  ss << " PolyD5";
129  break;
131  ss << " PolyD7";
132  break;
133  default:
134  // standard sigmoid
135  break;
136  } // end switch
137 
138  output.workload_base_name = ss.str();
139  ss << " " << vector_size << " features";
140  output.workload_name = ss.str();
142 }
143 
144 //---------------------------
145 // class DataGeneratorHelper
146 //---------------------------
147 
152 {
153 private:
154  IL_DECLARE_CLASS_NAME(LogisticRegression::DataGeneratorHelper)
155 
156 public:
159  void *result, const void *w, const void *b, const void *input,
160  std::uint64_t feature_count);
161 
162 protected:
163  DataGeneratorHelper() = default;
164 
165 private:
166  template <class T, class Container> // T must always be arithmetic type, Container elements must be of type T
167  static double evaluatePolynomial(T x, const Container &coeff);
168 
169  template <unsigned int degree> // degree must be only 0, 3, 5, or 7
170  static double sigmoid(double x);
171 
172  template <class T> // T must always be floating point
174  T &result, const T *p_w, const T &b, const T *p_input,
175  std::uint64_t feature_count);
176 };
177 
178 template <class T, class Container>
179 inline double DataGeneratorHelper::evaluatePolynomial(T x, const Container &coeff)
180 {
181  // Horner's method follows:
182  // a_n * x^n + a_n-1 * x^(n-1) +... + a_1 * x + a_0
183  // == (...(((a_n * x + a_n-1) * x + a_n-2) * x ... + a_1) * x + a_0
184  auto it = std::rbegin(coeff);
185  T retval = *it;
186  for (++it; it != std::rend(coeff); ++it)
187  retval = retval * x + *it;
188 
189  return retval;
190 }
191 
192 template <>
193 inline double DataGeneratorHelper::sigmoid<0>(double x)
194 {
195  return 1.0 / (1.0 + std::exp(-x));
196 }
197 
198 template <>
199 inline double DataGeneratorHelper::sigmoid<3>(double x)
200 {
201  // f3(x) = 0.5 + 1.20096(x/8) - 0.81562(x/8)^3
202  static const std::array<double, 4> poly = { 0.5, 0.15012, 0.0, -0.0015930078125 };
203  return evaluatePolynomial(x, poly);
204 }
205 
206 template <>
207 inline double DataGeneratorHelper::sigmoid<5>(double x)
208 {
209  // f5(x) = 0.5 + 1.53048(x/8) - 2.3533056(x/8)^3 + 1.3511295(x/8)^5
210  static const std::array<double, 6> poly = { 0.5, 0.19131, 0.0, -0.0045963, 0.0, 0.0000412332000732421875 };
211  return evaluatePolynomial(x, poly);
212 }
213 
214 template <>
215 inline double DataGeneratorHelper::sigmoid<7>(double x)
216 {
217  // f7(x) = 0.5 + 1.73496(x/8) - 4.19407(x/8)^3 + 5.43402(x/8)^5 - 2.50739(x/8)^7
218  static const std::array<double, 8> poly = { 0.5, 0.21687, 0.0, -0.00819154296875, 0.0, 0.0001658331298828125, 0.0, -0.00000119561672210693359375 };
219  return evaluatePolynomial(x, poly);
220 }
221 
222 template <class T>
224  T &result, const T *p_w, const T &b, const T *p_input,
225  std::uint64_t feature_count)
226 {
227  if (!p_w)
228  throw std::invalid_argument(IL_LOG_MSG_CLASS("Invalid null 'p_w'."));
229  if (!p_input)
230  throw std::invalid_argument(IL_LOG_MSG_CLASS("Invalid null 'p_input'."));
231 
232  T linear_regression = std::inner_product(p_w, p_w + feature_count, p_input, static_cast<T>(0))
233  + b;
234  switch (poly_deg)
235  {
237  result = static_cast<T>(sigmoid<3>(static_cast<double>(linear_regression)));
238  break;
239 
241  result = static_cast<T>(sigmoid<5>(static_cast<double>(linear_regression)));
242  break;
243 
245  result = static_cast<T>(sigmoid<7>(static_cast<double>(linear_regression)));
246  break;
247 
248  default:
249  result = static_cast<T>(sigmoid<0>(static_cast<double>(linear_regression)));
250  break;
251  } // end switch
252 }
253 
256  void *p_result, const void *p_w, const void *p_bias, const void *p_input,
257  std::uint64_t feature_count)
258 {
259  if (!p_result)
260  throw std::invalid_argument(IL_LOG_MSG_CLASS("Invalid null 'p_result'."));
261  if (!p_bias)
262  throw std::invalid_argument(IL_LOG_MSG_CLASS("Invalid null 'p_bias'."));
263 
264  switch (data_type)
265  {
267  logisticRegressionInference<float>(poly_deg,
268  *reinterpret_cast<float *>(p_result),
269  reinterpret_cast<const float *>(p_w),
270  *reinterpret_cast<const float *>(p_bias),
271  reinterpret_cast<const float *>(p_input),
272  feature_count);
273  break;
274 
276  logisticRegressionInference<double>(poly_deg,
277  *reinterpret_cast<double *>(p_result),
278  reinterpret_cast<const double *>(p_w),
279  *reinterpret_cast<const double *>(p_bias),
280  reinterpret_cast<const double *>(p_input),
281  feature_count);
282  break;
283 
284  default:
285  throw std::invalid_argument(IL_LOG_MSG_CLASS("Data type not supported."));
286  break;
287  } // end switch
288 }
289 
290 //---------------------
291 // class DataGenerator
292 //---------------------
293 
295  std::uint64_t vector_size,
296  std::uint64_t batch_size_input,
298 {
300  retval->init(polynomial_degree, vector_size, batch_size_input, data_type);
301  return retval;
302 }
303 
305  std::uint64_t vector_size,
306  std::uint64_t batch_size_input,
308  const std::string &dataset_filename)
309 {
311  retval->init(polynomial_degree, vector_size, batch_size_input, data_type, dataset_filename);
312  return retval;
313 }
314 
315 DataLoader::DataLoader() :
316  m_polynomial_degree(PolynomialDegree::None),
317  m_vector_size(0)
318 {
319 }
320 
321 void DataLoader::init(PolynomialDegree polynomial_degree,
322  std::uint64_t vector_size,
323  std::uint64_t batch_size_input,
325 {
326  // Load/generate and initialize the data for logistic regression
327 
328  assert(InputDim0 + OutputDim0 >= 4);
329 
330  // number of samples in each input parameter and output
331  std::size_t batch_sizes[InputDim0 + OutputDim0] = {
332  1, // W
333  1, // b
334  batch_size_input, // X
335  batch_size_input // result
336  };
337  m_polynomial_degree = polynomial_degree;
338  m_vector_size = vector_size;
339 
340  // number of elements in each vector (W, b, X, result)
341  std::uint64_t vector_sample_sizes[InputDim0 + OutputDim0] = {
342  vector_size, // W
343  1, // b
344  vector_size, // X
345  1 // result
346  };
347 
348  // allocate memory for each vector buffer
349  DataLoaderCompute::init(data_type,
350  InputDim0, // number of input components
351  batch_sizes, // number of samples per input component
352  vector_sample_sizes, // number of elements in each vector of the input
353  OutputDim0, // number of output components
354  vector_sample_sizes + InputDim0, // number of elements in each vector of the output
355  true); // allocate memory for results?
356 
357  // at this point all NativeDataBuffers have been allocated and pointed to the correct locations
358 
359  // fill up each vector data
360 
361  // input
362  for (std::size_t vector_i = 0; vector_i < InputDim0; ++vector_i)
363  {
364  for (std::uint64_t i = 0; i < batch_sizes[vector_i]; ++i)
365  {
366  // generate the data
368  getParameterData(vector_i).p_buffers[i].p,
369  getParameterData(vector_i).p_buffers[i].size / PartialDataLoader::sizeOf(data_type),
370  0.0, 1.0);
371  } // end for
372  } // end for
373 
374  // output
375  //#pragma omp parallel for
376  for (std::uint64_t input_i = 0; input_i < batch_sizes[2]; ++input_i)
377  {
378  // find the index for the result buffer based on the input indices
379  std::uint64_t ppi[] = { 0, 0, input_i };
380  std::uint64_t r_i = getResultIndex(ppi);
381 
382  // generate the data
383  DataGeneratorHelper::logisticRegressionInference(data_type, polynomial_degree,
384  getResultData(0).p_buffers[r_i].p, // result
385  getParameterData(Index_W).p_buffers[0].p, // W
386  getParameterData(Index_b).p_buffers[0].p, // b
387  getParameterData(Index_X).p_buffers[input_i].p, // X
388  vector_size);
389  } // end for
390 
391  // all data has been generated at this point
392 }
393 
394 void DataLoader::init(PolynomialDegree polynomial_degree,
395  std::uint64_t expected_vector_size,
396  std::uint64_t max_batch_size_input,
398  const std::string &dataset_filename)
399 {
400  // Load/generate and initialize the data for logistic regression
401 
402  assert(InputDim0 + OutputDim0 >= 4);
403 
404  // number of samples in each input parameter and output
405  std::size_t batch_sizes[InputDim0 + OutputDim0] = {
406  1, // W
407  1, // b
408  max_batch_size_input, // X
409  max_batch_size_input // result
410  };
411  m_polynomial_degree = polynomial_degree;
412  m_vector_size = expected_vector_size;
413 
414  // number of elements in each vector (W, b, X, result)
415  std::uint64_t vector_sample_sizes[InputDim0 + OutputDim0] = {
416  expected_vector_size, // W
417  1, // b
418  expected_vector_size, // X
419  1 // result
420  };
421 
422  // allocate memory for each vector buffer
423  DataLoaderCompute::init(dataset_filename, data_type,
424  InputDim0, // number of input components
425  batch_sizes, // number of samples per input component
426  vector_sample_sizes, // number of elements in each vector of the input
427  OutputDim0, // number of output components
428  vector_sample_sizes + InputDim0); // number of elements in each vector of the output
429 
430  // at this point all NativeDataBuffers have been allocated, pointed to the correct locations
431  // and buffers loaded with data from dataset_filename
432 }
433 
434 void DataLoader::computeResult(std::vector<hebench::APIBridge::NativeDataBuffer *> &result,
435  const std::uint64_t *param_data_pack_indices,
437 {
438  // as protected method, parameters should be valid when called
439 
440  assert(param_data_pack_indices[Index_W] == 0 && param_data_pack_indices[Index_b] == 0);
441 
442  // generate the output
443  DataGeneratorHelper::logisticRegressionInference(data_type, m_polynomial_degree,
444  result.front()->p, // result
445  this->getParameterData(Index_W).p_buffers[0].p, // W
446  this->getParameterData(Index_b).p_buffers[0].p, // b
447  this->getParameterData(Index_X).p_buffers[param_data_pack_indices[Index_X]].p, // X
448  m_vector_size);
449 }
450 } // namespace LogisticRegression
451 } // namespace TestHarness
452 } // namespace hebench
const hebench::APIBridge::BenchmarkDescriptor & descriptor
Benchmark backend descriptor, as retrieved by backend, corresponding to the registration handle h_des...
std::vector< hebench::APIBridge::WorkloadParam > w_params
Set of arguments for workload parameters.
std::uint64_t default_min_test_time_ms
Default minimum test time in milliseconds.
Static helper class to generate vector data for all supported data types.
static void generateRandomVectorN(hebench::APIBridge::DataType data_type, void *result, std::uint64_t elem_count, double mean, double stddev)
Generates normally distributed random data of the specified type.
static std::size_t sizeOf(hebench::APIBridge::DataType data_type)
static hebench::APIBridge::WorkloadParamType::WorkloadParamType WorkloadParameterType[WorkloadParameterCount]
bool matchBenchmarkDescriptor(const hebench::APIBridge::BenchmarkDescriptor &bench_desc, const std::vector< hebench::APIBridge::WorkloadParam > &w_params) const override
Determines if the represented benchmark can perform the workload described by a specified HEBench ben...
void completeWorkloadDescription(WorkloadDescriptionOutput &output, const Engine &engine, const BenchmarkDescription::Backend &backend_desc, const BenchmarkDescription::Configuration &config) const override
Completes the description for the matched benchmark.
static std::uint64_t fetchVectorSize(const std::vector< hebench::APIBridge::WorkloadParam > &w_params)
Static helper class to generate data for all supported data types.
static void logisticRegressionInference(hebench::APIBridge::DataType data_type, DataLoader::PolynomialDegree poly_deg, void *result, const void *w, const void *b, const void *input, std::uint64_t feature_count)
void computeResult(std::vector< hebench::APIBridge::NativeDataBuffer * > &result, const std::uint64_t *param_data_pack_indices, hebench::APIBridge::DataType data_type) override
Computes result of the operation on the input data given the indices of the input sample.
static DataLoader::Ptr create(PolynomialDegree polynomial_degree, std::uint64_t vector_size, std::uint64_t batch_size_input, hebench::APIBridge::DataType data_type)
static bool getForceConfigValues()
Specifies whether frontend will override backend descriptors using configuration data or not.
std::size_t operation_params_count
Number of parameters for the represented workload operation.
std::string workload_name
Human-readable friendly name for the represented workload to be used for its description on the repor...
hebench::APIBridge::BenchmarkDescriptor concrete_descriptor
Benchmark descriptor completed with concrete values assigned to configurable fields.
std::string workload_base_name
Human-readable friendly name for the represented workload to be used for its description on the repor...
Bundles values that need to be filled by a workload during completeWorkloadDescription().
const hebench::APIBridge::DataPack & getResultData(std::uint64_t param_position) const override
Data pack corresponding to the specified component of the result.
const hebench::APIBridge::DataPack & getParameterData(std::uint64_t param_position) const override
Data pack for specified operation parameter (operand).
void init(hebench::APIBridge::DataType data_type, std::size_t input_dim, const std::size_t *input_sample_count_per_dim, const std::uint64_t *input_count_per_dim, std::size_t output_dim, const std::uint64_t *output_count_per_dim, bool allocate_output)
Initializes dimensions of inputs and outputs. No allocation is performed.
std::uint64_t getResultIndex(const std::uint64_t *param_data_pack_indices) const override
Computes the index of the result NativeDataBuffer given the indices of the input data.
WorkloadParamType
Defines the possible data types for a workload flexible parameter.
Definition: types.h:303
@ Float64
64 bits IEEE 754 standard floating point real numbers.
Definition: types.h:306
@ UInt64
64 bits unsigned integers.
Definition: types.h:305
DataType
Defines data types for a workload.
Definition: types.h:379
@ Float32
32 bits IEEE 754 standard floating point real numbers.
Definition: types.h:382
std::uint64_t min_test_time_ms
Specifies the minimum time, in milliseconds, to run the test.
Definition: types.h:447
CategoryParams cat_params
Parameters for the category.
Definition: types.h:532
Workload workload
Workload for the benchmark.
Definition: types.h:529
@ LogisticRegression_PolyD3
Definition: types.h:203
@ LogisticRegression_PolyD7
Definition: types.h:247
@ LogisticRegression_PolyD5
Definition: types.h:225
Defines a benchmark test.
Definition: types.h:527