HEBench
hebench_matmult.cpp
Go to the documentation of this file.
1 
2 // Copyright (C) 2021 Intel Corporation
3 // SPDX-License-Identifier: Apache-2.0
4 
5 #include <cassert>
6 #include <sstream>
7 #include <stdexcept>
8 
9 #include "../include/hebench_matmult.h"
10 
11 namespace hebench {
12 namespace TestHarness {
13 namespace MatrixMultiply {
14 
15 //------------------------------------
16 // class BenchmarkDescriptorCategory
17 //------------------------------------
18 
24  };
25 
26 std::array<std::pair<std::uint64_t, std::uint64_t>, BenchmarkDescriptorCategory::OpParameterCount>
27 BenchmarkDescriptorCategory::fetchMatrixSizes(const std::vector<hebench::APIBridge::WorkloadParam> &w_params)
28 {
29  assert(WorkloadParameterCount == 3);
30  assert(OpParameterCount == 2);
31  assert(OpResultCount == 1);
32 
33  std::array<std::pair<std::uint64_t, std::uint64_t>, OpParameterCount> retval;
34 
35  if (w_params.size() < WorkloadParameterCount)
36  {
37  std::stringstream ss;
38  ss << "Insufficient workload parameters in 'w_params'. Expected " << WorkloadParameterCount
39  << ", but " << w_params.size() << "received.";
40  throw std::invalid_argument(IL_LOG_MSG_CLASS(ss.str()));
41  } // end if
42 
43  // validate workload parameters
44  for (std::size_t i = 0; i < WorkloadParameterCount; ++i)
45  if (w_params[i].data_type != WorkloadParameterType[i])
46  {
47  std::stringstream ss;
48  ss << "Invalid type for workload parameter " << i
49  << ". Expected type ID " << WorkloadParameterType[i] << ", but " << w_params[i].data_type << " received.";
50  throw std::invalid_argument(IL_LOG_MSG_CLASS(ss.str()));
51  } // end if
52  else if (w_params[i].u_param <= 0)
53  {
54  std::stringstream ss;
55  ss << "Invalid matrix size in workload parameter " << i
56  << ". Expected positive integer, but " << w_params[i].u_param << " received.";
57  throw std::invalid_argument(IL_LOG_MSG_CLASS(ss.str()));
58  } // end if
59 
60  retval.at(0) = std::make_pair(w_params.at(0).u_param, w_params.at(1).u_param);
61  retval.at(1) = std::make_pair(w_params.at(1).u_param, w_params.at(2).u_param);
62 
63  return retval;
64 }
65 
67  const Engine &engine,
68  const BenchmarkDescription::Backend &backend_desc,
69  const BenchmarkDescription::Configuration &config) const
70 {
71  (void)engine;
72  std::stringstream ss;
73 
74  output.concrete_descriptor = backend_desc.descriptor;
76  backend_desc.descriptor,
77  config,
79 
80  // workload name
81 
82  auto mat_dims = fetchMatrixSizes(config.w_params);
83  ss << BaseWorkloadName << " ("
84  << mat_dims[0].first << "x" << mat_dims[0].second << ") x ("
85  << mat_dims[1].first << "x" << mat_dims[1].second << ")";
86 
87  output.workload_name = ss.str();
90 }
91 
93  const std::vector<hebench::APIBridge::WorkloadParam> &w_params) const
94 {
95  bool retval = false;
96 
97  // return true if benchmark is supported
99  {
100  try
101  {
102  fetchMatrixSizes(w_params);
103  retval = true;
104  }
105  catch (...)
106  {
107  // workload not supported
108  retval = false;
109  }
110  } // end if
111 
112  return retval;
113 }
114 
115 //---------------------------
116 // class DataGeneratorHelper
117 //---------------------------
118 
123 {
124 private:
125  IL_DECLARE_CLASS_NAME(MatrixMultiply::DataGeneratorHelper)
126 
127 public:
129  void *mat_result, std::uint64_t rows, std::uint64_t cols,
130  double mean, double stddev);
131  static void matMul(hebench::APIBridge::DataType data_type,
132  void *mat_result, const void *mat_a, const void *mat_b,
133  std::uint64_t rows_a, std::uint64_t cols_a, std::uint64_t cols_b);
134 
135 protected:
137 
138 private:
139  template <class T>
140  static void matMul(T *mat_result, const T *mat_a, const T *mat_b,
141  std::uint64_t rows_a, std::uint64_t cols_a, std::uint64_t cols_b)
142  {
143  if (!mat_result)
144  throw std::invalid_argument(IL_LOG_MSG_CLASS("Invalid null `mat_result`"));
145  if (!mat_a)
146  throw std::invalid_argument(IL_LOG_MSG_CLASS("Invalid null `mat_a`"));
147  if (!mat_b)
148  throw std::invalid_argument(IL_LOG_MSG_CLASS("Invalid null `mat_b`"));
149  // perform matrix multiplication (straight-forward way,
150  // maybe optimize later)
151  for (std::uint64_t row_a = 0; row_a < rows_a; ++row_a)
152  for (std::uint64_t col_b = 0; col_b < cols_b; ++col_b)
153  {
154  mat_result[row_a * cols_b + col_b] = 0;
155  for (std::uint64_t col_a = 0; col_a < cols_a; ++col_a)
156  {
157  std::uint64_t row_b = col_a;
158  mat_result[row_a * cols_b + col_b] += mat_a[row_a * cols_a + col_a] * mat_b[row_b * cols_b + col_b];
159  } // end for
160  } // end for
161  }
162 };
163 
165  void *mat_result, std::uint64_t rows, std::uint64_t cols,
166  double mean, double stddev)
167 {
169  mat_result, rows * cols,
170  mean, stddev);
171 }
172 
174  void *mat_result,
175  const void *mat_a, const void *mat_b,
176  uint64_t rows_a, uint64_t cols_a, uint64_t cols_b)
177 {
178  switch (data_type)
179  {
181  matMul<std::int32_t>(reinterpret_cast<std::int32_t *>(mat_result),
182  reinterpret_cast<const std::int32_t *>(mat_a), reinterpret_cast<const std::int32_t *>(mat_b),
183  rows_a, cols_a, cols_b);
184  break;
185 
187  matMul<std::int64_t>(reinterpret_cast<std::int64_t *>(mat_result),
188  reinterpret_cast<const std::int64_t *>(mat_a), reinterpret_cast<const std::int64_t *>(mat_b),
189  rows_a, cols_a, cols_b);
190  break;
191 
193  matMul<float>(reinterpret_cast<float *>(mat_result),
194  reinterpret_cast<const float *>(mat_a), reinterpret_cast<const float *>(mat_b),
195  rows_a, cols_a, cols_b);
196  break;
197 
199  matMul<double>(reinterpret_cast<double *>(mat_result),
200  reinterpret_cast<const double *>(mat_a), reinterpret_cast<const double *>(mat_b),
201  rows_a, cols_a, cols_b);
202  break;
203 
204  default:
205  throw std::invalid_argument(IL_LOG_MSG_CLASS("Unknown data type."));
206  break;
207  } // end switch
208 }
209 
210 //------------------
211 // class DataLoader
212 //------------------
213 
214 DataLoader::Ptr DataLoader::create(std::uint64_t rows_a, std::uint64_t cols_a, std::uint64_t cols_b,
215  std::uint64_t batch_size_mat_a,
216  std::uint64_t batch_size_mat_b,
218 {
220  retval->init(rows_a, cols_a, cols_b, batch_size_mat_a, batch_size_mat_b, data_type);
221  return retval;
222 }
223 
224 DataLoader::Ptr DataLoader::create(std::uint64_t rows_a, std::uint64_t cols_a, std::uint64_t cols_b,
225  std::uint64_t expected_sample_size_mat_a,
226  std::uint64_t expected_sample_size_mat_b,
228  const std::string &dataset_filename)
229 {
231  retval->init(rows_a, cols_a, cols_b,
232  expected_sample_size_mat_a, expected_sample_size_mat_b,
233  data_type,
234  dataset_filename);
235  return retval;
236 }
237 
238 DataLoader::DataLoader() :
239  m_rows_a(0), m_cols_a(0), m_cols_b(0)
240 {
241 }
242 
243 void DataLoader::init(std::uint64_t rows_a, std::uint64_t cols_a, std::uint64_t cols_b,
244  std::uint64_t batch_size_mat_a,
245  std::uint64_t batch_size_mat_b,
247 {
248  // Load/generate and initialize the data for matrix multiplication:
249  // M2 = M0 * M1
250 
251  // number of samples in each input parameter and output
252  std::size_t batch_sizes[InputDim0 + OutputDim0] = {
253  batch_size_mat_a,
254  batch_size_mat_b,
255  batch_size_mat_a * batch_size_mat_b
256  };
257 
258  // store the dimensions of each matrix
259  std::pair<std::uint64_t, std::uint64_t> mat_dims[InputDim0 + OutputDim0]; // rows <=> first, cols <=> second
260  mat_dims[0] = std::make_pair(rows_a, cols_a);
261  mat_dims[1] = std::make_pair(cols_a, cols_b);
262  mat_dims[2] = std::make_pair(rows_a, cols_b);
263 
264  m_rows_a = rows_a;
265  m_cols_a = cols_a;
266  m_cols_b = cols_b;
267 
268  // compute number of elements in vector to hold each matrix data
269  // matrices are kept in a single vector in row major order
270  std::uint64_t sample_vector_sizes[InputDim0 + OutputDim0];
271  for (std::size_t i = 0; i < InputDim0 + OutputDim0; ++i)
272  {
273  sample_vector_sizes[i] = mat_dims[i].first * mat_dims[i].second;
274  } // end for
275 
276  // initialize data packs and allocate memory
277  PartialDataLoader::init(data_type,
278  InputDim0, batch_sizes, sample_vector_sizes,
279  OutputDim0, sample_vector_sizes + InputDim0,
280  true);
281 
282  // at this point all NativeDataBuffers have been allocated and pointed to the correct locations
283 
284  // fill up the matrices data
285 
286  // input
287  for (std::size_t mat_i = 0; mat_i < InputDim0; ++mat_i)
288  {
289  for (std::uint64_t i = 0; i < batch_sizes[mat_i]; ++i)
290  {
291  // generate the data
293  getParameterData(mat_i).p_buffers[i].p,
294  mat_dims[mat_i].first, // rows
295  mat_dims[mat_i].second, // columns
296  0.0, 10.0);
297  } // end for
298  } // end for
299 
300  // output
301  //#pragma omp parallel for collapse(2)
302  for (std::uint64_t m0_i = 0; m0_i < batch_sizes[0]; ++m0_i)
303  {
304  for (std::uint64_t m1_i = 0; m1_i < batch_sizes[1]; ++m1_i)
305  {
306  // find the index for the result buffer based on the input indices
307  std::uint64_t ppi[] = { m0_i, m1_i };
308  std::uint64_t r_i = getResultIndex(ppi);
309 
310  // generate the data
311  DataGeneratorHelper::matMul(data_type,
312  getResultData(0).p_buffers[r_i].p,
313  getParameterData(0).p_buffers[m0_i].p,
314  getParameterData(1).p_buffers[m1_i].p,
315  mat_dims[0].first, mat_dims[0].second, // dims for m0
316  mat_dims[1].second); // dims for m1
317  } // end for
318  } // end for
319 
320  // all data has been generated at this point
321 }
322 
323 void DataLoader::init(std::uint64_t rows_a, std::uint64_t cols_a, std::uint64_t cols_b,
324  std::uint64_t max_sample_size_mat_a,
325  std::uint64_t max_sample_size_mat_b,
327  const std::string &dataset_filename)
328 {
329  // Load/generate and initialize the data for matrix multiplication:
330  // M2 = M0 * M1
331 
332  // number of samples in each input parameter and output
333  std::size_t max_sample_sizes[InputDim0 + OutputDim0] = {
334  max_sample_size_mat_a,
335  max_sample_size_mat_b,
336  max_sample_size_mat_a * max_sample_size_mat_b
337  };
338 
339  // store the dimensions of each matrix
340  std::pair<std::uint64_t, std::uint64_t> mat_dims[InputDim0 + OutputDim0]; // rows <=> first, cols <=> second
341  mat_dims[0] = std::make_pair(rows_a, cols_a);
342  mat_dims[1] = std::make_pair(cols_a, cols_b);
343  mat_dims[2] = std::make_pair(rows_a, cols_b);
344 
345  m_rows_a = rows_a;
346  m_cols_a = cols_a;
347  m_cols_b = cols_b;
348 
349  // compute number of elements in vector to hold each matrix data
350  // matrices are kept in a single vector in row major order
351  std::uint64_t sample_vector_sizes[InputDim0 + OutputDim0];
352  for (std::size_t i = 0; i < InputDim0 + OutputDim0; ++i)
353  {
354  sample_vector_sizes[i] = mat_dims[i].first * mat_dims[i].second;
355  } // end for
356 
357  PartialDataLoader::init(dataset_filename, data_type,
358  InputDim0, max_sample_sizes, sample_vector_sizes,
359  OutputDim0, sample_vector_sizes + InputDim0);
360 
361  // at this point all NativeDataBuffers have been allocated, pointed to the correct locations
362  // and buffers loaded with data from dataset_filename
363 }
364 
365 void DataLoader::computeResult(std::vector<hebench::APIBridge::NativeDataBuffer *> &result,
366  const std::uint64_t *param_data_pack_indices,
368 {
369  // as protected method, parameters should be valid when called
370 
371  // generate the output
372  DataGeneratorHelper::matMul(data_type,
373  result.front()->p,
374  this->getParameterData(0).p_buffers[param_data_pack_indices[0]].p,
375  this->getParameterData(1).p_buffers[param_data_pack_indices[1]].p,
376  m_rows_a, m_cols_a, // dims for m0
377  m_cols_b); // dims for m1
378 }
379 
380 } // namespace MatrixMultiply
381 } // namespace TestHarness
382 } // namespace hebench
const hebench::APIBridge::BenchmarkDescriptor & descriptor
Benchmark backend descriptor, as retrieved by backend, corresponding to the registration handle h_des...
std::vector< hebench::APIBridge::WorkloadParam > w_params
Set of arguments for workload parameters.
static void generateRandomVectorN(hebench::APIBridge::DataType data_type, void *result, std::uint64_t elem_count, double mean, double stddev)
Generates normally distributed random data of the specified type.
static std::array< std::pair< std::uint64_t, std::uint64_t >, OpParameterCount > fetchMatrixSizes(const std::vector< hebench::APIBridge::WorkloadParam > &w_params)
fetchMatrixSizes
bool matchBenchmarkDescriptor(const hebench::APIBridge::BenchmarkDescriptor &bench_desc, const std::vector< hebench::APIBridge::WorkloadParam > &w_params) const override
Determines if the represented benchmark can perform the workload described by a specified HEBench ben...
void completeWorkloadDescription(WorkloadDescriptionOutput &output, const Engine &engine, const BenchmarkDescription::Backend &backend_desc, const BenchmarkDescription::Configuration &config) const override
Completes the description for the matched benchmark.
static hebench::APIBridge::WorkloadParamType::WorkloadParamType WorkloadParameterType[WorkloadParameterCount]
Static helper class to generate matrix data for all supported data types.
static void generateRandomMatrixN(hebench::APIBridge::DataType data_type, void *mat_result, std::uint64_t rows, std::uint64_t cols, double mean, double stddev)
static void matMul(hebench::APIBridge::DataType data_type, void *mat_result, const void *mat_a, const void *mat_b, std::uint64_t rows_a, std::uint64_t cols_a, std::uint64_t cols_b)
static DataLoader::Ptr create(std::uint64_t rows_a, std::uint64_t cols_a, std::uint64_t cols_b, std::uint64_t batch_size_mat_a, std::uint64_t batch_size_mat_b, hebench::APIBridge::DataType data_type)
void computeResult(std::vector< hebench::APIBridge::NativeDataBuffer * > &result, const std::uint64_t *param_data_pack_indices, hebench::APIBridge::DataType data_type) override
Computes the result of the operation on the input data, given the indices of the input sample.
static void completeCategoryParams(hebench::APIBridge::BenchmarkDescriptor &out_descriptor, const hebench::APIBridge::BenchmarkDescriptor &in_descriptor, const BenchmarkDescription::Configuration &config, bool force_config)
Completes common elements of category parameters in a descriptor using the specified configuration.
static bool getForceConfigValues()
Specifies whether frontend will override backend descriptors using configuration data or not.
std::size_t operation_params_count
Number of parameters for the represented workload operation.
std::string workload_name
Human-readable friendly name for the represented workload to be used for its description on the repor...
hebench::APIBridge::BenchmarkDescriptor concrete_descriptor
Benchmark descriptor completed with concrete values assigned to configurable fields.
std::string workload_base_name
Human-readable friendly name for the represented workload to be used for its description on the repor...
Bundles values that need to be filled by a workload during completeWorkloadDescription().
const hebench::APIBridge::DataPack & getResultData(std::uint64_t param_position) const override
Data pack corresponding to the specified component of the result.
const hebench::APIBridge::DataPack & getParameterData(std::uint64_t param_position) const override
Data pack for specified operation parameter (operand).
void init(hebench::APIBridge::DataType data_type, std::size_t input_dim, const std::size_t *input_sample_count_per_dim, const std::uint64_t *input_count_per_dim, std::size_t output_dim, const std::uint64_t *output_count_per_dim, bool allocate_output)
Initializes dimensions of inputs and outputs. No allocation is performed.
std::uint64_t getResultIndex(const std::uint64_t *param_data_pack_indices) const override
Computes the index of the result NativeDataBuffer given the indices of the input data.
WorkloadParamType
Defines the possible data types for a workload flexible parameter.
Definition: types.h:303
@ Float64
64 bits IEEE 754 standard floating point real numbers.
Definition: types.h:306
@ Int64
64 bits signed integers.
Definition: types.h:304
@ UInt64
64 bits unsigned integers.
Definition: types.h:305
DataType
Defines data types for a workload.
Definition: types.h:379
@ Float32
32 bits IEEE 754 standard floating point real numbers.
Definition: types.h:382
@ Int32
32 bits signed integers.
Definition: types.h:380
Workload workload
Workload for the benchmark.
Definition: types.h:529
Defines a benchmark test.
Definition: types.h:527