14 #include "../include/hebench_idata_loader.h"
17 namespace TestHarness {
25 std::size_t retval = 0;
30 retval =
sizeof(std::int32_t);
34 retval =
sizeof(std::int64_t);
38 retval =
sizeof(float);
42 retval =
sizeof(double);
46 throw std::invalid_argument(IL_LOG_MSG_CLASS(
"Unknown data type."));
73 retval->pack_count = data_pack_count;
74 retval->p_data_packs = p_data_packs;
79 delete[] p_data_packs;
106 retval->buffer_count = buffer_count;
107 retval->param_position = param_position;
108 retval->p_buffers = p_buffers;
124 std::uint8_t *p_buffer =
nullptr;
128 p_buffer =
new std::uint8_t[size];
136 std::uint8_t *p_tmp = reinterpret_cast<std::uint8_t *>(p->p);
145 retval->p = p_buffer;
161 template <
typename T>
168 std::size_t input_dim,
169 const std::size_t *input_sample_count_per_dim,
170 const std::uint64_t *input_count_per_dim,
171 std::size_t output_dim,
172 const std::uint64_t *output_count_per_dim,
173 bool allocate_output);
175 const std::string &filename,
176 std::size_t expected_input_dim,
177 const std::size_t *max_input_sample_count_per_dim,
178 const std::uint64_t *expected_input_count_per_dim,
179 std::size_t expected_output_dim,
180 const std::uint64_t *expected_output_count_per_dim);
183 template <
typename T>
185 std::size_t input_dim,
186 const std::size_t *input_sample_count_per_dim,
187 const std::uint64_t *input_count_per_dim,
188 std::size_t output_dim,
189 const std::uint64_t *output_count_per_dim,
190 bool allocate_output)
193 throw std::invalid_argument(IL_LOG_MSG_CLASS(
"Invalid input dimensions: 'input_dim' must be positive."));
195 throw std::invalid_argument(IL_LOG_MSG_CLASS(
"Invalid output dimensions: 'output_dim' must be positive."));
197 data_loader.m_input_data.resize(input_dim);
198 data_loader.m_output_data.resize(output_dim);
200 std::size_t output_sample_count_per_dim = 1;
201 for (std::size_t i = 0; i < data_loader.m_input_data.size(); ++i)
203 if (input_sample_count_per_dim[i] <= 0)
205 std::stringstream ss;
206 ss <<
"Invalid batch size for dimension " << i <<
": 'input_count_per_dim[" << i <<
"]' must be positive.";
207 throw std::invalid_argument(IL_LOG_MSG_CLASS(ss.str()));
210 output_sample_count_per_dim *= input_sample_count_per_dim[i];
211 data_loader.m_input_data[i] = data_loader.
createDataPack(input_sample_count_per_dim[i], i);
214 for (std::size_t i = 0; i < data_loader.m_output_data.size(); ++i)
216 data_loader.m_output_data[i] = data_loader.
createDataPack(output_sample_count_per_dim, i);
221 std::size_t single_size =
sizeof(T);
222 std::vector<std::uint64_t> input_buffer_sizes(input_dim);
223 std::vector<std::uint64_t> output_buffer_sizes(output_dim);
224 std::transform(input_count_per_dim, input_count_per_dim + input_dim, input_buffer_sizes.begin(),
225 [single_size](std::uint64_t n) -> std::uint64_t { return n * single_size; });
226 std::transform(output_count_per_dim, output_count_per_dim + output_dim, output_buffer_sizes.begin(),
227 [single_size](std::uint64_t n) -> std::uint64_t { return n * single_size; });
228 data_loader.allocate(input_buffer_sizes.data(), input_buffer_sizes.size(),
229 output_buffer_sizes.data(), output_buffer_sizes.size(),
233 template <
typename T>
235 const std::string &filename,
236 std::size_t expected_input_dim,
237 const std::size_t *max_input_sample_count_per_dim,
238 const std::uint64_t *expected_input_count_per_dim,
239 std::size_t expected_output_dim,
240 const std::uint64_t *expected_output_count_per_dim)
242 if (expected_input_dim <= 0)
243 throw std::invalid_argument(IL_LOG_MSG_CLASS(
"Invalid input dimensions: 'expected_input_dim' must be positive."));
244 if (expected_output_dim <= 0)
245 throw std::invalid_argument(IL_LOG_MSG_CLASS(
"Invalid output dimensions: 'expected_output_dim' must be positive."));
248 std::size_t max_output_sample_count = 1;
253 if (dataset.
inputs.size() != expected_input_dim)
255 std::stringstream ss;
256 ss <<
"Loaded input dimensions do not match the number of parameters for the operation. "
257 <<
"Expected " << expected_input_dim <<
", but " << dataset.
inputs.size() <<
"loaded.";
258 throw std::runtime_error(IL_LOG_MSG_CLASS(ss.str()));
260 for (std::size_t input_dim_i = 0; input_dim_i < dataset.
inputs.size(); ++input_dim_i)
262 std::vector<std::vector<T>> &input_component = dataset.
inputs[input_dim_i];
263 if (input_component.size() < max_input_sample_count_per_dim[input_dim_i])
265 std::stringstream ss;
266 ss <<
"Insufficient data loaded for operation input parameter " << input_dim_i <<
". "
267 <<
"Expected " << max_input_sample_count_per_dim[input_dim_i] <<
" samples, but "
268 << input_component.size() <<
" found.";
269 throw std::runtime_error(IL_LOG_MSG_CLASS(ss.str()));
271 if (input_component.size() > max_input_sample_count_per_dim[input_dim_i])
273 input_component.resize(max_input_sample_count_per_dim[input_dim_i]);
274 max_output_sample_count *= input_component.size();
276 for (std::size_t input_sample_i = 0; input_sample_i < input_component.size(); ++input_sample_i)
278 if (input_component[input_sample_i].size() != expected_input_count_per_dim[input_dim_i])
280 std::stringstream ss;
281 ss <<
"Incorrect vector size loaded for input dimension " << input_dim_i <<
", sample " << input_sample_i <<
". "
282 <<
"Expected vector with " << expected_input_count_per_dim[input_dim_i] <<
" elements, but "
283 << input_component[input_sample_i].size() <<
" received.";
284 throw std::runtime_error(IL_LOG_MSG_CLASS(ss.str()));
291 && dataset.
outputs.size() != expected_output_dim)
293 std::stringstream ss;
294 ss <<
"Loaded output dimensions do not match the dimensions of the result for the operation. "
295 <<
"Expected " << expected_output_dim <<
", but " << dataset.
outputs.size() <<
"loaded.";
296 throw std::runtime_error(IL_LOG_MSG_CLASS(ss.str()));
298 for (std::size_t output_dim_i = 0; output_dim_i < dataset.
outputs.size(); ++output_dim_i)
300 std::vector<std::vector<T>> &output_component = dataset.
outputs[output_dim_i];
301 if (output_component.size() < max_output_sample_count)
304 std::stringstream ss;
305 ss <<
"Insufficient ground-truth output samples loaded for output component " << output_dim_i <<
". "
306 <<
"Expected, at least, " << max_input_sample_count_per_dim <<
" samples, but "
307 << output_component.size() <<
" received.";
308 throw std::runtime_error(IL_LOG_MSG_CLASS(ss.str()));
310 if (output_component.size() > max_output_sample_count)
312 output_component.resize(max_output_sample_count);
313 for (std::size_t output_sample_i = 0; output_sample_i < output_component.size(); ++output_sample_i)
315 if (output_component[output_sample_i].size() != expected_output_count_per_dim[output_dim_i])
317 std::stringstream ss;
318 ss <<
"Incorrect vector size loaded for output dimension " << output_dim_i <<
", sample " << output_sample_i <<
". "
319 <<
"Expected vector with " << expected_output_count_per_dim[output_dim_i] <<
" elements, but "
320 << output_component[output_sample_i].size() <<
" received.";
321 throw std::runtime_error(IL_LOG_MSG_CLASS(ss.str()));
328 std::vector<std::size_t> input_sample_count_per_dim(dataset.
inputs.size());
329 for (std::size_t i = 0; i < dataset.
inputs.size(); ++i)
330 input_sample_count_per_dim[i] = dataset.
inputs[i].size();
332 dataset.
inputs.size(), input_sample_count_per_dim.data(), expected_input_count_per_dim,
333 expected_output_dim, expected_output_count_per_dim, !dataset.
outputs.empty());
340 for (std::size_t input_dim_i = 0; input_dim_i < dataset.
inputs.size(); ++input_dim_i)
343 const std::vector<std::vector<T>> &input_component = dataset.
inputs[input_dim_i];
347 for (std::size_t sample_i = 0; sample_i < data_pack.
buffer_count; ++sample_i)
350 const std::vector<T> &input_component_sample = input_component[sample_i];
352 assert(param_sample.
p
353 && param_sample.
size == input_component_sample.size() *
sizeof(T));
354 std::memcpy(param_sample.
p, input_component_sample.data(), param_sample.
size);
358 for (std::size_t output_dim_i = 0; output_dim_i < dataset.
outputs.size(); ++output_dim_i)
361 const std::vector<std::vector<T>> &output_component = dataset.
outputs[output_dim_i];
365 for (std::size_t sample_i = 0; sample_i < data_pack.
buffer_count; ++sample_i)
368 const std::vector<T> &output_component_sample = output_component[sample_i];
370 assert(result_component_sample.
p
371 && result_component_sample.
size == output_component_sample.size() *
sizeof(T));
372 std::memcpy(result_component_sample.
p, output_component_sample.data(), result_component_sample.
size);
383 m_b_is_output_allocated(false),
384 m_b_initialized(false)
389 std::size_t input_dim,
390 const std::size_t *input_sample_count_per_dim,
391 const std::uint64_t *input_count_per_dim,
392 std::size_t output_dim,
393 const std::uint64_t *output_count_per_dim,
394 bool allocate_output)
400 output_dim, output_count_per_dim,
406 output_dim, output_count_per_dim,
412 output_dim, output_count_per_dim,
418 output_dim, output_count_per_dim,
423 throw std::invalid_argument(IL_LOG_MSG_CLASS(
"Unknown 'data_type'."));
427 m_data_type = data_type;
428 m_b_initialized =
true;
433 std::size_t expected_input_dim,
434 const std::size_t *max_input_sample_count_per_dim,
435 const std::uint64_t *expected_input_count_per_dim,
436 std::size_t expected_output_dim,
437 const std::uint64_t *expected_output_count_per_dim)
443 expected_input_dim, max_input_sample_count_per_dim, expected_input_count_per_dim,
444 expected_output_dim, expected_output_count_per_dim);
449 expected_input_dim, max_input_sample_count_per_dim, expected_input_count_per_dim,
450 expected_output_dim, expected_output_count_per_dim);
455 expected_input_dim, max_input_sample_count_per_dim, expected_input_count_per_dim,
456 expected_output_dim, expected_output_count_per_dim);
461 expected_input_dim, max_input_sample_count_per_dim, expected_input_count_per_dim,
462 expected_output_dim, expected_output_count_per_dim);
466 throw std::invalid_argument(IL_LOG_MSG_CLASS(
"Unknown 'data_type'."));
470 m_data_type = data_type;
471 m_b_initialized =
true;
474 void PartialDataLoader::allocate(
const std::uint64_t *input_buffer_sizes,
475 std::size_t input_buffer_sizes_count,
476 const std::uint64_t *output_buffer_sizes,
477 std::size_t output_buffer_sizes_count,
478 bool allocate_output)
482 if (!input_buffer_sizes)
484 std::stringstream ss;
485 ss <<
"Invalid null `input_buffer_sizes`.";
486 throw std::invalid_argument(IL_LOG_MSG_CLASS(ss.str()));
488 if (!output_buffer_sizes)
490 std::stringstream ss;
491 ss <<
"Invalid null `output_buffer_sizes`.";
492 throw std::invalid_argument(IL_LOG_MSG_CLASS(ss.str()));
495 m_b_is_output_allocated = allocate_output;
498 std::vector<std::uint64_t> input_batch_sizes(m_input_data.size());
499 for (std::size_t i = 0; i < m_input_data.size(); ++i)
500 input_batch_sizes[i] = m_input_data[i]->buffer_count;
501 std::vector<std::uint64_t> output_batch_sizes(m_output_data.size());
502 for (std::size_t i = 0; i < m_output_data.size(); ++i)
503 output_batch_sizes[i] = m_output_data[i]->buffer_count;
505 if (input_buffer_sizes_count < input_batch_sizes.size())
507 std::stringstream ss;
508 ss <<
"Invalid number of input buffers `input_buffer_sizes_count`. Expected, at least "
509 << input_batch_sizes.size() <<
", but " << input_buffer_sizes_count <<
" received.";
510 throw std::invalid_argument(IL_LOG_MSG_CLASS(ss.str()));
512 if (output_buffer_sizes_count < output_batch_sizes.size())
514 std::stringstream ss;
515 ss <<
"Invalid number of output buffers `output_buffer_sizes_count`. Expected, at least "
516 << output_batch_sizes.size() <<
", but " << output_buffer_sizes_count <<
" received.";
517 throw std::invalid_argument(IL_LOG_MSG_CLASS(ss.str()));
521 std::uint64_t total_raw_size = 0;
522 std::uint64_t output_start;
523 for (std::size_t i = 0; i < input_batch_sizes.size(); ++i)
524 total_raw_size += input_buffer_sizes[i] * input_batch_sizes[i];
525 output_start = total_raw_size;
526 for (std::size_t i = 0; i < output_batch_sizes.size(); ++i)
527 total_raw_size += output_buffer_sizes[i] * (allocate_output ? output_batch_sizes[i] : 1);
530 m_raw_buffer.resize(total_raw_size, 0);
542 std::vector<std::uint8_t *> input_buffers(input_batch_sizes.size(), m_raw_buffer.data());
543 for (std::uint64_t i = 1; i < input_batch_sizes.size(); ++i)
544 input_buffers[i] = input_buffers[i - 1] + input_buffer_sizes[i - 1] * input_batch_sizes[i - 1];
545 if (!input_buffers.empty())
548 assert(input_buffers.front() == m_raw_buffer.data());
550 std::vector<std::uint8_t *> output_buffers;
553 output_buffers.resize(output_batch_sizes.size(), m_raw_buffer.data() + output_start);
554 for (std::uint64_t i = 1; i < output_batch_sizes.size(); ++i)
555 output_buffers[i] = output_buffers[i - 1] + output_buffer_sizes[i - 1] * output_batch_sizes[i - 1];
556 if (!output_buffers.empty())
559 assert(output_buffers.front() == m_raw_buffer.data() + output_start);
566 for (std::size_t param_i = 0; param_i < m_input_data.size(); ++param_i)
569 for (std::uint64_t i = 0; i < input_batch_sizes[param_i]; ++i)
572 m_input_data[param_i]->p_buffers[i].p = input_buffers[param_i] + i * input_buffer_sizes[param_i];
573 m_input_data[param_i]->p_buffers[i].size = input_buffer_sizes[param_i];
574 m_input_data[param_i]->p_buffers[i].tag = 0;
578 for (std::size_t output_i = 0; output_i < m_output_data.size(); ++output_i)
581 for (std::uint64_t i = 0; i < output_batch_sizes[output_i]; ++i)
584 m_output_data[output_i]->p_buffers[i].p = allocate_output ?
585 output_buffers[output_i] + i * output_buffer_sizes[output_i] :
587 m_output_data[output_i]->p_buffers[i].size = output_buffer_sizes[output_i];
588 m_output_data[output_i]->p_buffers[i].tag = 0;
597 if (!m_b_initialized)
598 throw std::logic_error(IL_LOG_MSG_CLASS(
"Not initialized."));
600 std::vector<std::shared_ptr<hebench::APIBridge::DataPack>> retval(m_output_data.size());
602 for (std::size_t result_component_i = 0; result_component_i < m_output_data.size(); ++result_component_i)
604 if (!m_output_data[result_component_i]
605 || !m_output_data[result_component_i]->p_buffers)
606 throw std::logic_error(IL_LOG_MSG_CLASS(
"Description for output component " +
std::to_string(result_component_i) +
" is not initialized."));
607 if (result_index >= m_output_data[result_component_i]->buffer_count)
609 std::stringstream ss;
610 ss <<
"Out of range `result_index`."
611 <<
" Expected value less than " << m_output_data[result_component_i]->buffer_count <<
", but "
612 << result_index <<
" received.";
613 throw std::out_of_range(IL_LOG_MSG_CLASS(ss.str()));
615 retval[result_component_i] =
622 for (std::uint64_t buffer_i = 0; buffer_i < p->buffer_count; ++buffer_i)
624 hebench::APIBridge::NativeDataBuffer &buffer = p->p_buffers[buffer_i];
626 delete[] reinterpret_cast<std::int8_t *>(buffer.p);
633 retval[result_component_i]->param_position = result_component_i;
634 retval[result_component_i]->buffer_count = 1;
636 retval[result_component_i]->p_buffers[0] = m_output_data[result_component_i]->p_buffers[result_index];
637 retval[result_component_i]->p_buffers[0].p =
new std::int8_t[retval[result_component_i]->p_buffers[0].size];
645 if (!m_b_initialized)
646 throw std::logic_error(IL_LOG_MSG_CLASS(
"Not initialized."));
648 if (!m_input_data.at(param_position))
649 throw std::runtime_error(IL_LOG_MSG_CLASS(
"Invalid null element accessed at 'param_position'."));
651 return *m_input_data.at(param_position);
656 if (!m_b_initialized)
657 throw std::logic_error(IL_LOG_MSG_CLASS(
"Not initialized."));
659 if (!m_output_data.at(param_position))
660 throw std::runtime_error(IL_LOG_MSG_CLASS(
"Invalid null element accessed at 'param_position'."));
662 return *m_output_data.at(param_position);
667 if (!m_b_initialized)
668 throw std::logic_error(IL_LOG_MSG_CLASS(
"Not initialized."));
671 std::vector<const hebench::APIBridge::NativeDataBuffer *> &retval = p_retval->result;
672 std::uint64_t r_i = getResultIndex(param_data_pack_indices);
673 p_retval->sample_index = r_i;
675 retval.resize(getResultCount());
676 for (std::size_t result_component_i = 0; result_component_i < retval.size(); ++result_component_i)
683 std::stringstream ss;
684 ss <<
"Unexpected error! Result sample " << r_i <<
" for result component " << result_component_i <<
" not found.";
685 throw std::logic_error(IL_LOG_MSG_CLASS(ss.str()));
687 retval[result_component_i] = result_component.
p_buffers + r_i;
693 std::uint64_t PartialDataLoader::getResultIndex(
const std::uint64_t *param_data_pack_indices)
const
695 if (!m_b_initialized)
696 throw std::logic_error(IL_LOG_MSG_CLASS(
"Not initialized."));
698 if (!param_data_pack_indices)
699 throw std::invalid_argument(IL_LOG_MSG_CLASS(
"Invalid null argument 'param_data_pack_indices'."));
701 std::uint64_t retval = getParameterCount() > 0 ?
702 param_data_pack_indices[0] :
705 for (std::size_t param_i = 1; param_i < getParameterCount(); ++param_i)
707 assert(getParameterData(param_i).param_position == param_i);
708 if (param_data_pack_indices[param_i] >= getParameterData(param_i).buffer_count)
710 std::stringstream ss;
711 ss <<
"Index out of range: 'param_data_pack_indices['" << param_i <<
"] == " << param_data_pack_indices[param_i] <<
". "
712 <<
"Expected value less than " << getParameterData(param_i).buffer_count <<
".";
713 throw std::out_of_range(IL_LOG_MSG_CLASS(ss.str()));
715 retval = param_data_pack_indices[param_i] + getParameterData(param_i).buffer_count * retval;
static ExternalDataset< T > loadFromCSV(const std::string &filename, std::uint64_t max_loaded_size=0)
Loads a dataset from an external csv file that follows the defined structure.
static std::size_t sizeOf(hebench::APIBridge::DataType data_type)
static unique_ptr_custom_deleter< hebench::APIBridge::NativeDataBuffer > createDataBuffer(std::uint64_t size, std::int64_t tag)
static unique_ptr_custom_deleter< hebench::APIBridge::DataPackCollection > createDataPackCollection(std::uint64_t data_pack_count)
Creates shallow packed data that self cleans up.
std::shared_ptr< ResultData > ResultDataPtr
hebench::TestHarness::unique_ptr_custom_deleter< T > unique_ptr_custom_deleter
static unique_ptr_custom_deleter< hebench::APIBridge::DataPack > createDataPack(std::uint64_t buffer_count, std::uint64_t param_position)
Creates shallow data pack that self cleans up.
static void loadFromFile(PartialDataLoader &data_loader, const std::string &filename, std::size_t expected_input_dim, const std::size_t *max_input_sample_count_per_dim, const std::uint64_t *expected_input_count_per_dim, std::size_t expected_output_dim, const std::uint64_t *expected_output_count_per_dim)
static void init(PartialDataLoader &data_loader, std::size_t input_dim, const std::size_t *input_sample_count_per_dim, const std::uint64_t *input_count_per_dim, std::size_t output_dim, const std::uint64_t *output_count_per_dim, bool allocate_output)
Base class for data loaders and data generators.
std::vector< std::shared_ptr< hebench::APIBridge::DataPack > > getResultTempDataPacks() const
Retrieves a pre-allocated result providing memory space to store a single operation result sample.
std::uint64_t getResultCount() const override
Number of components in a result for the represented operation.
std::uint64_t getParameterCount() const override
Number of parameter components (operands) for the represented operation.
void init(hebench::APIBridge::DataType data_type, std::size_t input_dim, const std::size_t *input_sample_count_per_dim, const std::uint64_t *input_count_per_dim, std::size_t output_dim, const std::uint64_t *output_count_per_dim, bool allocate_output)
Initializes dimensions of inputs and outputs. No allocation is performed.
@ Float64
64 bits IEEE 754 standard floating point real numbers.
@ Int64
64 bits signed integers.
DataType
Defines data types for a workload.
@ Float32
32 bits IEEE 754 standard floating point real numbers.
@ Int32
32 bits signed integers.
DataPack * p_data_packs
Collection of data packs.
std::uint64_t size
Size of underlying data.
std::uint64_t param_position
The 0-based position of this parameter in the corresponding function call.
void * p
Pointer to underlying data.
std::uint64_t buffer_count
Number of data buffers in p_buffers.
NativeDataBuffer * p_buffers
Array of data buffers for parameter.
Defines a data package for an operation.
Defines a collection of data packs.
Structure to contain flexible data.
std::vector< std::vector< std::vector< T > > > inputs
Contains the samples for each input parameter as loaded from external source.
std::vector< std::vector< std::vector< T > > > outputs
Contains the samples for each result component as loaded from external source.
std::string to_string(const std::string_view &s)