Skip to content

data_utils

Convenience functions and classes for data generation scripts.

DataGen

Base data generator class.

Base data generator class which can be inherited to easily develop a custom data generator script for any kernel.

Source code in util/sim/data_utils.py
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
class DataGen:
    """Foundation class for kernel data generation scripts.

    Inherit from this class and override its methods to build a custom
    data generator script for any kernel.
    """

    def parser(self):
        """Build the default command-line parser for data generation scripts.

        The returned object is an `argparse.ArgumentParser`. Subclasses can
        call this implementation and register further options through the
        parser's `add_argument()` method.

        Returns:
            The configured `ArgumentParser` instance.
        """
        cli = argparse.ArgumentParser(description='Generate data for kernels')
        # One (flags, options) entry per argument, registered in this order
        argument_specs = (
            (("-c", "--cfg"),
             dict(type=pathlib.Path, required=True,
                  help='Select param config file kernel')),
            (('--section',),
             dict(type=str, help='Section to store matrices in')),
            (('output',),
             dict(type=pathlib.Path, help='Path of the output header file')),
        )
        for flags, options in argument_specs:
            cli.add_argument(*flags, **options)
        return cli

    def parse_args(self):
        """Parse the command-line arguments of the data generation script.

        Returns:
            The namespace produced by the parser obtained from `parser()`.
        """
        default_parser = self.parser()
        return default_parser.parse_args()

    def emit_header(self, **kwargs):
        """Produce the contents of the generated C header.

        This base implementation ignores `kwargs` and returns only the
        license header. Subclasses are expected to extend it and append
        their generated data after the license header.

        Returns:
            A string with the generated C header contents.
        """
        license_header = emit_license()
        return license_header

    def main(self):
        """Default entry point for data generation scripts.

        Reads the parameter config file given on the command line, adds the
        `--section` value to the parameters under the `section` key
        (replacing any value loaded from the file) and writes the result of
        `emit_header()` to the output path.
        """
        cli_args = self.parse_args()

        # Parse the parameter config file while it is open
        with cli_args.cfg.open() as cfg_file:
            params = json5.loads(cfg_file.read())
        params['section'] = cli_args.section

        # The output file is opened (and truncated) before the header is built
        with open(cli_args.output, 'w') as header_file:
            header_file.write(self.emit_header(**params))

emit_header(**kwargs)

Emits a C header containing generated data.

The base implementation emits a string which only contains a license header. Subclasses should extend this method and append the generated data to the license header.

Returns:

Type Description

A string with the generated C header contents.

Source code in util/sim/data_utils.py
334
335
336
337
338
339
340
341
342
343
344
def emit_header(self, **kwargs):
    """Produce the contents of the generated C header.

    This base implementation ignores `kwargs` and returns only the
    license header. Subclasses are expected to extend it and append
    their generated data after the license header.

    Returns:
        A string with the generated C header contents.
    """
    license_header = emit_license()
    return license_header

main()

Default main function for data generation scripts.

Source code in util/sim/data_utils.py
346
347
348
349
350
351
352
353
354
355
356
357
def main(self):
    """Default entry point for data generation scripts.

    Reads the parameter config file given on the command line, adds the
    `--section` value to the parameters under the `section` key
    (replacing any value loaded from the file) and writes the result of
    `emit_header()` to the output path.
    """
    cli_args = self.parse_args()

    # Parse the parameter config file while it is open
    with cli_args.cfg.open() as cfg_file:
        params = json5.loads(cfg_file.read())
    params['section'] = cli_args.section

    # The output file is opened (and truncated) before the header is built
    with open(cli_args.output, 'w') as header_file:
        header_file.write(self.emit_header(**params))

parse_args()

Parse default data generation script arguments.

Returns the arguments passed to the data generation script, parsed using the parser() method.

Source code in util/sim/data_utils.py
326
327
328
329
330
331
332
def parse_args(self):
    """Parse the command-line arguments of the data generation script.

    Returns:
        The namespace produced by the parser obtained from `parser()`.
    """
    default_parser = self.parser()
    return default_parser.parse_args()

parser()

Default argument parser for data generation scripts.

It is an instance of the ArgumentParser class from the argparse module. Subclasses can extend this and add custom arguments via the parser's add_argument() method.

Source code in util/sim/data_utils.py
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
def parser(self):
    """Build the default command-line parser for data generation scripts.

    The returned object is an `argparse.ArgumentParser`. Subclasses can
    call this implementation and register further options through the
    parser's `add_argument()` method.

    Returns:
        The configured `ArgumentParser` instance.
    """
    cli = argparse.ArgumentParser(description='Generate data for kernels')
    # One (flags, options) entry per argument, registered in this order
    argument_specs = (
        (("-c", "--cfg"),
         dict(type=pathlib.Path, required=True,
              help='Select param config file kernel')),
        (('--section',),
         dict(type=str, help='Section to store matrices in')),
        (('output',),
         dict(type=pathlib.Path, help='Path of the output header file')),
    )
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)
    return cli

ctype_from_precision_t(prec)

Convert precision_t type to a C type string.

Parameters:

Name Type Description Default
prec

A value of type precision_t. Accepts both enum strings (e.g. "FP64") and integer enumeration values (e.g. 8).

required
Source code in util/sim/data_utils.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def ctype_from_precision_t(prec):
    """Translate a `precision_t` value into the matching C type name.

    Args:
        prec: A value of type `precision_t`. Accepts both enum strings
            (e.g. "FP64") and integer enumeration values (e.g. 8).

    Returns:
        The C type string associated with the value that
        `_integer_precision_t(prec)` returns.

    Raises:
        KeyError: If that value is not one of 8, 4, 2 or 1.
    """
    width = _integer_precision_t(prec)
    # Keyed by the integer enumeration value of precision_t
    ctype_names = {1: '__fp8', 2: '__fp16', 4: 'float', 8: 'double'}
    return ctype_names[width]

emit_license()

Emit license header.

Returns:

Type Description

A header string.

Source code in util/sim/data_utils.py
23
24
25
26
27
28
29
30
31
32
33
def emit_license():
    """Build the license header placed at the top of generated files.

    The copyright year is the current year at the time of the call.

    Returns:
        A header string made of three newline-terminated `//` comment lines.
    """
    year = datetime.now().year
    header_lines = (
        f"// Copyright {year} ETH Zurich and University of Bologna.",
        "// Licensed under the Apache License, Version 2.0, see LICENSE for details.",
        "// SPDX-License-Identifier: Apache-2.0",
    )
    return "".join(f"{line}\n" for line in header_lines)

ff_desc_from_precision_t(prec)

Convert precision_t type to a FlexFloat descriptor.

Parameters:

Name Type Description Default
prec

A value of type precision_t. Accepts both enum strings (e.g. "FP64") and integer enumeration values (e.g. 8).

required
Source code in util/sim/data_utils.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def ff_desc_from_precision_t(prec):
    """Translate a `precision_t` value into a FlexFloat descriptor.

    Args:
        prec: A value of type `precision_t`. Accepts both enum strings
            (e.g. "FP64") and integer enumeration values (e.g. 8).

    Returns:
        The FlexFloat descriptor string associated with the value that
        `_integer_precision_t(prec)` returns.

    Raises:
        KeyError: If that value is not one of 8, 4, 2 or 1.
    """
    width = _integer_precision_t(prec)
    # Keyed by the integer enumeration value of precision_t
    descriptors = {1: 'e5m2', 2: 'fp16', 4: 'fp32', 8: 'fp64'}
    return descriptors[width]

flatten(array)

Flatten various array types with a homogeneous API.

Parameters:

Name Type Description Default
array

Can be a Numpy array, a PyTorch tensor or a nested list.

required
Source code in util/sim/data_utils.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def flatten(array):
    """Flatten various array types with a homogeneous API.

    Args:
        array: Can be a Numpy array, a PyTorch tensor, a nested list or a
            Numpy scalar.

    Returns:
        A one-dimensional Numpy array holding the elements of `array`.
        A Numpy scalar is returned as a single-element array.

    Raises:
        TypeError: If `array` is of none of the supported types.
    """
    if isinstance(array, np.ndarray):
        return array.flatten()
    if isinstance(array, torch.Tensor):
        # `Tensor.numpy()` refuses tensors that require grad or that live
        # on a non-CPU device, so detach and move to host memory first.
        return array.detach().cpu().numpy().flatten()
    if isinstance(array, list):
        return np.array(array).flatten()
    # if scalar return it as a single-element array
    if isinstance(array, np.generic):
        return np.array([array]).flatten()
    raise TypeError(f"Unsupported type: {type(array)}")

from_buffer(byte_array, ctype='uint32_t')

Get structured data from raw bytes.

If ctype is a C type string, it returns a homogeneous array of the specified type from the raw data, using numpy's frombuffer method. Note that if ctype is equal to __fp8, given that there is no native fp8 format in Numpy, an array of FlexFloat objects is returned.

Alternatively, a dictionary can be passed to ctype to extract a struct from the raw data. In this case, it returns a dictionary with the same keys as in ctype. The values in the ctype dictionary should be format strings compatible with Python's struct library. The order of the keys in the ctype dictionary should reflect the order in which the variables appear in the raw data.

Source code in util/sim/data_utils.py
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
def from_buffer(byte_array, ctype='uint32_t'):
    """Get structured data from raw bytes.

    If `ctype` is a C type string, it returns a homogeneous array of the
    specified type from the raw data, using numpy's `frombuffer` method.
    Note that if `ctype` is equal to `__fp8`, given that there is no
    native fp8 format in Numpy, an array of FlexFloat objects is returned.

    Alternatively, a dictionary can be passed to `ctype` to extract a
    struct from the raw data. In this case, it returns a dictionary with
    the same keys as in `ctype`. The values in the `ctype` dictionary
    should be format strings compatible with Python's `struct` library.
    The order of the keys in the `ctype` dictionary should reflect the
    order in which the variables appear in the raw data.

    Args:
        byte_array: The raw bytes to decode.
        ctype: Either a C type string (`uint32_t`, `double`, `float`,
            `__fp16` or `__fp8`) or a dictionary mapping field names to
            `struct` format strings.

    Returns:
        A dictionary of field values if `ctype` is a dictionary,
        otherwise an array of the requested element type.

    Raises:
        ValueError: If `ctype` is a type string which is not supported.
    """
    # Types which have a direct correspondence in Numpy
    NP_DTYPE_FROM_CTYPE = {
        'uint32_t': np.uint32,
        'double': np.float64,
        'float': np.float32,
        '__fp16': np.float16
    }

    if isinstance(ctype, dict):
        # byte_array assumed little-endian; fields are unpacked in the
        # iteration order of the dictionary
        fmt_string = ''.join(ctype.values())
        field_values = struct.unpack(f'<{fmt_string}', byte_array)
        return dict(zip(ctype.keys(), field_values))
    if ctype in NP_DTYPE_FROM_CTYPE:
        return np.frombuffer(byte_array, dtype=NP_DTYPE_FROM_CTYPE[ctype])
    if ctype == '__fp8':
        return ff.frombuffer(byte_array, 'e5m2')
    # Fail loudly instead of implicitly returning None
    raise ValueError(f"Unsupported ctype: {ctype!r}")

generate_random_array(size, prec='FP64', seed=None)

Consistent random array generation for Snitch experiments.

Samples values between -1 and 1 from a uniform distribution and of the exact specified type, e.g. actual 64-bit doubles.

This function ensures that e.g. power measurements are not skewed by using integer values in the FPU.

Parameters:

Name Type Description Default
size

Tuple of array dimensions.

required
prec

A value of type precision_t. Accepts both enum strings (e.g. "FP64") and integer enumeration values (e.g. 8).

'FP64'
Source code in util/sim/data_utils.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def generate_random_array(size, prec='FP64', seed=None):
    """Generate random arrays consistently across Snitch experiments.

    Values are drawn from a uniform distribution between -1 and 1 and
    are returned in the exact requested type, e.g. actual 64-bit doubles.

    Using this function ensures that e.g. power measurements are not
    skewed by feeding integer values to the FPU.

    Args:
        size: Tuple of array dimensions.
        prec: A value of type `precision_t`. Accepts both enum strings
            (e.g. "FP64") and integer enumeration values (e.g. 8).
        seed: Seed forwarded to `np.random.default_rng()`.

    Returns:
        A FlexFloat array when the precision is 8-bit, a Numpy array of
        the corresponding Numpy type otherwise.
    """
    rng = np.random.default_rng(seed=seed)
    # Always sample in 64b precision, the result is cast down afterwards
    samples = rng.random(size=size, dtype=np.float64) * 2 - 1
    if _integer_precision_t(prec) != 1:
        return samples.astype(numpy_type_from_precision_t(prec))
    # 8b floats have no Numpy type: build a FlexFloat array from a 16b
    # Numpy array instead
    half_samples = samples.astype(np.float16)
    return ff.array(half_samples, ff_desc_from_precision_t(prec))

numpy_type_from_precision_t(prec)

Convert precision_t type to Numpy type.

Parameters:

Name Type Description Default
prec

A value of type precision_t. Accepts both enum strings (e.g. "FP64") and integer enumeration values (e.g. 8).

required
Source code in util/sim/data_utils.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def numpy_type_from_precision_t(prec):
    """Convert `precision_t` type to Numpy type.

    Args:
        prec: A value of type `precision_t`. Accepts both enum strings
            (e.g. "FP64") and integer enumeration values (e.g. 8).

    Returns:
        The Numpy floating-point type (`np.float64`, `np.float32` or
        `np.float16`) matching the precision.

    Raises:
        AssertionError: If the precision is FP8, which has no direct
            Numpy correspondence.
        KeyError: If the integer precision value is not in the table.
    """
    # Types which have a direct correspondence in Numpy
    precision_t_to_numpy_type_map = {
        8: np.float64,
        4: np.float32,
        2: np.float16
    }
    prec = _integer_precision_t(prec)
    # FP8 is deliberately absent from the map above; report it with a
    # dedicated message rather than a bare KeyError
    assert prec != 1, "No direct correspondence between FP8 and Numpy"
    return precision_t_to_numpy_type_map[prec]

size_from_precision_t(prec)

Return the size in bytes of a precision_t type.

Parameters:

Name Type Description Default
prec

A value of type precision_t. Accepts both enum strings (e.g. "FP64") and integer enumeration values (e.g. 8).

required
Source code in util/sim/data_utils.py
44
45
46
47
48
49
50
51
def size_from_precision_t(prec):
    """Give the size in bytes of a `precision_t` type.

    Args:
        prec: A value of type `precision_t`. Accepts both enum strings
            (e.g. "FP64") and integer enumeration values (e.g. 8).

    Returns:
        The value returned by `_integer_precision_t(prec)`, i.e. the
        integer enumeration value is used directly as the size in bytes.
    """
    size_in_bytes = _integer_precision_t(prec)
    return size_in_bytes

torch_type_from_precision_t(prec)

Convert precision_t type to PyTorch type.

Parameters:

Name Type Description Default
prec

A value of type precision_t. Accepts both enum strings (e.g. "FP64") and integer enumeration values (e.g. 8).

required
Source code in util/sim/data_utils.py
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def torch_type_from_precision_t(prec):
    """Translate a `precision_t` value into the matching PyTorch type.

    Args:
        prec: A value of type `precision_t`. Accepts both enum strings
            (e.g. "FP64") and integer enumeration values (e.g. 8).

    Returns:
        The PyTorch dtype associated with the value that
        `_integer_precision_t(prec)` returns.

    Raises:
        KeyError: If that value is not one of 8, 4, 2 or 1.
    """
    # Keyed by the integer enumeration value of precision_t.
    # NOTE(review): 8-bit floats map to the e4m3 format here, whereas
    # `ff_desc_from_precision_t` uses 'e5m2' - confirm this is intended.
    torch_dtypes = {
        1: torch.float8_e4m3fn,
        2: torch.float16,
        4: torch.float32,
        8: torch.float64,
    }
    width = _integer_precision_t(prec)
    return torch_dtypes[width]

validate_tcdm_footprint(size, silent=False)

Check whether data of specified size fits in TCDM.

Throws an assertion error if the specified size exceeds the space available for the heap in TCDM.

Parameters:

Name Type Description Default
size

The size of the data in bytes.

required
silent

If True, will not print the size to stdout.

False
Source code in util/sim/data_utils.py
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
def validate_tcdm_footprint(size, silent=False):
    """Verify that data of the given size fits in TCDM.

    Raises an assertion error unless the specified size is strictly
    smaller than `TCDM_HEAP_SIZE`, the space available for the heap in
    TCDM.

    Args:
        size: The size of the data in bytes.
        silent: If True, will not print the size to stdout.
    """
    # The failure message is only formatted when the check fails
    assert size < TCDM_HEAP_SIZE, (
        f'Total heap space required {humanize.naturalsize(size, binary=True)} exceeds '
        f'limit of {humanize.naturalsize(TCDM_HEAP_SIZE, binary=True)}'
    )
    if silent:
        return
    required = humanize.naturalsize(size, binary=True)
    print(f'Total heap space required {required}')