oclk namespace#

Submodules#

oclk.benchmark module#

oclk.benchmark.benchmark(config_file: str, style: str = 'none', output: str = '')#
oclk.benchmark.parse_args(args: List[KernelArg])#
oclk.benchmark.run_suite(suite: Suite) List[TimerResult]#

oclk.benchmark_config module#

class oclk.benchmark_config.ArgValueGenerator(*, method: str = 'constant', value: int | float | List[int | float] = 0)#

Bases: BaseModel

_abc_impl = <_abc._abc_data object>#
classmethod generate_check(v: str)#
method: str#
model_config: ClassVar[ConfigDict] = {}#

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'method': FieldInfo(annotation=str, required=False, default='constant'), 'value': FieldInfo(annotation=Union[int, float, List[Union[int, float]]], required=False, default=0)}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

value: int | float | List[int | float]#
class oclk.benchmark_config.Kernel(*, name: str, suffix: str = '', definition: str = '', local_work_size: List[int], global_work_size: List[int], args: List[KernelArg])#

Bases: BaseModel

_abc_impl = <_abc._abc_data object>#
args: List[KernelArg]#
definition: str#
global_work_size: List[int]#
local_work_size: List[int]#
model_config: ClassVar[ConfigDict] = {}#

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'args': FieldInfo(annotation=List[KernelArg], required=True), 'definition': FieldInfo(annotation=str, required=False, default=''), 'global_work_size': FieldInfo(annotation=List[int], required=True), 'local_work_size': FieldInfo(annotation=List[int], required=True), 'name': FieldInfo(annotation=str, required=True), 'suffix': FieldInfo(annotation=str, required=False, default='')}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

name: str#
suffix: str#
class oclk.benchmark_config.KernelArg(*, name: str = '', type: str, dtype: str = 'float32', shape: List | None = [1], value: ArgValueGenerator = ArgValueGenerator(method='constant', value=0))#

Bases: BaseModel

_abc_impl = <_abc._abc_data object>#
dtype: str#
model_config: ClassVar[ConfigDict] = {}#

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'dtype': FieldInfo(annotation=str, required=False, default='float32'), 'name': FieldInfo(annotation=str, required=False, default=''), 'shape': FieldInfo(annotation=Union[List, NoneType], required=False, default=[1]), 'type': FieldInfo(annotation=str, required=True), 'value': FieldInfo(annotation=ArgValueGenerator, required=False, default=ArgValueGenerator(method='constant', value=0))}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

name: str#
shape: List | None#
type: str#
classmethod type_check(v: str)#
value: ArgValueGenerator#
class oclk.benchmark_config.Suite(*, suite_name: str, kernel_file: str, kernels: List[Kernel], timer: Timer = Timer(prefix='', repeat=1, warmup=0))#

Bases: BaseModel

_abc_impl = <_abc._abc_data object>#
kernel_file: str#
kernels: List[Kernel]#
model_config: ClassVar[ConfigDict] = {}#

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'kernel_file': FieldInfo(annotation=str, required=True), 'kernels': FieldInfo(annotation=List[Kernel], required=True), 'suite_name': FieldInfo(annotation=str, required=True), 'timer': FieldInfo(annotation=Timer, required=False, default=Timer(prefix='', repeat=1, warmup=0))}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

suite_name: str#
timer: Timer#
class oclk.benchmark_config.Timer(*, prefix: str = '', repeat: int = 1, warmup: int = 0)#

Bases: BaseModel

_abc_impl = <_abc._abc_data object>#
model_config: ClassVar[ConfigDict] = {}#

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'prefix': FieldInfo(annotation=str, required=False, default=''), 'repeat': FieldInfo(annotation=int, required=False, default=1), 'warmup': FieldInfo(annotation=int, required=False, default=0)}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

prefix: str#
repeat: int#
warmup: int#
oclk.benchmark_config.dict_to_Kernel(d: Dict) KernelArg#
oclk.benchmark_config.dict_to_KernelArg(d: Dict) KernelArg#
oclk.benchmark_config.dict_to_Suite(d: Dict) Suite#

oclk.cli module#

oclk.cli.benchmark(name: str, kernel_file: str | None = '', file: str = '')#

Generate bench_{name}.yaml for benchmark

oclk.cli.main()#
oclk.cli.tune(name: str, file: str = '')#

Generate tune_{name}.py for tune

oclk.oclk_runner module#

class oclk.oclk_runner.Runner#

Bases: object

has_initialized: bool = False#
kernel_list: Dict[str, Dict[str, str]] = {}#
load_kernel(cl_file: str, kernel_name: str | List[str], compile_option: str | List[str] | None = None)#

Load kernel with filename and function name

Parameters:
  • cl_file – filename can be absolute or relative path

  • kernel_name – kernel_name is the kernel functions’ name

  • compile_option – compile option(s), given as strings such as “-DMY_DEF=1”; the “-D” prefix is required

release_kernel(kernel_name: str) int#

Unload a kernel from the context. Kernel names cannot be duplicated.

If you want to reload a kernel, you must release it first.

Parameters:

kernel_name

Returns:

run(*, kernel_name: str, input: ~typing.List[~typing.Dict[str, int | float | ~numpy.array | ~typing.List[~typing.Dict[str, int | float | str]]]], local_work_size: ~typing.List[int] | ~typing.Tuple[int], global_work_size: ~typing.List[int] | ~typing.Tuple[int], output: ~typing.List[str] | None = None, wait: bool | None = True, timer: ~typing.Dict | ~oclk.oclk_runner.TimerArgs | None = <oclk.oclk_runner.TimerArgs object>) RunnerReturn#

run the kernel

Parameters:
  • kernel_name (str) – the name of the kernel

  • input (List[Dict[str, Union[int, float, np.array]]]) –

    List of dictionaries that set the input args, in the same order as the kernel function’s arguments. Key “name”: the name of this input. Key “value”: the value of this input.

    • np.array should be contiguous array

    • constant args:
      • python type: float -> c type: float

      • python type: int -> c type: long

      • or specify c type with field “type”, support types:
        • [unsigned] int

        • [unsigned] long

        • float

        • double

    • custom C struct arg: parse values as list, for example:

      [
          {"type": "float", "value": 1.234},
          {"type": "int", "value": 1234}
      ]
      

    key “type”: [Optional] c type string for “value”

    You can use input_maker() to create this

  • output (List[str]) – List of names specifying which arrays will be read back from the GPU buffer

  • global_work_size (List[int]) – list of integers specifying the global work size

  • local_work_size (List[int]) – list of integers specifying the local work size; must have the same length as global_work_size. Note: local_work_size can be set to [-1,-1] (same length as global_work_size), in which case nullptr is passed to clEnqueueNDRangeKernel

  • wait (Optional[bool]) – Optional, default true, wait for GPU

  • timer (Optional[Union[Dict, TimerArgs]]) – Optional, arguments to set up a timer for benchmark kernels

Returns:

time info and output arrays

Return type:

RunnerReturn

oclk.oclk_runner.RunnerCtx(filename, kernel_name, compile_option='')#
class oclk.oclk_runner.RunnerReturn#

Bases: object

including timer results and output array args

Variables:
  • timer_result – a TimerResult

  • results – list of return values

__init__()#
results: List[ndarray]#
timer_result: TimerResult#
class oclk.oclk_runner.TimerArgs(enable: bool, warmup: int, repeat: int, name: str)#

Bases: object

Set up the Timer

Variables:
  • enable – whether to use timer

  • warmup – warm up loop before timing

  • repeat – repeat \(n\) times and time it, the result will be \(result = total / n\)

  • name – timer name

__init__(enable: bool, warmup: int, repeat: int, name: str)#
class oclk.oclk_runner.TimerResult#

Bases: object

Time Results of Runner.run()

Variables:
  • name – The name of the Global Timer

  • cnt – How many times the timer was called

  • avg – Average time among all calls

  • stdev – Standard deviation

  • total – Total time accumulated

__init__()#
avg: float#
cnt: int#
name: str#
stdev: float#
total: float#
oclk.oclk_runner.check_init(fn)#

oclk.tuner module#

class oclk.tuner.TuneArgGenerator(method, values)#

Bases: object

__init__(method, values)#
class oclk.tuner.Tuner(name='', **kwargs)#

Bases: object

__init__(name='', **kwargs)#
static exp2_range(start, end)#

generate exp2 values, from start(inclusive) to end(inclusive).

Parameters:
  • start

  • end

Returns:

static range_arg(name, start, end, step=1)#

decorator to generate ranged arguments; `start`, `end` and `step` have the same meaning as in range()

Parameters:
  • name – argument name

  • start – range start

  • end – range end

  • step – range step

run(kernel_file: str, kernel_name: str, compile_option: str, *, input: List[Dict[str, int | float | array]], local_work_size: List[int], global_work_size: List[int], output: List[str] | None = None, timer: Dict | TimerArgs | None = None) RunnerReturn#

Wrapper for Runner.run. In this method, Runner will load a kernel, run it, and finally release it.

Parameters:
  • kernel_file – filename can be absolute or relative path

  • kernel_name – kernel_name is the kernel functions’ name

  • compile_option – compile option, given as a string such as “-DMY_DEF=1”; the “-D” prefix is required

  • input – see Runner.run

  • local_work_size – see Runner.run

  • global_work_size – see Runner.run

  • output – see Runner.run

  • timer – see Runner.run

Returns:

see Runner.run

abstract setup()#
abstract method, will be called before tunable methods,

used to initialize variables

top_result(k=5) List[Tuple[Dict[str, Any], float]]#

Get the top k results, sorted in ascending order

Parameters:

k (int) – top k

Returns:

top k results, for instance:

[
    (
        {
            key:value,
            key2:value2,
            key3:value3,
        },
        1.23
    ),
    (
        {
            key:value,
            key2:value2,
            key3:value3,
        },
        4.56
    ),
    (
        {
            key:value,
            key2:value2,
            key3:value3,
        },
        7.89
    )
]

Return type:

List[Tuple[Dict[str, Any], float]]

static tune()#

decorator, mark a tunable method.

NOTE: method should be passed kwargs only, must return a value as the metric.

All the returned values will be sorted in ascending order to pick the best. For instance, this value can be rtn.timer_result.avg

Raises:

TuningSkip – raise TuningSkip to skip an argument combination

tuner_registry = {}#
static values_arg(name, *args)#

decorator, add a values argument generator

Parameters:
  • name – the name of the argument

  • args – all possible values

static worksize_arg(name, dim_size: int, dim0: List[int], dim1: List[int] | None = None, dim2: List[int] | None = None)#

decorator to generate worksize arguments

Parameters:
  • name – argument name

  • dim_size – work dim size

  • dim0 – possible values for dim0

  • dim1 – possible values for dim1

  • dim2 – possible values for dim2

exception oclk.tuner.TuningSkip(*args, **kwargs)#

Bases: BaseException

__init__(*args, **kwargs)#

oclk.utils module#

oclk.utils.input_maker(**kwargs) List[Dict[str, int | float | array]]#

easily make an input arguments list for Runner.run()

for example:

input_maker(a=a, b=b, length=(arr_length, "int"), out=out)

which produces a list of dicts:

[
    {"name": "a", "value": a},
    {"name": "b", "value": b},
    {"name": "length", "value": arr_length, "type": "int"},
    {"name": "out", "value": out},
]
Parameters:

kwargs – keyword arguments; if a value is a tuple, it should be (value, type)

Returns:

input arg list

Return type:

List[Dict[str, Any]]