oclk namespace#

Submodules#

oclk.benchmark module#

oclk.benchmark.benchmark(config_file: str, style: str = 'none', output: str = '')#

oclk.benchmark.parse_args(args: List[KernelArg])#

oclk.benchmark.run_suite(suite: Suite) → List[TimerResult]#

oclk.benchmark_config module#

class oclk.benchmark_config.ArgValueGenerator(*, method: str = 'constant', value: int | float | List[int | float] = 0)#

Bases: BaseModel

_abc_impl = <_abc._abc_data object>#

classmethod generate_check(v: str)#

method: str#

model_config: ClassVar[ConfigDict] = {}#: Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'method': FieldInfo(annotation=str, required=False, default='constant'), 'value': FieldInfo(annotation=Union[int, float, List[Union[int, float]]], required=False, default=0)}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

value: int | float | List[int | float]#

class oclk.benchmark_config.Kernel(*, name: str, suffix: str = '', definition: str = '', local_work_size: List[int], global_work_size: List[int], args: List[KernelArg])#

Bases: BaseModel

_abc_impl = <_abc._abc_data object>#

args: List[KernelArg]#

definition: str#

global_work_size: List[int]#

local_work_size: List[int]#

model_config: ClassVar[ConfigDict] = {}#: Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'args': FieldInfo(annotation=List[KernelArg], required=True), 'definition': FieldInfo(annotation=str, required=False, default=''), 'global_work_size': FieldInfo(annotation=List[int], required=True), 'local_work_size': FieldInfo(annotation=List[int], required=True), 'name': FieldInfo(annotation=str, required=True), 'suffix': FieldInfo(annotation=str, required=False, default='')}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

name: str#

suffix: str#

class oclk.benchmark_config.KernelArg(*, name: str = '', type: str, dtype: str = 'float32', shape: List | None = [1], value: ArgValueGenerator = ArgValueGenerator(method='constant', value=0))#

Bases: BaseModel

_abc_impl = <_abc._abc_data object>#

dtype: str#

model_config: ClassVar[ConfigDict] = {}#: Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'dtype': FieldInfo(annotation=str, required=False, default='float32'), 'name': FieldInfo(annotation=str, required=False, default=''), 'shape': FieldInfo(annotation=Union[List, NoneType], required=False, default=[1]), 'type': FieldInfo(annotation=str, required=True), 'value': FieldInfo(annotation=ArgValueGenerator, required=False, default=ArgValueGenerator(method='constant', value=0))}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

name: str#

shape: List | None#

type: str#

classmethod type_check(v: str)#

value: ArgValueGenerator#

class oclk.benchmark_config.Suite(*, suite_name: str, kernel_file: str, kernels: List[Kernel], timer: Timer = Timer(prefix='', repeat=1, warmup=0))#

Bases: BaseModel

_abc_impl = <_abc._abc_data object>#

kernel_file: str#

kernels: List[Kernel]#

model_config: ClassVar[ConfigDict] = {}#: Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'kernel_file': FieldInfo(annotation=str, required=True), 'kernels': FieldInfo(annotation=List[Kernel], required=True), 'suite_name': FieldInfo(annotation=str, required=True), 'timer': FieldInfo(annotation=Timer, required=False, default=Timer(prefix='', repeat=1, warmup=0))}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

suite_name: str#

timer: Timer#

class oclk.benchmark_config.Timer(*, prefix: str = '', repeat: int = 1, warmup: int = 0)#

Bases: BaseModel

_abc_impl = <_abc._abc_data object>#

model_config: ClassVar[ConfigDict] = {}#: Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'prefix': FieldInfo(annotation=str, required=False, default=''), 'repeat': FieldInfo(annotation=int, required=False, default=1), 'warmup': FieldInfo(annotation=int, required=False, default=0)}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

prefix: str#

repeat: int#

warmup: int#

oclk.benchmark_config.dict_to_Kernel(d: Dict) → KernelArg#

oclk.benchmark_config.dict_to_KernelArg(d: Dict) → KernelArg#

oclk.benchmark_config.dict_to_Suite(d: Dict) → Suite#

oclk.cli module#

oclk.cli.benchmark(name: str, kernel_file: str | None = '', file: str = '')#: Generate bench_{name}.yaml for benchmark

oclk.cli.main()#

oclk.cli.tune(name: str, file: str = '')#: Generate tune_{name}.py for tune

oclk.oclk_runner module#

class oclk.oclk_runner.Runner#

Bases: object

has_initialized: bool = False#

kernel_list: Dict[str, Dict[str, str]] = {}#

load_kernel(cl_file: str, kernel_name: str | List[str], compile_option: str | List[str] | None = None)#

Load kernel with filename and function name

Parameters:

cl_file – filename can be absolute or relative path
kernel_name – kernel_name is the kernel functions’ name
compile_option – compile option can be strings like “-DMY_DEF=1”, “-D” is necessary

release_kernel(kernel_name: str) → int#

Unload a kernel from the context. Kernel names cannot be duplicated.

If you want to reload a kernel, you must release it first.

Parameters: kernel_name – name of the kernel to release
Returns:

Runner.run(): run the kernel

Parameters:

kernel_name (str) – the name of the kernel
input (List[Dict[str, Union[int, float, np.array]]]) –
List of dictionaries that set the input args, in the same order as the kernel function's parameters. Each dictionary uses the following keys:
key “name”: name of this input
key “value”: value of this input
- np.array should be contiguous array
- constant args:
  
  python type: float -> c type: float
  
  python type: int -> c type: long
  
  or specify c type with field “type”, support types:
  
  [unsigned] int
  
  [unsigned] long
  
  float
  
  double
- custom C struct arg: parse values as list, for example:
  [ {"type": "float", "value": 1.234}, {"type": "int", "value": 1234} ]
key “type”: [Optional] c type string for “value”

You can use input_maker() to create this
output (List[str]) – List of names specifying which arrays will be read back from the GPU buffer
global_work_size (List[int]) – list of integers specifying the global work size
local_work_size (List[int]) – list of integers specifying the local work size, with the same length as global_work_size. Note: local_work_size can be set to [-1,-1] (same length as global_work_size), in which case nullptr is passed to clEnqueueNDRangeKernel
wait (Optional[bool]) – Optional, default true, wait for GPU
timer (Optional[Union[Dict, TimerArgs]]) – Optional, arguments to set up a timer for benchmark kernels

Returns:

time info and output arrays

Return type:

RunnerReturn

oclk.oclk_runner.RunnerCtx(filename, kernel_name, compile_option='')#

class oclk.oclk_runner.RunnerReturn#

Bases: object

including timer results and output array args

Variables:

timer_result – a TimerResult
results – list of return values

__init__()#

results: List[ndarray]#

timer_result: TimerResult#

class oclk.oclk_runner.TimerArgs(enable: bool, warmup: int, repeat: int, name: str)#

Bases: object

Set up the Timer

Variables:

enable – whether to use timer
warmup – warm up loop before timing
repeat – repeat \(n\) times and time it, the result will be \(result = total / n\)
name – timer name

__init__(enable: bool, warmup: int, repeat: int, name: str)#

class oclk.oclk_runner.TimerResult#

Bases: object

Time Results of Runner.run()

Variables:

name – The name of the Global Timer
cnt – How many times the timer was called
avg – Average time among all calls
stdev – Standard deviation
total – Total time accumulated

__init__()#

avg: float#

cnt: int#

name: str#

stdev: float#

total: float#

oclk.oclk_runner.check_init(fn)#

oclk.tuner module#

class oclk.tuner.TuneArgGenerator(method, values)#

Bases: object

__init__(method, values)#

class oclk.tuner.Tuner(name='', **kwargs)#

Bases: object

__init__(name='', **kwargs)#

static exp2_range(start, end)#

generate exp2 values, from start(inclusive) to end(inclusive).

Parameters:

start –
end –

Returns:

static range_arg(name, start, end, step=1)#

decorator to generate ranged arguments; `start`, `end`, and `step` have the same meaning as in range()

Parameters:

name – argument name
start – range start
end – range end
step – range step

run(kernel_file: str, kernel_name: str, compile_option: str, *, input: List[Dict[str, int | float | array]], local_work_size: List[int], global_work_size: List[int], output: List[str] | None = None, timer: Dict | TimerArgs | None = None) → RunnerReturn#

Wrapper for Runner.run. In this method, Runner will load the kernel, run it, and finally release it.

Parameters:

kernel_file – filename can be absolute or relative path
kernel_name – kernel_name is the kernel functions’ name
compile_option – compile option can be strings like “-DMY_DEF=1”, “-D” is necessary
input – see Runner.run
local_work_size – see Runner.run
global_work_size – see Runner.run
output – see Runner.run
timer – see Runner.run

Returns:

see Runner.run

abstract setup()#

abstract method, called before the tunable methods; used to initialize variables

top_result(k=5) → List[Tuple[Dict[str, Any], float]]#

Get the top k results in ascending order

Parameters:

k (int) – top k

Returns:

top k results, for instance:

[
    (
        {
            key:value,
            key2:value2,
            key3:value3,
        },
        1.23
    ),
    (
        {
            key:value,
            key2:value2,
            key3:value3,
        },
        4.56
    ),
    (
        {
            key:value,
            key2:value2,
            key3:value3,
        },
        7.89
    )
]

Return type:

List[Tuple[Dict[str, Any], float]]

static tune()#

decorator, mark a tunable method.

NOTE: the method should be passed kwargs only and must return a value as the metric. All the returned values will be sorted in ascending order to pick the best. For instance, this value can be rtn.timer_result.avg

Raises: TuningSkip – raise TuningSkip to skip an argument combination

tuner_registry = {}#

static values_arg(name, *args)#

decorator, add a values argument generator

Parameters:

name – the name of the argument
args – all possible values

static worksize_arg(name, dim_size: int, dim0: List[int], dim1: List[int] | None = None, dim2: List[int] | None = None)#

decorator to generate worksize arguments

Parameters:

name – argument name
dim_size – work dim size
dim0 – possible values for dim0
dim1 – possible values for dim1
dim2 – possible values for dim2

exception oclk.tuner.TuningSkip(*args, **kwargs)#

Bases: BaseException

__init__(*args, **kwargs)#

oclk.utils module#

oclk.utils.input_maker(**kwargs) → List[Dict[str, int | float | array]]#

easily make an input arguments list for Runner.run()

for example:

input_maker(a=a, b=b, length=(arr_length, "int"), out=out)

this produces a list of dicts:

[
    {"name": "a", "value": a},
    {"name": "b", "value": b},
    {"name": "length", "value": arr_length, "type": "int"},
    {"name": "out", "value": out},
]

Parameters: kwargs – key-value arguments; if a value is a tuple, it should be (value, type)
Returns: input arg list
Return type: List[Dict[str, Any]]