Python/OpenCL: Difference between revisions
< Python
Jump to navigation
Jump to search
(→Prefix) |
|||
| (37 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
= | = PyOpenCL = | ||
== | == Passing data to kernel functions. == | ||
=== Prefix === | |||
{| class="wikitable" | {| class="wikitable" | ||
! OpenCL || Python | ! OpenCL || Python | ||
|- | |- | ||
| float || np.float32 || np. | | __global const long* data || | ||
<source lang="python3"> | |||
# Read Only | |||
py_param = [1, 2, 3, 4, 5, 6] | |||
cl_param = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np.int64(py_param)) | |||
</source> | |||
|- | |||
| __global long* data || | |||
<source lang="python3"> | |||
# Read Write | |||
py_param = np.int64([1, 2, 3, 4, 5, 6]) | |||
cl_param = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=py_param) | |||
# Write Only | |||
py_param = np.int64([0, 0, 0, 0, 0]) | |||
cl_param = cl.Buffer(ctx, mf.WRITE_ONLY, py_param.nbytes) | |||
</source> | |||
|- | |||
| __private long i || | |||
<source lang="python3"> | |||
cl_param = np.int64(9999) | |||
</source> | |||
|} | |||
=== Type Mapping === | |||
{| class="wikitable" | |||
! OpenCL || Python | |||
|- | |||
| float || np.float32 | |||
|- | |||
| double || np.float64 | |||
|- | |- | ||
| | | long || np.int64 | ||
|- | |- | ||
| int | | int || np.int32 | ||
|- | |- | ||
| | | short || np.int16 | ||
|- | |- | ||
| char || | | char || np.int8 or bytearray | ||
|} | |} | ||
| Line 124: | Line 155: | ||
print(type(d[0]).__name__) | print(type(d[0]).__name__) | ||
</source> | </source> | ||
== Merge Sort == | |||
=== Sort 8 items === | |||
<quickgv name="s8" theme="warm"> | |||
rankdir=TB; | |||
A [label="8 7 6 5 4 3 2 1"]; | |||
subgraph cluster_1st { | |||
label="group size: 4"; | |||
B1 [label="8 | 7"]; | |||
B2 [label="6 | 5"]; | |||
B3 [label="4 | 3"]; | |||
B4 [label="2 | 1"]; | |||
C1 [label="7 8"]; | |||
C2 [label="5 6"]; | |||
C3 [label="3 4"]; | |||
C4 [label="1 2"]; | |||
} | |||
subgraph cluster_2nd { | |||
label="group size: 2"; | |||
D1 [label="7 8 | 5 6"]; | |||
D2 [label="3 4 | 1 2"]; | |||
E1 [label="5 6 7 8"]; | |||
E2 [label="1 2 3 4"]; | |||
} | |||
subgraph cluster_3rd { | |||
label="group size: 1"; | |||
F1 [label="5 6 7 8 | 1 2 3 4"]; | |||
G1 [label="1 2 3 4 5 6 7 8"]; | |||
} | |||
A -> {B1 B2 B3 B4}; | |||
B1 -> C1; | |||
B2 -> C2; | |||
B3 -> C3; | |||
B4 -> C4; | |||
{C1 C2} -> D1; | |||
{C3 C4} -> D2; | |||
D1 -> E1; | |||
D2 -> E2; | |||
{E1 E2} -> F1; | |||
F1 -> G1; | |||
</quickgv> | |||
=== Sort 5 items === | |||
<quickgv name="s10" theme="warm"> | |||
// rankdir="TB"; | |||
A [label="5 4 3 2 1"]; | |||
subgraph cluster_1st { | |||
label="group size: 3\nset size: 2"; | |||
B1 [label="5 | 4"]; | |||
B2 [label="3 | 2"]; | |||
B3 [label="1"]; | |||
C1 [label="4 5"]; | |||
C2 [label="2 3"]; | |||
C3 [label="1"]; | |||
} | |||
subgraph cluster_2nd { | |||
label="group size: 2\nset size: 4"; | |||
D1 [label="4 5 | 2 3"]; | |||
D2 [label="1"]; | |||
E1 [label="2 3 4 5"]; | |||
E2 [label="1"]; | |||
} | |||
subgraph cluster_3rd { | |||
label="group size: 1\nset size: 8"; | |||
F1 [label="2 3 4 5 | 1"]; | |||
G1 [label="1 2 3 4 5"]; | |||
} | |||
A -> {B1 B2 B3}; | |||
B1 -> C1; | |||
B2 -> C2; | |||
B3 -> C3; | |||
{C1 C2} -> D1; | |||
{C3} -> D2; | |||
D1 -> E1; | |||
D2 -> E2; | |||
{E1 E2} -> F1; | |||
F1 -> G1; | |||
</quickgv> | |||
Latest revision as of 09:55, 15 June 2018
PyOpenCL
Passing data to kernel functions.
Prefix
| OpenCL | Python |
|---|---|
| __global const long* data |
# Read Only
# The kernel parameter is `__global const long*`, so the host data is
# converted to a 64-bit integer array before it is copied into the buffer.
py_param = [1, 2, 3, 4, 5, 6]
host_array = np.array(py_param, dtype=np.int64)
cl_param = cl.Buffer(
    ctx,
    mf.READ_ONLY | mf.COPY_HOST_PTR,
    hostbuf=host_array,
)
|
| __global long* data |
# Read Write
# The buffer is initialised with a copy of the host array's contents.
py_param = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
rw_flags = mf.READ_WRITE | mf.COPY_HOST_PTR
cl_param = cl.Buffer(ctx, rw_flags, hostbuf=py_param)
# Write Only
# No host data is copied here; py_param only supplies the size in bytes.
py_param = np.zeros(5, dtype=np.int64)
cl_param = cl.Buffer(ctx, mf.WRITE_ONLY, py_param.nbytes)
|
| __private long i |
# By-value `long` kernel argument: pass a NumPy scalar of the matching
# 64-bit integer type rather than a buffer.
cl_param = np.int64(9999)
|
Type Mapping
| OpenCL | Python |
|---|---|
| float | np.float32 |
| double | np.float64 |
| long | np.int64 |
| int | np.int32 |
| short | np.int16 |
| char | np.int8 or bytearray |
See Also:
Sample
import os
import numpy as np
import pyopencl as cl
# Kernel code.
# OpenCL C source with four element-wise "add one" kernels, one per element
# type (float, double, int, long). Each work-item reads a_g[gid] and stores
# the incremented value in res_g[gid].
# inc_f32 uses the float literal 1.0f: an unsuffixed 1.0 is a double constant
# in OpenCL C, which would needlessly pull double arithmetic into the
# single-precision kernel.
CL_INC = '''
__kernel void inc_f32(__global const float *a_g, __global float *res_g)
{
    int gid = get_global_id(0);
    res_g[gid] = a_g[gid] + 1.0f;
}
__kernel void inc_f64(__global const double *a_g, __global double *res_g)
{
    int gid = get_global_id(0);
    res_g[gid] = a_g[gid] + 1.0;
}
__kernel void inc_i32(__global const int *a_g, __global int *res_g)
{
    int gid = get_global_id(0);
    res_g[gid] = a_g[gid] + 1;
}
__kernel void inc_i64(__global const long *a_g, __global long *res_g)
{
    int gid = get_global_id(0);
    res_g[gid] = a_g[gid] + 1;
}
'''
# Build
# Default to platform 0 / device 1 (Apple > HD Graphics 4000 on a mid-2012
# MacBook Air), but keep any PYOPENCL_CTX value the user has already set so
# the sample does not force a device that may not exist on other machines.
os.environ.setdefault('PYOPENCL_CTX', '0:1')
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags  # short alias for the buffer memory flags used below
prg = cl.Program(ctx, CL_INC).build()
# Shorthand to call kernel function.
def call_kernel(kernel_name, *args):
    """Run one kernel of ``prg`` on a single input array and return the output.

    kernel_name: name of the kernel to invoke; it is looked up on the built
        program ``prg`` with ``getattr``.
    args: only ``args[0]`` is used. It is the NumPy input array, and its dtype
        should match the kernel's element type.

    Returns a new NumPy array with the same shape and dtype as the input,
    holding the values copied back from the output buffer.

    Raises IndexError if no input array is passed.
    """
    in_np = args[0]
    out_np = np.empty_like(in_np)
    in_cl = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=in_np)
    # The output buffer is sized to the input; nothing is copied from the host.
    out_cl = cl.Buffer(ctx, mf.WRITE_ONLY, in_np.nbytes)
    # Global size is the input's shape; the local size is left unspecified.
    getattr(prg, kernel_name)(queue, in_np.shape, None, in_cl, out_cl)
    cl.enqueue_copy(queue, out_np, out_cl)
    return out_np
# Test inc_f32(), inc_f64(), inc_i32() and inc_i64().
# Every input is created with an explicit dtype so the host buffer's element
# size always matches the kernel's pointer type. Relying on NumPy's default
# integer type for inc_i64 is not portable: that default is 32-bit on some
# platforms, which would hand the `long*` kernel a buffer of 4-byte elements.
for kernel_name, dtype in (
    ('inc_f32', np.float32),
    ('inc_f64', np.float64),
    ('inc_i32', np.int32),
    ('inc_i64', np.int64),
):
    in_np = np.array([1, 2, 3, 4, 5], dtype=dtype)
    out_np = call_kernel(kernel_name, in_np)
    print('Results of ' + kernel_name + '()')
    print(in_np)
    print(out_np)
    print()
numpy
Data Types
import numpy as np
# The default int type is platform dependent: int64 on most 64-bit systems,
# but int32 on 32-bit systems and on Windows with NumPy older than 2.0.
a = np.array([1, 2, 3])
print(type(a[0]).__name__)
# Default float type is float64
b = np.array([1.0, 2.0, 3.0])
print(type(b[0]).__name__)
# Force int32 (dtype= builds the array directly instead of converting a copy)
c = np.array([1, 2, 3], dtype=np.int32)
print(type(c[0]).__name__)
# Force float32
d = np.array([1.0, 2.0, 3.0], dtype=np.float32)
print(type(d[0]).__name__)