Python source code examples: theano.sandbox.cuda.CudaNdarray()
The following examples show how theano.sandbox.cuda.CudaNdarray() is used in practice; they are drawn mostly from the old Theano CUDA backend's own tests and utility modules.
Example 1
def test_output_broadcast_cuda(self):
    from theano.sandbox import cuda
    if not cuda.cuda_available:
        raise SkipTest("Optional package Cuda disabled")
    if cuda.use.device_number is None:
        # We would normally register VecAsRowAndCol as a GPU op, but we
        # don't want to do that here because it would disable other tests
        # in this file. So we manually initialize the GPU if needed to
        # silence the warning.
        cuda.use("gpu",
                 force=True,
                 default_to_move_computation_to_gpu=False,
                 move_shared_float32_to_gpu=False,
                 enable_cuda=False)
    v = cuda.fvector('v')
    c, r = VecAsRowAndCol()(v)
    f = theano.function([v], [c, r])

    v_val = cuda.CudaNdarray(self.rng.randn(5).astype('float32'))
    f(v_val)
Example 2
def garray_to_cudandarray(x):
    """Take a gnumpy.garray and make a CudaNdarray that points to its memory."""
    if not isinstance(x, gnumpy.garray):
        raise ValueError("We can transfer only gnumpy.garray to CudaNdarray")
    # No dtype check is needed here: cudamat arrays are always float32.
    else:
        # Build C-contiguous strides (counted in elements) from the shape.
        strides = [1]
        for i in x.shape[::-1][:-1]:
            strides.append(strides[-1] * i)
        strides = strides[::-1]
        # Broadcastable (size-1) dimensions get a stride of 0.
        for i in range(len(strides)):
            if x.shape[i] == 1:
                strides[i] = 0
        strides = tuple(strides)

        import ctypes
        # 'long' is Python 2; on Python 3 this would be int.
        ptr_long = long(ctypes.cast(x._base.mat.data_device,
                                    ctypes.c_void_p).value)
        z = cuda.from_gpu_pointer(ptr_long, x.shape, strides, x._base)
        return z
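The stride arithmetic above builds C-contiguous strides, counted in elements rather than bytes, and zeroes the stride of every size-1 dimension so that it broadcasts. A minimal standalone sketch of the same computation (the helper name is illustrative, not part of Theano):
def c_contiguous_strides(shape):
    """Element-count strides for a C-contiguous array of the given shape."""
    strides = [1]
    for dim in shape[::-1][:-1]:
        strides.append(strides[-1] * dim)
    strides = strides[::-1]
    # size-1 dimensions broadcast, so their stride becomes 0
    return tuple(0 if dim == 1 else s for dim, s in zip(shape, strides))

print(c_contiguous_strides((3, 1, 5)))  # (5, 0, 1)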
Example 3
def test_to_cudandarray():
    px = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
    cx = to_cudandarray(px)
    assert isinstance(cx, cuda.CudaNdarray)
    assert numpy.allclose(px.get(), numpy.asarray(cx))
    assert px.dtype == cx.dtype
    assert px.shape == cx.shape
    # CudaNdarray strides count elements; pycuda strides count bytes.
    assert all(numpy.asarray(cx._strides) * 4 == px.strides)

    # a float64 GPUArray must be rejected
    try:
        px = pycuda.gpuarray.zeros((3, 4, 5), 'float64')
        to_cudandarray(px)
        assert False
    except ValueError:
        pass

    # a non-GPUArray input must be rejected
    try:
        to_cudandarray(numpy.zeros(4))
        assert False
    except ValueError:
        pass
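The * 4 in the stride assertion is a unit conversion: cx._strides counts float32 elements (4 bytes each), while pycuda, like numpy, reports strides in bytes. The same conversion on a host array:
import numpy

a = numpy.zeros((3, 4, 5), dtype='float32')
print(a.strides)                                  # byte strides: (80, 20, 4)
print(tuple(s // a.itemsize for s in a.strides))  # element strides: (20, 5, 1)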
Example 4
def test_pycuda_theano():
    """Simple example with a pycuda function and a Theano CudaNdarray object."""
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
    __global__ void multiply_them(float *dest, float *a, float *b)
    {
        const int i = threadIdx.x;
        dest[i] = a[i] * b[i];
    }
    """)
    multiply_them = mod.get_function("multiply_them")

    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)

    # Test with Theano objects
    ga = cuda_ndarray.CudaNdarray(a)
    gb = cuda_ndarray.CudaNdarray(b)
    dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
    # The kernel has no bounds check, so launch exactly one thread per element.
    multiply_them(dest, ga, gb,
                  block=(100, 1, 1), grid=(1, 1))
    assert (numpy.asarray(dest) == a * b).all()
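The unguarded kernel works only when the launch matches the element count exactly. For larger arrays the usual CUDA pattern is a global index plus a bounds check; a sketch of that variant (not part of the original test), reusing a, ga, gb and dest from above:
mod_guarded = SourceModule("""
__global__ void multiply_them_guarded(float *dest, float *a, float *b, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)  // threads past the end of the array do nothing
        dest[i] = a[i] * b[i];
}
""")
multiply_them_guarded = mod_guarded.get_function("multiply_them_guarded")
dest = cuda_ndarray.CudaNdarray.zeros(a.shape)  # fresh output buffer
n = a.size
threads = 128
blocks = (n + threads - 1) // threads  # enough blocks to cover n elements
multiply_them_guarded(dest, ga, gb, numpy.int32(n),
                      block=(threads, 1, 1), grid=(blocks, 1))
assert (numpy.asarray(dest) == a * b).all()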
Example 5
def to_cudandarray(x):
    """Take a pycuda.gpuarray.GPUArray and make a CudaNdarray that points
    to its memory.

    :note: CudaNdarray supports only float32, so only float32 GPUArrays
        are accepted.
    """
    if not isinstance(x, pycuda.gpuarray.GPUArray):
        raise ValueError("We can transfer only pycuda.gpuarray.GPUArray"
                         " to CudaNdarray")
    elif x.dtype != "float32":
        raise ValueError("CudaNdarray support only float32")
    else:
        # Build C-contiguous strides (counted in elements) from the shape.
        strides = [1]
        for i in x.shape[::-1][:-1]:
            strides.append(strides[-1] * i)
        strides = tuple(strides[::-1])
        ptr = int(x.gpudata)  # in pycuda trunk, x.ptr also works, which is a little cleaner
        z = cuda.from_gpu_pointer(ptr, x.shape, strides, x)
        return z
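A minimal round trip, assuming a CUDA context initialised via pycuda.autoinit; the resulting CudaNdarray shares the GPUArray's device memory, so no copy is made:
import numpy
import pycuda.autoinit
import pycuda.gpuarray

ga = pycuda.gpuarray.to_gpu(numpy.random.rand(3, 4).astype('float32'))
ca = to_cudandarray(ga)  # ca now points at ga's device memory
assert numpy.allclose(ga.get(), numpy.asarray(ca))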
Example 6
def test_host_to_device():
    for shape in ((), (3,), (2, 3), (3, 4, 5, 6)):
        a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        c = numpy.asarray(b)
        assert numpy.all(a == c)

        # test with the float32 dtype
        d = numpy.asarray(b, dtype='float32')
        assert numpy.all(a == d)

        # a non-float32 dtype must be rejected
        try:
            numpy.asarray(b, dtype='int8')
            assert False
        except TypeError:
            pass
Example 7
def test_exp():
    for shape in ((), (3,), (2, 3),
                  (1, 10000000), (10, 1000000),
                  (100, 100000), (1000, 10000), (10000, 1000)):
        a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        a1 = a0.copy()
        b0 = cuda_ndarray.CudaNdarray(a0)
        b1 = cuda_ndarray.CudaNdarray(a1)

        t0 = time.time()
        bsum = b0.exp()
        t1 = time.time()
        gpu_dt = t1 - t0

        t0 = time.time()
        asum = numpy.exp(a1)
        t1 = time.time()
        cpu_dt = t1 - t0

        if asum.shape:
            assert numpy.allclose(asum, numpy.asarray(bsum))
Example 8
def test_copy():
    shape = (500, 499)
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')

    # create the device object, then a shallow and a deep copy
    b = cuda_ndarray.CudaNdarray(a)
    c = copy.copy(b)
    d = copy.deepcopy(b)

    assert numpy.allclose(a, numpy.asarray(b))
    assert numpy.allclose(a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d))

    # an in-place update of b is visible through the shallow copy c,
    # but not through the deep copy d
    b += b
    assert numpy.allclose(a + a, numpy.asarray(b))
    assert numpy.allclose(a + a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d))
Example 9
def test_getshape():
    shapelist = [
        ((1, 2, 3), (1, 2, 3)),
        ((1,), (1,)),
        ((1, 2, 3), (3, 2, 1)),
        ((1, 2, 3), (6,)),
        ((1, 2, 3, 2), (6, 2)),
        ((2, 3, 2), (6, 2)),
    ]

    def subtest(shape):
        a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert b.shape == a.shape

    for shape_1, shape_2 in shapelist:
        subtest(shape_1)
        subtest(shape_2)
Example 10
def test_stride_manipulation():
    a = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    v = b.view()
    v._dev_data += 0
    c = numpy.asarray(v)
    assert numpy.all(a == c)

    sizeof_float = 4
    offset = 0
    b_strides = b._strides
    # Point the view at the last element and negate every stride,
    # which reverses the array along all dimensions.
    for i in xrange(len(b.shape)):
        offset += (b.shape[i] - 1) * b_strides[i]
        v._set_stride(i, -b_strides[i])
    v._dev_data += offset * sizeof_float

    c = numpy.asarray(v)
    assert numpy.all(c == [[5, 4, 3], [2, 1, 0]])
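The offset-plus-negative-strides trick above is the device-side analogue of numpy's negative-step slicing; the same reversal on the host, for comparison:
import numpy

a = numpy.asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
print(a[::-1, ::-1])
# [[5. 4. 3.]
#  [2. 1. 0.]]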
Example 11
def test_setitem_matrixscalar0():
    a = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)
    b = theano._asarray(8, dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # set an element to 8
    _a[1, 1] = _b
    a[1, 1] = b
    assert numpy.allclose(a, numpy.asarray(_a))

    # test direct transfer from numpy
    _a[1, 1] = theano._asarray(888, dtype='float32')
    a[1, 1] = theano._asarray(888, dtype='float32')
    assert numpy.allclose(a, numpy.asarray(_a))

    # broadcast a 0
    _a[1, 1] = 0
    _a[0:2] = 0
    _a[1:] = 0
Example 12
def test_setitem_matrixvector1():
    a = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)
    b = theano._asarray([8, 9], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # set the second column to 8, 9
    _a[:, 1] = _b
    a[:, 1] = b
    assert numpy.allclose(a, numpy.asarray(_a))

    # test direct transfer from numpy
    _a[:, 1] = b * 100
    a[:, 1] = b * 100
    assert numpy.allclose(a, numpy.asarray(_a))

    # set a whole row
    row = theano._asarray([777, 888, 999], dtype='float32')
    _a[1, :] = row
    a[1, :] = row
    assert numpy.allclose(a, numpy.asarray(_a))
Example 13
def test_setitem_matrix_bad_shape():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)
    b = theano._asarray([7, 8], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # assigning a CudaNdarray of the wrong length must raise
    try:
        _a[:, 1, 1] = _b
        assert False
    except ValueError:
        pass

    # the same direct transfer from numpy must also raise
    try:
        _a[1, 1, :] = b
        assert False
    except ValueError:
        pass
Example 14
def test_setitem_matrix_bad_ndim():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)
    b = theano._asarray([7, 8], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # assigning a CudaNdarray with the wrong number of dimensions must raise
    try:
        _a[:, :, 1] = _b
        assert False
    except ValueError:
        pass

    # the same direct transfer from numpy must also raise
    try:
        _a[1, :, :] = b
        assert False
    except ValueError:
        pass
Example 15
def test_setitem_matrix_bad_type():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)
    b = theano._asarray([7, 8], dtype='float64')

    # assigning a float64 ndarray must raise
    try:
        _a[1, :, :] = b
        assert False
    except TypeError:
        pass
Example 16
def test_setitem_assign_to_slice():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)
    b = theano._asarray([7, 8, 9], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # first get a slice of a
    _c = _a[:, :, 1]

    # set the middle row through the cube to 7, 8, 9
    # (this corresponds to the middle row of matrix _c)
    _c[:, 1] = _b
    a[:, :, 1][:, 1] = b
    assert numpy.allclose(a, numpy.asarray(_a))

    # test direct transfer from numpy
    _d = _a[1, :, :]
    _d[1, :] = b * 10
    a[1, :, :][1, :] = b * 10
    assert numpy.allclose(a, numpy.asarray(_a))
Example 17
def test_base():
    # Test that the 'base' attribute of a CudaNdarray is the array
    # built initially, not an intermediate view.
    a = cuda_ndarray.CudaNdarray.zeros((3, 4, 5))
    for i in xrange(5):
        b = a[:]
        assert b.base is a

        c = a[0]
        d = c[:, 0]
        assert c.base is a
        assert d.base is a

        e = b.reshape((5, 2, 2, 3))
        assert e.base is a
Example 18
def test_set_strides():
    a = cuda_ndarray.CudaNdarray.zeros((5, 5))

    # Test with a tuple
    new_strides = (a.strides[1], a.strides[0])
    a.strides = new_strides
    assert a.strides == new_strides

    # Test with a list
    new_strides = (a.strides[1], a.strides[0])
    a.strides = [a.strides[1], a.strides[0]]
    assert a.strides == new_strides

    # too few strides must raise
    try:
        a.strides = (a.strides[1],)
        assert False
    except ValueError:
        pass

    # too many strides must raise
    try:
        a.strides = (1, 1, 1)
        assert False
    except ValueError:
        pass
Example 19
def speed_elemwise_collapse():
    """Used to time whether collapsing contiguous dims is useful."""
    shape = (30, 40, 50, 600)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2[:, ::2, :, :]
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b * tensor.exp(1 + b ** a3)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    v = v[:, ::2, :, :]
    v = cuda_ndarray.CudaNdarray(v)
    t1 = time.time()
    for i in range(100):
        # let debugmode catch errors
        f(v)
    t2 = time.time()
Example 20
def speed_elemwise_collapse2():
    """Used to test the speedup of the generalised collapse of
    contiguous dims."""
    shape = (30, 40, 50, 600)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2[:, :, :, ::2]
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b * tensor.exp(1 + b ** a3)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    v = v[:, :, :, ::2]
    v = cuda_ndarray.CudaNdarray(v)
    t1 = time.time()
    for i in range(100):
        # let debugmode catch errors
        f(v)
    t2 = time.time()
Example 21
def test_elemwise_collapse():
    """Test when all inputs have one (and the same) broadcastable dimension."""
    shape = (4, 5, 60)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    b = tcn.CudaNdarrayType((False, True, False, False))()
    c = a3 + b
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(shape[0], 1, *shape[1:]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(shape[0], 1, *shape[1:]) + v)
    # Expected collapse of all dimensions
Example 22
def test_elemwise_collapse2():
    """Test when only one input has a broadcastable dimension."""
    shape = (4, 5, 9)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(shape[0], 5, *shape[1:]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(shape[0], 1, *shape[1:]) + v)
    # Expected collapse to 3 dimensions
Example 23
def test_elemwise_collapse4():
    """Test when only one input has a broadcastable dimension at each
    end and we add a scalar."""
    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 0, 1, 'x')
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b + 2
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(5, shape[0], shape[1], 4),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, shape[0], shape[1], 1) + v + 2)
    # Expected collapse to 3 dimensions
Example 24
def test_elemwise_collapse5():
    """Test when only one input has two broadcastable dimensions at the
    beginning and we add a scalar."""
    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b + 2
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(5, 4, shape[0], shape[1]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v + 2)
    # Expected collapse to 2 dimensions
Example 25
def test_elemwise_collapse6():
    """Test when all inputs have two broadcastable dimensions at the
    beginning."""
    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((True, True, False, False))()
    f = pfunc([b], [a3 + b], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(1, 1, shape[0], shape[1]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v)
    # Expected collapse to a c-contiguous array
Example 26
def test_elemwise_collapse7(atol=1e-6):
    """Test when one input has a broadcastable dimension and the
    other is a scalar."""
    shape = (5, 4, 1)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a.copy(), 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    f = pfunc([], [a3 + 2], mode=mode_with_gpu)

    # let debugmode catch errors
    out = f()[0]
    ans = (a + 2).reshape(shape[0], 1, shape[1], shape[2])
    assert numpy.allclose(out, ans, atol=atol)
    # Expected collapse to a c-contiguous array
Example 27
def test_deepcopy():
    a = cuda.fmatrix()
    a_v = cuda.CudaNdarray(numpy.zeros((3, 4), dtype='float32'))

    # Force the C linker, to check that we generate C code
    mode = theano.Mode("c", mode_with_gpu.optimizer)
    f = theano.function([a], a, mode=mode)
    theano.printing.debugprint(f)
    out = f(a_v)
    assert out is not a_v
    assert numpy.allclose(numpy.asarray(a_v), numpy.asarray(out))

    # Force the Python linker, as the default code should work for this op
    mode = theano.Mode("py", mode_with_gpu.optimizer)
    f = theano.function([a], a, mode=mode)
    theano.printing.debugprint(f)
    out = f(a_v)
    assert out is not a_v
    assert numpy.allclose(numpy.asarray(a_v), numpy.asarray(out))