Python source code examples: theano.sandbox.cuda.CudaNdarray()
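
These examples come from the old theano.sandbox.cuda GPU backend (pre-1.0 Theano) and its test suite, so running them requires a CUDA GPU and that backend. As a minimal orientation sketch, grounded in Examples 1 and 6 below, a CudaNdarray is built from a float32 numpy array and read back with numpy.asarray:

import numpy
from theano.sandbox import cuda  # the old GPU backend, as imported in Example 1

a = numpy.random.rand(2, 3).astype('float32')  # CudaNdarray supports only float32
b = cuda.CudaNdarray(a)   # host -> device copy
c = numpy.asarray(b)      # device -> host copy
assert numpy.all(a == c)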

Example 1
def test_output_broadcast_cuda(self):
    from theano.sandbox import cuda
    if not cuda.cuda_available:
        raise SkipTest("Optional package Cuda disabled")
    if cuda.use.device_number is None:
        # We would normally register VecAsRowAndCol as a GPU op, but we
        # don't want to do that here because it would disable other
        # tests in this file.  So we manually init the GPU if
        # needed, to silence the warning.
        cuda.use("gpu",
                 force=True,
                 default_to_move_computation_to_gpu=False,
                 move_shared_float32_to_gpu=False,
                 enable_cuda=False)
    v = cuda.fvector('v')
    c, r = VecAsRowAndCol()(v)
    f = theano.function([v], [c, r])

    v_val = cuda.CudaNdarray(self.rng.randn(5).astype('float32'))
    f(v_val)
Example 2
def garray_to_cudandarray(x):
    """ take a gnumpy.garray and make a CudaNdarray that points to its memory
    """
    if not isinstance(x, gnumpy.garray):
        raise ValueError("We can transfer only gnumpy.garray to CudaNdarray")
    # No float32 check is needed here: cudamat, gnumpy's backend, is
    # always float32.
    else:
        # Build C-contiguous element strides from the shape.
        strides = [1]
        for i in x.shape[::-1][:-1]:
            strides.append(strides[-1] * i)
        strides = strides[::-1]
        # Broadcastable (size-1) dimensions get stride 0.
        for i in range(len(strides)):
            if x.shape[i] == 1:
                strides[i] = 0
        strides = tuple(strides)

        import ctypes
        # The original used Python 2's ``long``; int() behaves the same on 2 and 3.
        ptr_long = int(ctypes.cast(x._base.mat.data_device, ctypes.c_void_p).value)

        # Wrap the raw device pointer without copying; x._base keeps it alive.
        z = cuda.from_gpu_pointer(ptr_long, x.shape, strides, x._base)
        return z
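
A hedged usage sketch for the helper above (assuming gnumpy and its cudamat backend are installed; gnumpy.rand and garray.as_numpy_array are gnumpy's own API): the returned CudaNdarray aliases the garray's device buffer instead of copying it.

import numpy
import gnumpy

g = gnumpy.rand(3, 4)          # float32 garray living on the GPU
z = garray_to_cudandarray(g)   # no copy: z points at g's device memory
assert numpy.allclose(g.as_numpy_array(), numpy.asarray(z))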
Example 3
def test_to_cudandarray():
    px = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
    cx = to_cudandarray(px)
    assert isinstance(cx, cuda.CudaNdarray)
    assert numpy.allclose(px.get(),
                          numpy.asarray(cx))
    assert px.dtype == cx.dtype
    assert px.shape == cx.shape
    assert all(numpy.asarray(cx._strides) * 4 == px.strides)

    try:
        px = pycuda.gpuarray.zeros((3, 4, 5), 'float64')
        to_cudandarray(px)
        assert False
    except ValueError:
        pass

    try:
        to_cudandarray(numpy.zeros(4))
        assert False
    except ValueError:
        pass 
Example 4
def test_pycuda_theano():
    """Simple example with pycuda function and Theano CudaNdarray object."""
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
""")

    multiply_them = mod.get_function("multiply_them")

    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)

    # Test with Theano object
    ga = cuda_ndarray.CudaNdarray(a)
    gb = cuda_ndarray.CudaNdarray(b)
    dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
    # One thread per element: a block of (400, 1, 1) here would let threads
    # 100..399 index past the 100-element arrays.
    multiply_them(dest, ga, gb,
                  block=(100, 1, 1), grid=(1, 1))
    assert (numpy.asarray(dest) == a * b).all() 
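
Launch geometry has to cover the data. Below is a small hypothetical helper (not part of the original test) that picks a 1-D block/grid for n elements; note that whenever blocks * threads exceeds n, the kernel itself needs an "if (i < n)" guard, which the kernel above does not have.

def launch_1d(n, threads_per_block=128):
    """Return (block, grid) tuples covering n elements with a 1-D launch."""
    blocks = (n + threads_per_block - 1) // threads_per_block  # ceiling division
    return (threads_per_block, 1, 1), (blocks, 1)

block, grid = launch_1d(100)
assert block == (128, 1, 1) and grid == (1, 1)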
Example 5
def to_cudandarray(x):
    """ take a pycuda.gpuarray.GPUArray and make a CudaNdarray that points to its memory

    :note: CudaNdarray supports only float32, so only float32 GPUArrays are accepted
    """
    if not isinstance(x, pycuda.gpuarray.GPUArray):
        raise ValueError("We can transfer only pycuda.gpuarray.GPUArray to CudaNdarray")
    elif x.dtype != "float32":
        raise ValueError("CudaNdarray support only float32")
    else:
        strides = [1]
        for i in x.shape[::-1][:-1]:
            strides.append(strides[-1] * i)
        strides = tuple(strides[::-1])
        ptr = int(x.gpudata)  # in pycuda trunk, y.ptr also works, which is a little cleaner
        z = cuda.from_gpu_pointer(ptr, x.shape, strides, x)
        return z 
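
The stride loop above builds C-contiguous strides counted in elements, not bytes, which is why Example 3 multiplies cx._strides by 4 (sizeof float32) before comparing with pycuda's byte strides. A standalone sketch of the same computation, runnable without a GPU:

def c_contiguous_strides(shape):
    """Element strides for a C-contiguous array, as computed above."""
    strides = [1]
    for dim in shape[::-1][:-1]:
        strides.append(strides[-1] * dim)
    return tuple(strides[::-1])

# A (3, 4, 5) float32 array has element strides (20, 5, 1),
# i.e. byte strides (80, 20, 4).
assert c_contiguous_strides((3, 4, 5)) == (20, 5, 1)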
Example 6
def test_host_to_device():
    #print >>sys.stdout, 'starting test_host_to_dev'
    for shape in ((), (3,), (2, 3), (3, 4, 5, 6)):
        a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        c = numpy.asarray(b)
        assert numpy.all(a == c)

        # test with float32 dtype
        d = numpy.asarray(b, dtype='float32')
        assert numpy.all(a == d)

        # test with a non-float32 dtype
        try:
            numpy.asarray(b, dtype='int8')
            assert False
        except TypeError:
            pass 
Example 7
def test_exp():
    #print >>sys.stdout, 'starting test_exp'
    for shape in ((), (3,), (2, 3),
                  (1, 10000000), (10, 1000000),
                  (100, 100000), (1000, 10000), (10000, 1000)):
        a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        a1 = a0.copy()
        b0 = cuda_ndarray.CudaNdarray(a0)
        b1 = cuda_ndarray.CudaNdarray(a1)
        t0 = time.time()
        bsum = b0.exp()
        t1 = time.time()
        gpu_dt = t1 - t0
        t0 = time.time()
        asum = numpy.exp(a1)
        t1 = time.time()
        cpu_dt = t1 - t0
        # print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
        #c = numpy.asarray(b0+b1)
        if asum.shape:
            assert numpy.allclose(asum, numpy.asarray(bsum)) 
Example 8
def test_copy():
    #print >>sys.stdout, 'starting test_copy'
    shape = (500, 499)
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')

    #print >>sys.stdout, '.. creating device object'
    b = cuda_ndarray.CudaNdarray(a)

    #print >>sys.stdout, '.. copy'
    c = copy.copy(b)
    #print >>sys.stdout, '.. deepcopy'
    d = copy.deepcopy(b)

    #print >>sys.stdout, '.. comparisons'
    assert numpy.allclose(a, numpy.asarray(b))
    assert numpy.allclose(a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d))
    b += b
    assert numpy.allclose(a+a, numpy.asarray(b))
    assert numpy.allclose(a+a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d)) 
Example 9
def test_getshape():
    shapelist = [
            ((1, 2, 3), (1, 2, 3)),
            ((1,), (1,)),
            ((1, 2, 3), (3, 2, 1)),
            ((1, 2, 3), (6,)),
            ((1, 2, 3, 2), (6, 2)),
            ((2, 3, 2), (6, 2))
             ]

    def subtest(shape):
        a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert b.shape == a.shape

    for shape_1, shape_2 in shapelist:
        subtest(shape_1)
        subtest(shape_2) 
Example 10
def test_stride_manipulation():

    a = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    v = b.view()
    v._dev_data += 0
    c = numpy.asarray(v)
    assert numpy.all(a == c)

    sizeof_float = 4
    offset = 0

    b_strides = b._strides
    for i in xrange(len(b.shape)):
        offset += (b.shape[i]-1) * b_strides[i]
        v._set_stride(i, -b_strides[i])

    v._dev_data += offset * sizeof_float
    c = numpy.asarray(v)

    assert numpy.all(c == [[5, 4, 3], [2, 1, 0]]) 
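
The same trick works in plain numpy, which makes a convenient CPU analogue of what the test does to the CudaNdarray view: offset the data pointer to the last element and negate every stride to get a fully reversed view.

import numpy

a = numpy.asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
r = a[::-1, ::-1]   # numpy builds this view with negative strides
assert (r == [[5, 4, 3], [2, 1, 0]]).all()
assert r.strides == (-a.strides[0], -a.strides[1])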
Example 11
def test_setitem_matrixscalar0():
    a = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray(8, dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # set an element to 8
    _a[1, 1] = _b
    a[1, 1] = b
    assert numpy.allclose(a, numpy.asarray(_a))

    # test direct transfer from numpy
    _a[1, 1] = theano._asarray(888, dtype='float32')
    a[1, 1] = theano._asarray(888, dtype='float32')
    assert numpy.allclose(a, numpy.asarray(_a))

    # broadcast a 0
    _a[1, 1] = 0
    _a[0:2] = 0
    _a[1:] = 0 
Example 12
def test_setitem_matrixvector1():
    a = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([8, 9], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # set second column to 8,9
    _a[:, 1] = _b
    a[:, 1] = b
    assert numpy.allclose(a, numpy.asarray(_a))

    # test direct transfer from numpy
    _a[:, 1] = b*100
    a[:, 1] = b*100
    assert numpy.allclose(a, numpy.asarray(_a))

    row = theano._asarray([777, 888, 999], dtype='float32')
    _a[1, :] = row
    a[1, :] = row
    assert numpy.allclose(a, numpy.asarray(_a)) 
Example 13
def test_setitem_matrix_bad_shape():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7, 8], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    try:
        # attempt to assign the ndarray b with setitem
        _a[:, 1, 1] = _b
        assert False
    except ValueError as e:
        # print e
        assert True

    # test direct transfer from numpy
    try:
        # attempt to assign the ndarray b with setitem
        _a[1, 1, :] = b
        assert False
    except ValueError as e:
        # print e
        assert True 
Example 14
def test_setitem_matrix_bad_ndim():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7, 8], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    try:
        # attempt to assign the ndarray b with setitem
        _a[:, :, 1] = _b
        assert False
    except ValueError as e:
        # print e
        assert True

    # test direct transfer from numpy
    try:
        # attempt to assign the ndarray b with setitem
        _a[1, :, :] = b
        assert False
    except ValueError as e:
        # print e
        assert True 
Example 15
def test_setitem_matrix_bad_type():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7, 8], dtype='float64')

    # test direct transfer from numpy
    try:
        # attempt to assign the ndarray b with setitem
        _a[1, :, :] = b
        assert False
    except TypeError as e:
        # print e
        assert True 
Example 16
def test_setitem_assign_to_slice():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7, 8, 9], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # first get a slice of a
    _c = _a[:, :, 1]

    # set middle row through cube to 7,8,9
    # (this corresponds to middle row of matrix _c)
    _c[:, 1] = _b

    a[:, :, 1][:, 1] = b
    assert numpy.allclose(a, numpy.asarray(_a))

    # test direct transfer from numpy
    _d = _a[1, :, :]
    _d[1, :] = b*10
    a[1, :, :][1, :] = b*10
    assert numpy.allclose(a, numpy.asarray(_a)) 
Example 17
def test_base():
    # Test that the 'base' attribute of a CudaNdarray is the one
    # built initially, not an intermediate one.
    a = cuda_ndarray.CudaNdarray.zeros((3, 4, 5))
    for i in xrange(5):
        b = a[:]
    assert b.base is a

    c = a[0]
    d = c[:, 0]
    # print d.shape
    assert c.base is a
    assert d.base is a

    e = b.reshape((5, 2, 2, 3))
    assert e.base is a 
Example 18
def test_set_strides():
    a = cuda_ndarray.CudaNdarray.zeros((5, 5))

    # Test with tuple
    new_strides = (a.strides[1], a.strides[0])
    a.strides = new_strides
    assert a.strides == new_strides

    # Test with list
    new_strides = (a.strides[1], a.strides[0])
    a.strides = [a.strides[1], a.strides[0]]
    assert a.strides == new_strides

    try:
        a.strides = (a.strides[1],)
        assert False
    except ValueError:
        pass

    try:
        a.strides = (1, 1, 1)
        assert False
    except ValueError:
        pass 
Example 19
def speed_elemwise_collapse():
    """ used to time whether the collapse of c-contiguous dims is useful """

    shape = (30, 40, 50, 600)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2[:, ::2, :, :]
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b * tensor.exp(1 + b ** a3)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    v = v[:, ::2, :, :]
    v = cuda_ndarray.CudaNdarray(v)
    t1 = time.time()
    for i in range(100):
        # let debugmode catch errors
        f(v)
    t2 = time.time() 
Example 20
def speed_elemwise_collapse2():
    """ used to test the speed-up of the generalised collapse of
    c-contiguous dims"""

    shape = (30, 40, 50, 600)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2[:, :, :, ::2]
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b * tensor.exp(1 + b ** a3)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    v = v[:, :, :, ::2]
    v = cuda_ndarray.CudaNdarray(v)
    t1 = time.time()
    for i in range(100):
        # let debugmode catch errors
        f(v)
    t2 = time.time() 
Example 21
def test_elemwise_collapse():
    """ Test when all inputs have one (and the same) broadcastable dimension """

    shape = (4, 5, 60)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    b = tcn.CudaNdarrayType((False, True, False, False))()
    c = a3 + b
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(shape[0], 1, *shape[1:]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)

    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(shape[0], 1, *shape[1:]) + v)
    # print "Expected collapse of all dimensions" 
Example 22
def test_elemwise_collapse2():
    """ Test when only one input has one broadcastable dimension """

    shape = (4, 5, 9)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(shape[0], 5, *shape[1:]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(shape[0], 1, *shape[1:]) + v)
    # print "Expected collapse to 3 dimensions" 
Example 23
def test_elemwise_collapse4():
    """ Test when only one input has two broadcastable dimensions, one at
    each end, and we add a scalar"""

    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 0, 1, 'x')
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = (a3 + b + 2)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(5, shape[0], shape[1], 4),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, shape[0], shape[1], 1) + v + 2)
    # print "Expected collapse to 3 dimensions" 
Example 24
def test_elemwise_collapse5():
    """ Test when only one input has two broadcastable dimensions at
    the beginning and we add a scalar"""

    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = (a3 + b + 2)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(5, 4, shape[0], shape[1]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)

    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v + 2)
    # print "Expected collapse to 2 dimensions" 
Example 25
def test_elemwise_collapse6():
    """ Test when all inputs have two broadcastable dimensions at the
    beginning"""

    shape = (4, 5)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((True, True, False, False))()
    f = pfunc([b], [a3 + b], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(1, 1, shape[0], shape[1]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v)
    # print "Expected collapse to c contiguous" 
Example 26
def test_elemwise_collapse7(atol=1e-6):
    """ Test when one input has one broadcastable dimension and the
    other is a scalar"""

    shape = (5, 4, 1)
    a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                 dtype='float32'))
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a.copy(), 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    f = pfunc([], [a3 + 2], mode=mode_with_gpu)

    # let debugmode catch errors
    out = f()[0]
    ans = (a + 2).reshape(shape[0], 1, shape[1], shape[2])
    assert numpy.allclose(out, ans, atol=atol)
    # print "Expected collapse to c contiguous" 
Example 27
def test_deepcopy():
    a = cuda.fmatrix()
    a_v = cuda.CudaNdarray(numpy.zeros((3, 4), dtype='float32'))

    # We force the C linker to check that we generate C code
    mode = theano.Mode("c", mode_with_gpu.optimizer)
    f = theano.function([a], a, mode=mode)
    theano.printing.debugprint(f)
    out = f(a_v)
    assert out is not a_v
    assert numpy.allclose(numpy.asarray(a_v), numpy.asarray(out))

    # We force the Python linker, as the default code should work for this op
    mode = theano.Mode("py", mode_with_gpu.optimizer)
    f = theano.function([a], a, mode=mode)
    theano.printing.debugprint(f)
    out = f(a_v)
    assert out is not a_v
    assert numpy.allclose(numpy.asarray(a_v), numpy.asarray(out)) 