Python source code examples: theano.sandbox.cuda.cuda_available
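In the old theano.sandbox.cuda backend, cuda_available is a module-level boolean flag: it is set when theano.sandbox.cuda is imported and is False when no usable CUDA device or toolchain was found. The examples below, collected from Theano and projects built on it (e.g. pylearn2), use it to skip GPU-only tests and code paths. A minimal sketch of that guard idiom (the test name is hypothetical):

from nose.plugins.skip import SkipTest
from theano.sandbox.cuda import cuda_available

def test_something_on_gpu():
    # cuda_available is a plain bool, not a callable; checking it is cheap
    # and does not itself initialize the GPU.
    if not cuda_available:
        raise SkipTest('Optional package cuda not available')
    # ... GPU-specific test body goes here ...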

Example 1
def test_output_broadcast_cuda(self):
        from theano.sandbox import cuda
        if not cuda.cuda_available:
            raise SkipTest("Optional package Cuda disabled")
        if cuda.use.device_number is None:
            # We would normally register VecAsRowAndCol as a GPU Op, but we
            # don't want to do that here because it would disable other
            # tests in this file.  So we manually initialize the GPU if
            # needed, to silence the warning.
            cuda.use("gpu",
                     force=True,
                     default_to_move_computation_to_gpu=False,
                     move_shared_float32_to_gpu=False,
                     enable_cuda=False)
        v = cuda.fvector('v')
        c, r = VecAsRowAndCol()(v)
        f = theano.function([v], [c, r])

        v_val = cuda.CudaNdarray(self.rng.randn(5).astype('float32'))
        f(v_val) 
Example 2
def test_deterministic():
    seed = utt.fetch_seed()
    sample_size = (10, 20)

    test_use_cuda = [False]
    if cuda_available:
        test_use_cuda.append(True)

    for use_cuda in test_use_cuda:
        # print 'use_cuda =', use_cuda
        R = MRG_RandomStreams(seed=seed, use_cuda=use_cuda)
        u = R.uniform(size=sample_size)
        f = theano.function([], u)

        fsample1 = f()
        fsample2 = f()
        assert not numpy.allclose(fsample1, fsample2)

        R2 = MRG_RandomStreams(seed=seed, use_cuda=use_cuda)
        u2 = R2.uniform(size=sample_size)
        g = theano.function([], u2)
        gsample1 = g()
        gsample2 = g()
        assert numpy.allclose(fsample1, gsample1)
        assert numpy.allclose(fsample2, gsample2) 
Example 3
def test_GPU_nstreams_limit():
    """
    Verify that a ValueError is raised when n_streams
    is greater than 2**20 on GPU. This is the value of
    (NUM_VECTOR_OP_THREADS_PER_BLOCK * NUM_VECTOR_OP_BLOCKS).

    """
    if not cuda_available:
        raise SkipTest('Optional package cuda not available')

    seed = 12345
    R = MRG_RandomStreams(seed=seed, use_cuda=True)

    def eval_uniform(size, nstreams):
        if theano.config.mode == "FAST_COMPILE":
            mode = "FAST_RUN"
        else:
            mode = copy.copy(theano.compile.get_default_mode())
            mode.check_py_code = False
        out = R.uniform(size=size, nstreams=nstreams, dtype='float32')
        f = theano.function([], out, mode=mode)
        return f()

    eval_uniform((10,), 2**20)
    assert_raises(ValueError, eval_uniform, (10,), 2**20 + 1) 
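As the test above suggests, 2**20 (= 1,048,576) caps the number of parallel GPU streams, not the number of samples: each stream can produce many values, so only nstreams has to stay at or below the limit. A hedged sketch, with the sizes chosen arbitrarily for illustration:

R = MRG_RandomStreams(seed=12345, use_cuda=True)
# Draw far more samples than streams; the 2**20 limit applies to nstreams only.
u = R.uniform(size=(10 ** 7,), nstreams=2 ** 20, dtype='float32')
f = theano.function([], u)
samples = f()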
Example 4
def test_overflow_gpu_old_backend():
    # run with THEANO_FLAGS=mode=FAST_RUN,init_gpu_device=gpu1,device=cpu
    if not cuda_available:
        raise SkipTest('Optional package cuda not available')
    mode = mode_with_gpu
    seed = 12345
    rng = MRG_RandomStreams(seed=seed, use_cuda=True)
    fct = rng.uniform
    # should raise error as the size overflows
    sizes = [(2**31, ), (2**32, ), (2**15, 2**16,), (2, 2**15, 2**15)]
    rng_mrg_overflow(sizes, fct, mode, should_raise_error=True)
    # should not raise error
    sizes = [(2**5, ), (2**5, 2**5), (2**5, 2**5, 2**5)]
    rng_mrg_overflow(sizes, fct, mode, should_raise_error=False)
    # should support int32 sizes
    sizes = [(numpy.int32(2**10), ),
             (numpy.int32(2), numpy.int32(2**10), numpy.int32(2**10))]
    rng_mrg_overflow(sizes, fct, mode, should_raise_error=False) 
Example 5
def test_unpickle_cudandarray_as_numpy_ndarray_flag0():
    oldflag = config.experimental.unpickle_gpu_on_cpu
    config.experimental.unpickle_gpu_on_cpu = False

    try:
        testfile_dir = os.path.dirname(os.path.realpath(__file__))
        fname = 'CudaNdarray.pkl'

        with open(os.path.join(testfile_dir, fname), 'rb') as fp:
            if PY3:
                u = CompatUnpickler(fp, encoding="latin1")
            else:
                u = CompatUnpickler(fp)
            if cuda_available:
                mat = u.load()
                assert isinstance(mat, CudaNdarray)
                assert numpy.asarray(mat)[0] == -42.0
            else:
                assert_raises(ImportError, u.load)
    finally:
        config.experimental.unpickle_gpu_on_cpu = oldflag 
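For contrast, a minimal sketch (not part of the original listing, reusing the names from the example above) of the companion setting: with config.experimental.unpickle_gpu_on_cpu set to True, the same pickle is expected to load as a plain numpy array instead of requiring CUDA:

oldflag = config.experimental.unpickle_gpu_on_cpu
config.experimental.unpickle_gpu_on_cpu = True
try:
    with open(os.path.join(testfile_dir, 'CudaNdarray.pkl'), 'rb') as fp:
        u = CompatUnpickler(fp, encoding="latin1") if PY3 else CompatUnpickler(fp)
        mat = u.load()
        # The stored CudaNdarray comes back as host-side data.
        assert isinstance(mat, numpy.ndarray)
        assert mat[0] == -42.0
finally:
    config.experimental.unpickle_gpu_on_cpu = oldflag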
Example 6
def test_maxout_conv_c01b_basic(self):
        if cuda.cuda_available is False:
            raise SkipTest('Optional package cuda disabled')
        if not hasattr(cuda, 'unuse'):
            raise Exception("Theano version too old to run this test!")
        # Tests that we can run a small convolutional model on GPU.
        assert cuda.cuda_enabled is False
        # Even if there is a GPU but the user didn't specify device=gpu,
        # we want to run this test.
        try:
            old_floatX = config.floatX
            cuda.use('gpu')
            config.floatX = 'float32'
            train = yaml_parse.load(yaml_string_maxout_conv_c01b_basic)
            train.main_loop()
        finally:
            config.floatX = old_floatX
            cuda.unuse()
        assert cuda.cuda_enabled is False 
Example 7
def check_cuda(feature_name="You are using code that relies on cuda-convnet. Cuda-convnet",
               check_enabled=True):
    """
    Call this function before sections of code that depend on the cuda_convnet module.
    It will raise a RuntimeError if the GPU is not available.

    feature_name: The name of the feature the user should be told is unavailable.
    """
    if not cuda.cuda_available:
        raise RuntimeError("%s only runs on GPUs, but there doesn't "
                "seem to be a GPU available. If you would like assistance making "
                "a CPU version of convolutional maxout, contact "
                "pylearn-dev@googlegroups.com." % feature_name)

    if not hasattr(cuda.cuda_ndarray.cuda_ndarray, 'cublas_v2'):
        warnings.warn(
            "You are probably using a Theano version that is too old. This"
            " will cause a compilation crash. If so, update Theano.")
    elif not cuda.cuda_ndarray.cuda_ndarray.cublas_v2():
        raise RuntimeError("You are probably using a Theano version that is"
                           " too old. This will cause a compilation crash."
                           " Update Theano.")

    if check_enabled and not cuda.cuda_enabled:
        raise RuntimeError("%s must run be with theano configured to use the GPU" % feature_name) 
Example 8
def test_cuda(self):
        import theano.sandbox.cuda as cuda
        if not cuda.cuda_available:
            raise SkipTest("Optional package cuda not available")

        a = sparse.csr_matrix('a', dtype='float32')
        b = cuda.float32_shared_constructor(
            numpy.random.rand(3, 4).astype('float32'))
        d = sparse.dot(a, b)
        f = theano.function([a], d)

        a_val = scipy.sparse.csr_matrix(random_lil((5, 3), 'float32', 5))
        d_theano = f(a_val)
        d_numpy = a_val * b.get_value()
        utt.assert_allclose(d_numpy, d_theano) 
Example 9
def contains_nan(arr, node=None):
    """
    Test whether a numpy.ndarray contains any `np.nan` values.

    Parameters
    ----------
    arr : np.ndarray or output of any Theano op
    node : None or an Apply instance.
        If arr is the output of a Theano op, the node associated to it.

    Returns
    -------
    contains_nan : bool
        `True` if the array contains any `np.nan` values, `False` otherwise.

    Notes
    -----
    Tests for the presence of `np.nan`'s using `np.isnan(np.min(ndarray))`.
    This approach is faster and more memory efficient than the obvious
    alternative, calling `np.any(np.isnan(ndarray))`, which requires the
    construction of a boolean array with the same shape as the input array.

    """
    if isinstance(arr, theano.gof.type.CDataType._cdata_type):
        return False
    elif isinstance(arr, np.random.mtrand.RandomState):
        return False
    elif arr.size == 0:
        return False
    elif cuda.cuda_available and isinstance(arr, cuda.CudaNdarray):
        if (hasattr(theano.sandbox, 'rng_mrg') and
            isinstance(
                node.op,
                # It stores ints in a float container
                theano.sandbox.rng_mrg.GPU_mrg_uniform)):
            return False
        else:
            compile_gpu_func(True, False, False)
            return np.isnan(f_gpumin(arr.reshape(arr.size)))

    return np.isnan(np.min(arr)) 
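The Notes above rely on np.min propagating NaN, so a single scalar isnan check replaces building a full boolean mask. A small self-contained check of that equivalence, independent of Theano (empty arrays are excluded, as the arr.size == 0 branch above already handles them):

import numpy as np

with_nan = np.array([0.0, 1.0, np.nan, 3.0])
without_nan = np.arange(4, dtype='float64')

# np.min returns NaN as soon as any element is NaN, so one scalar test is
# enough; np.any(np.isnan(...)) allocates a boolean array of the same shape.
assert np.isnan(np.min(with_nan)) == np.any(np.isnan(with_nan))        # both True
assert np.isnan(np.min(without_nan)) == np.any(np.isnan(without_nan))  # both False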
Example 10
def compile_gpu_func(nan_is_error, inf_is_error, big_is_error):
    """ compile utility function used by contains_nan and contains_inf
    """
    global f_gpumin, f_gpumax, f_gpuabsmax
    if not cuda.cuda_available:
        return
    guard_input = cuda.fvector('nan_guard')
    cuda_compile_failed = False
    if (nan_is_error or inf_is_error) and f_gpumin is None:
        try:
            f_gpumin = theano.function(
                [guard_input], T.min(guard_input),
                mode='FAST_RUN'
            )
        except RuntimeError:
            # This can happen if cuda is available, but the
            # device is in exclusive mode and used by another
            # process.
            cuda_compile_failed = True
    if inf_is_error and not cuda_compile_failed and f_gpumax is None:
        try:
            f_gpumax = theano.function(
                [guard_input], T.max(guard_input),
                mode='FAST_RUN'
            )
        except RuntimeError:
            # This can happen if cuda is available, but the
            # device is in exclusive mode and used by another
            # process.
            cuda_compile_failed = True
    if big_is_error and not cuda_compile_failed and f_gpuabsmax is None:
        try:
            f_gpuabsmax = theano.function(
                [guard_input], T.max(T.abs_(guard_input)),
                mode='FAST_RUN'
                )
        except RuntimeError:
            # This can happen if cuda is available, but the
            # device is in exclusive mode and used by another
            # process.
            cuda_compile_failed = True 
Example 11
def test_viewop_gpu():
    from theano.sandbox import cuda
    if not cuda.cuda_available:
        raise SkipTest('Optional package cuda disabled')
    _x = theano.tensor.fvector('x')
    x = cuda.gpu_from_host(_x)
    _out = theano.compile.ViewOp()(x)
    out = cuda.host_from_gpu(_out)
    f = theano.function([x],
                        out,
                       mode=mode_with_gpu)
    data = numpy.array([1, 2, 3], dtype='float32')
    assert numpy.allclose(f(data), data) 
Example 12
def test_unpickle_cudandarray_as_numpy_ndarray_flag0():
    oldflag = config.experimental.unpickle_gpu_on_cpu
    config.experimental.unpickle_gpu_on_cpu = False

    try:
        testfile_dir = os.path.dirname(os.path.realpath(__file__))
        fname = 'CudaNdarray.pkl'

        with open(os.path.join(testfile_dir, fname), 'rb') as fp:
            if PY3:
                u = CompatUnpickler(fp, encoding="latin1")
            else:
                u = CompatUnpickler(fp)
            if cuda_available:
                try:
                    mat = u.load()
                except ImportError:
                    # Windows sometimes fail with nonsensical errors like:
                    #   ImportError: No module named type
                    #   ImportError: No module named copy_reg
                    # when "type" and "copy_reg" are builtin modules.
                    if sys.platform == 'win32':
                        exc_type, exc_value, exc_trace = sys.exc_info()
                        reraise(SkipTest, exc_value, exc_trace)
                    raise

                assert isinstance(mat, CudaNdarray)
                assert numpy.asarray(mat)[0] == -42.0
            else:
                assert_raises(ImportError, u.load)
    finally:
        config.experimental.unpickle_gpu_on_cpu = oldflag 
Example 13
def test_consistency_randomstreams():
    """
    Verify that the random numbers generated by MRG_RandomStreams
    are the same as the reference (Java) implementation by L'Ecuyer et al.

    """
    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7

    test_use_cuda = [False]
    if cuda_available:
        test_use_cuda.append(True)

    for use_cuda in test_use_cuda:
        # print 'use_cuda =', use_cuda
        samples = []
        rng = MRG_RandomStreams(seed=seed, use_cuda=use_cuda)
        for i in range(n_streams):
            stream_samples = []
            u = rng.uniform(size=(n_substreams,), nstreams=n_substreams)
            f = theano.function([], u)
            for j in range(n_samples):
                s = f()
                stream_samples.append(s)
            stream_samples = numpy.array(stream_samples)
            stream_samples = stream_samples.T.flatten()
            samples.append(stream_samples)

        samples = numpy.array(samples).flatten()
        assert(numpy.allclose(samples, java_samples)) 
Example 14
def t_binomial(mean, size, const_size, var_input, input, steps, rtol):
    R = MRG_RandomStreams(234, use_cuda=False)
    u = R.binomial(size=size, p=mean)
    f = theano.function(var_input, u, mode=mode)
    out = f(*input)

    # Increase the number of steps if the size implies only a few samples
    if numpy.prod(const_size) < 10:
        steps_ = steps * 100
    else:
        steps_ = steps
    basictest(f, steps_, const_size, prefix='mrg  cpu',
              inputs=input, allow_01=True,
              target_avg=mean, mean_rtol=rtol)

    if mode != 'FAST_COMPILE' and cuda_available:
        R = MRG_RandomStreams(234, use_cuda=True)
        u = R.binomial(size=size, p=mean, dtype='float32')
        # really, this test with the GPU doesn't make sense otherwise
        assert u.dtype == 'float32'
        f = theano.function(var_input, theano.Out(
            theano.sandbox.cuda.basic_ops.gpu_from_host(u),
            borrow=True), mode=mode_with_gpu)
        gpu_out = numpy.asarray(f(*input))

        basictest(f, steps_, const_size, prefix='mrg  gpu',
                  inputs=input, allow_01=True,
                  target_avg=mean, mean_rtol=rtol)
        numpy.testing.assert_array_almost_equal(out, gpu_out,
                                                decimal=6)

    RR = theano.tensor.shared_randomstreams.RandomStreams(234)

    uu = RR.binomial(size=size, p=mean)
    ff = theano.function(var_input, uu, mode=mode)
    # It's not our problem if numpy generates 0 or 1
    basictest(ff, steps_, const_size, prefix='numpy', allow_01=True,
              inputs=input, target_avg=mean, mean_rtol=rtol) 
Example 15
def test_multinomial_n_samples():
    mode_ = mode
    if mode == 'FAST_COMPILE':
        mode_ = 'FAST_RUN'

    if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
            mode == 'Mode' and config.linker in ['py']):
        sample_size = (49, 5)
    else:
        sample_size = (450, 6)
    mode_ = theano.compile.mode.get_mode(mode_)

    pvals = numpy.asarray(numpy.random.uniform(size=sample_size))
    pvals = numpy.apply_along_axis(lambda row: row / numpy.sum(row), 1, pvals)
    R = MRG_RandomStreams(234, use_cuda=False)

    for n_samples, steps in zip([5, 10, 100, 1000], [20, 10, 1, 1]):
        m = R.multinomial(pvals=pvals, n=n_samples,
                          dtype=config.floatX, nstreams=30 * 256)
        f = theano.function([], m, mode=mode_)
        basic_multinomialtest(f, steps, sample_size, pvals,
                              n_samples, prefix='mrg ')
        sys.stdout.flush()

        if mode != 'FAST_COMPILE' and cuda_available:
            R = MRG_RandomStreams(234, use_cuda=True)
            pvals = numpy.asarray(pvals, dtype='float32')
            n = R.multinomial(pvals=pvals, n=n_samples,
                              dtype='float32', nstreams=30 * 256)
            assert n.dtype == 'float32'
            f = theano.function(
                [],
                theano.sandbox.cuda.basic_ops.gpu_from_host(n),
                mode=mode_.including('gpu'))

            sys.stdout.flush()
            basic_multinomialtest(f, steps, sample_size, pvals,
                                  n_samples, prefix='gpu mrg ') 
Example 16
def test_multinomial_0():
    # This tests the MultinomialFromUniform Op directly, not going through the
    # multinomial() call in GPU random generation.

    p = tensor.fmatrix()
    u = tensor.fvector()

    m = multinomial.MultinomialFromUniform('auto')(p, u)

    def body(mode, gpu):
        # the m*2 allows the multinomial to reuse output
        f = function([p, u], m * 2, allow_input_downcast=True, mode=mode)

        if gpu:
            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                        for node in f.maker.fgraph.toposort()])

        # test that both first and second samples can be drawn
        utt.assert_allclose(f([[1, 0], [0, 1]], [.1, .1]),
                            [[2, 0], [0, 2]])

        # test that both second labels can be drawn
        r = f([[.2, .8], [.3, .7]], [.31, .31])
        utt.assert_allclose(r, [[0, 2], [0, 2]])

        # test that both first labels can be drawn
        r = f([[.2, .8], [.3, .7]], [.21, .21])
        utt.assert_allclose(r, [[0, 2], [2, 0]])

        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        r = f([[.2, .8]], [.25])
        utt.assert_allclose(r, [[0, 2]])

    run_with_c(body)
    if cuda.cuda_available:
        run_with_c(body, True)


# TODO: check a bigger example (make sure blocking on GPU is handled correctly) 
Example 17
def test_multinomial_large():
    # DEBUG_MODE will test this on GPU
    def body(mode, gpu):
        p = tensor.fmatrix()
        u = tensor.fvector()
        m = multinomial.MultinomialFromUniform('auto')(p, u)
        f = function([p, u], m * 2, allow_input_downcast=True, mode=mode)
        if gpu:
            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                        for node in f.maker.fgraph.toposort()])

        pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = numpy.ones_like(pval[:, 0]) * 0.5
        mval = f(pval, uval)

        assert mval.shape == pval.shape
        if config.cast_policy == 'custom':
            assert mval.dtype == pval.dtype
        elif config.cast_policy == 'numpy+floatX':
            assert mval.dtype == config.floatX
        elif config.cast_policy == 'numpy':
            assert mval.dtype == 'float64'
        else:
            raise NotImplementedError(config.cast_policy)
        utt.assert_allclose(mval.sum(axis=1), 2)
        asdf = numpy.asarray([0, 0, 2, 0]) + 0 * pval
        utt.assert_allclose(mval, asdf)  # broadcast over all rows
    run_with_c(body)
    if cuda.cuda_available:
        run_with_c(body, True) 
Example 18
def test_gpu_opt():
    if not cuda.cuda_available:
        # Skip test if cuda_ndarray is not available.
        from nose.plugins.skip import SkipTest
        raise SkipTest('Optional package cuda not available')

    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype
    m_gpu = cuda.gpu_from_host(m)

    f = function([p, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)

    # Test with a row, it was failing in the past.
    r = tensor.frow()
    m = multinomial.MultinomialFromUniform('auto')(r, u)
    assert m.dtype == 'float32', m.dtype
    m_gpu = cuda.gpu_from_host(m)

    f = function([r, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    f(pval, uval) 