Java源码示例:com.aparapi.Kernel
示例1
/**
 * Appends a profiling summary to {@code builder}: a title, the table header supplied by
 * {@code KernelDeviceProfile.getTableHeader()}, and then, for every kernel class present in
 * {@code profiles}, a dash-padded banner line followed by one averaged table row per device profile.
 *
 * @param builder destination the report text is appended to
 */
public void reportProfilingSummary(StringBuilder builder) {
    builder.append("\nProfiles by Kernel Subclass (mean elapsed times in milliseconds)\n\n");
    builder.append(KernelDeviceProfile.getTableHeader());
    builder.append("\n");

    for (Class<? extends Kernel> profiledClass : profiles.keySet()) {
        // Banner: fixed prefix plus the class name, then dashes up to column 132.
        String banner = "----------------- [[ " + Reflection.getSimpleName(profiledClass) + " ]] ";
        builder.append(banner);
        for (int remaining = 132 - banner.length(); remaining > 0; remaining--) {
            builder.append('-');
        }
        builder.append("\n");

        KernelProfile profile = profiles.get(profiledClass);
        for (KernelDeviceProfile perDevice : profile.getDeviceProfiles()) {
            builder.append(perDevice.getAverageAsTableRow());
            builder.append("\n");
        }
    }
}
示例2
/**
 * Returns the instance of {@code kernelClass} stored in {@code sharedInstances}, creating it through
 * the class's no-argument constructor and storing it on first request.
 *
 * <p>The whole lookup-or-create sequence runs while holding the monitor of {@code sharedInstances}.
 *
 * @param kernelClass kernel type whose shared instance is wanted
 * @return the stored instance for {@code kernelClass}
 * @throws RuntimeException wrapping any exception raised while looking up or invoking the constructor
 */
private <T extends Kernel> T getSharedKernelInstance(Class<T> kernelClass) {
    synchronized (sharedInstances) {
        T existing = (T) sharedInstances.get(kernelClass);
        if (existing != null) {
            return existing;
        }
        try {
            Constructor<T> noArgConstructor = kernelClass.getConstructor();
            noArgConstructor.setAccessible(true);
            T created = noArgConstructor.newInstance();
            sharedInstances.put(kernelClass, created);
            return created;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
示例3
/**
 * Executes a kernel that stores the negative literal {@code -800} into {@code RESULT[0]} on
 * {@code openCLDevice} and asserts that the value is visible on the host afterwards.
 */
@Test
public void negativeIntegerTestPass() {
    final int[] RESULT = new int[2];
    final int SIZE = 1;

    Kernel kernel = new Kernel() {
        @Override
        public void run() {
            RESULT[0] = -800;
        }
    };

    final Device device = openCLDevice;
    Range range = Range.create(device, SIZE, SIZE);
    kernel.execute(range);

    assertEquals("Result doesn't match", -800, RESULT[0]);
}
示例4
/**
 * Executes, with a range of 1, a kernel whose {@code run()} reads {@code ints[0]} into a local
 * variable that is never used afterwards. The test contains no assertions, so it can only fail if
 * something in this method throws.
 *
 * <p>Currently skipped through {@code @Ignore} with the reason "Known bug, ignoring test".
 */
@Ignore("Known bug, ignoring test")
@Test
public void UnusedInNormalScopeTest()
{
Kernel kernel = new Kernel() {
int[] ints = new int[1024];
public void run() {
if (ints != null) {
// NOTE(review): 'value' is never read; judging by the test name, the unused local is
// presumably the construct under test, so it should not be "cleaned up" - confirm.
int value = ints[0];
}
}
};
kernel.execute(1);
}
示例5
/**
 * Runs the {@code ZMatMul1D} kernel on two flattened {@code N * N} boolean inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Zrun1D() {
    final boolean[] A = new boolean[N * N];
    final boolean[] B = new boolean[N * N];
    // New Java arrays are already filled with false, so the output buffer needs no explicit clearing.
    final boolean[] gpu = new boolean[N * N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[(i * N) + j] = ((i % 2) == 0) ^ ((j % 2) == 0);
            B[(i * N) + j] = ((i % 2) == 0) & ((j % 2) == 0);
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new ZMatMul1D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final boolean[] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例6
/**
 * Runs the {@code BMatMul1D} kernel on two flattened {@code N * N} byte inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Brun1D() {
    final byte[] A = new byte[N * N];
    final byte[] B = new byte[N * N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final byte[] gpu = new byte[N * N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[(i * N) + j] = (byte) (i + j);
            B[(i * N) + j] = (byte) (i - j);
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new BMatMul1D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final byte[] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例7
/**
 * Runs the {@code SMatMul1D} kernel on two flattened {@code N * N} short inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Srun1D() {
    final short[] A = new short[N * N];
    final short[] B = new short[N * N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final short[] gpu = new short[N * N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[(i * N) + j] = (short) (i + j);
            B[(i * N) + j] = (short) (i - j);
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new SMatMul1D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final short[] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例8
/**
 * Runs the {@code IMatMul1D} kernel on two flattened {@code N * N} int inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Irun1D() {
    final int[] A = new int[N * N];
    final int[] B = new int[N * N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final int[] gpu = new int[N * N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[(i * N) + j] = i + j;
            B[(i * N) + j] = i - j;
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new IMatMul1D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final int[] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例9
/**
 * Runs the {@code LMatMul1D} kernel on two flattened {@code N * N} long inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Lrun1D() {
    final long[] A = new long[N * N];
    final long[] B = new long[N * N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final long[] gpu = new long[N * N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[(i * N) + j] = i + j;
            B[(i * N) + j] = i - j;
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new LMatMul1D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final long[] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例10
/**
 * Runs the {@code FMatMul1D} kernel on two flattened {@code N * N} float inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Frun1D() {
    final float[] A = new float[N * N];
    final float[] B = new float[N * N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final float[] gpu = new float[N * N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[(i * N) + j] = i + j;
            B[(i * N) + j] = i - j;
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new FMatMul1D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final float[] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例11
/**
 * Runs the {@code DMatMul1D} kernel on two flattened {@code N * N} double inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Drun1D() {
    final double[] A = new double[N * N];
    final double[] B = new double[N * N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final double[] gpu = new double[N * N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[(i * N) + j] = i + j;
            B[(i * N) + j] = i - j;
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new DMatMul1D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final double[] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例12
/**
 * Runs the {@code ZMatMul2D} kernel on two {@code N x N} boolean inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Zrun2D() {
    final boolean[][] A = new boolean[N][N];
    final boolean[][] B = new boolean[N][N];
    // New Java arrays are already filled with false, so the output buffer needs no explicit clearing.
    final boolean[][] gpu = new boolean[N][N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = ((i % 2) == 0) ^ ((j % 2) == 0);
            B[i][j] = ((i % 2) == 0) & ((j % 2) == 0);
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new ZMatMul2D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final boolean[][] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例13
/**
 * Runs the {@code BMatMul2D} kernel on two {@code N x N} byte inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Brun2D() {
    final byte[][] A = new byte[N][N];
    final byte[][] B = new byte[N][N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final byte[][] gpu = new byte[N][N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = (byte) (i + j);
            B[i][j] = (byte) (i - j);
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new BMatMul2D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final byte[][] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例14
/**
 * Runs the {@code SMatMul2D} kernel on two {@code N x N} short inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Srun2D() {
    final short[][] A = new short[N][N];
    final short[][] B = new short[N][N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final short[][] gpu = new short[N][N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = (short) (i + j);
            B[i][j] = (short) (i - j);
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new SMatMul2D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final short[][] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例15
/**
 * Runs the {@code IMatMul2D} kernel on two {@code N x N} int inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Irun2D() {
    final int[][] A = new int[N][N];
    final int[][] B = new int[N][N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final int[][] gpu = new int[N][N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = i + j;
            B[i][j] = i - j;
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new IMatMul2D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final int[][] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例16
/**
 * Runs the {@code LMatMul2D} kernel on two {@code N x N} long inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Lrun2D() {
    final long[][] A = new long[N][N];
    final long[][] B = new long[N][N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final long[][] gpu = new long[N][N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = i + j;
            B[i][j] = i - j;
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new LMatMul2D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final long[][] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例17
/**
 * Runs the {@code FMatMul2D} kernel on two {@code N x N} float inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Frun2D() {
    final float[][] A = new float[N][N];
    final float[][] B = new float[N][N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final float[][] gpu = new float[N][N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = i + j;
            B[i][j] = i - j;
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new FMatMul2D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final float[][] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例18
/**
 * Runs the {@code DMatMul2D} kernel on two {@code N x N} double inputs, computes
 * {@code matMull(A, B, N)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Drun2D() {
    final double[][] A = new double[N][N];
    final double[][] B = new double[N][N];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final double[][] gpu = new double[N][N];
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = i + j;
            B[i][j] = i - j;
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new DMatMul2D(A, B, gpu, N);
    try {
        kernel.execute(N * N);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final double[][] cpu = matMull(A, B, N);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例19
/**
 * Runs the {@code ZMatMul3D} kernel on two {@code M x M x M} boolean inputs, computes
 * {@code matMull(A, B, M)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Zrun3D() {
    final boolean[][][] A = new boolean[M][M][M];
    final boolean[][][] B = new boolean[M][M][M];
    // New Java arrays are already filled with false, so the output buffer needs no explicit clearing.
    final boolean[][][] gpu = new boolean[M][M][M];
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            for (int k = 0; k < M; k++) {
                A[i][j][k] = ((i % 2) == 0) ^ (((j % 2) == 0) & ((k % 2) == 0));
                B[i][j][k] = (((i % 2) == 0) & ((j % 2) == 0)) ^ ((k % 2) == 0);
            }
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new ZMatMul3D(A, B, gpu, M);
    try {
        kernel.execute(M * M * M);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final boolean[][][] cpu = matMull(A, B, M);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例20
/**
 * Runs the {@code BMatMul3D} kernel on two {@code M x M x M} byte inputs, computes
 * {@code matMull(A, B, M)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Brun3D() {
    final byte[][][] A = new byte[M][M][M];
    final byte[][][] B = new byte[M][M][M];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final byte[][][] gpu = new byte[M][M][M];
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            for (int k = 0; k < M; k++) {
                A[i][j][k] = (byte) (i + j + k);
                B[i][j][k] = (byte) ((i - j) + k);
            }
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new BMatMul3D(A, B, gpu, M);
    try {
        kernel.execute(M * M * M);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final byte[][][] cpu = matMull(A, B, M);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例21
/**
 * Runs the {@code SMatMul3D} kernel on two {@code M x M x M} short inputs, computes
 * {@code matMull(A, B, M)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Srun3D() {
    final short[][][] A = new short[M][M][M];
    final short[][][] B = new short[M][M][M];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final short[][][] gpu = new short[M][M][M];
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            for (int k = 0; k < M; k++) {
                A[i][j][k] = (short) (i + j + k);
                B[i][j][k] = (short) ((i - j) + k);
            }
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new SMatMul3D(A, B, gpu, M);
    try {
        kernel.execute(M * M * M);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final short[][][] cpu = matMull(A, B, M);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例22
/**
 * Runs the {@code IMatMul3D} kernel on two {@code M x M x M} int inputs, computes
 * {@code matMull(A, B, M)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Irun3D() {
    final int[][][] A = new int[M][M][M];
    final int[][][] B = new int[M][M][M];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final int[][][] gpu = new int[M][M][M];
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            for (int k = 0; k < M; k++) {
                A[i][j][k] = i + j + k;
                B[i][j][k] = (i - j) + k;
            }
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new IMatMul3D(A, B, gpu, M);
    try {
        kernel.execute(M * M * M);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final int[][][] cpu = matMull(A, B, M);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例23
/**
 * Runs the {@code LMatMul3D} kernel on two {@code M x M x M} long inputs, computes
 * {@code matMull(A, B, M)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Lrun3D() {
    final long[][][] A = new long[M][M][M];
    final long[][][] B = new long[M][M][M];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final long[][][] gpu = new long[M][M][M];
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            for (int k = 0; k < M; k++) {
                A[i][j][k] = i + j + k;
                B[i][j][k] = (i - j) + k;
            }
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new LMatMul3D(A, B, gpu, M);
    try {
        kernel.execute(M * M * M);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final long[][][] cpu = matMull(A, B, M);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例24
/**
 * Runs the {@code FMatMul3D} kernel on two {@code M x M x M} float inputs, computes
 * {@code matMull(A, B, M)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Frun3D() {
    final float[][][] A = new float[M][M][M];
    final float[][][] B = new float[M][M][M];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final float[][][] gpu = new float[M][M][M];
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            for (int k = 0; k < M; k++) {
                A[i][j][k] = i + j + k;
                B[i][j][k] = (i - j) + k;
            }
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new FMatMul3D(A, B, gpu, M);
    try {
        kernel.execute(M * M * M);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final float[][][] cpu = matMull(A, B, M);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例25
/**
 * Runs the {@code DMatMul3D} kernel on two {@code M x M x M} double inputs, computes
 * {@code matMull(A, B, M)} on the host, and prints both elapsed times (milliseconds) followed by
 * whether {@code checkResults} accepts the two outputs.
 *
 * <p>The kernel is disposed as soon as it has executed so that its resources are released even when
 * execution throws.
 */
public static void Drun3D() {
    final double[][][] A = new double[M][M][M];
    final double[][][] B = new double[M][M][M];
    // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
    final double[][][] gpu = new double[M][M][M];
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            for (int k = 0; k < M; k++) {
                A[i][j][k] = i + j + k;
                B[i][j][k] = (i - j) + k;
            }
        }
    }

    // The GPU timing deliberately includes kernel construction, as before.
    long gs = System.currentTimeMillis();
    final Kernel kernel = new DMatMul3D(A, B, gpu, M);
    try {
        kernel.execute(M * M * M);
        gs = System.currentTimeMillis() - gs;
    } finally {
        kernel.dispose();
    }

    long cs = System.currentTimeMillis();
    final double[][][] cpu = matMull(A, B, M);
    cs = System.currentTimeMillis() - cs;

    System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
    System.out.print("valid? ");
    System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
示例26
/**
 * Executes {@code kernel} over a 2D range derived from the two term counts.
 *
 * <p>Each term count is incremented by one until {@code isPowerOfTwo} accepts it, and the two
 * resulting values become the dimensions of the range. The three arrays are handed to
 * {@code kernel.put} before execution and {@code subResultMatrix} to {@code kernel.get} afterwards.
 * Nothing is returned.
 *
 * @param device device used to create the range; when {@code null} the range is created without one
 * @param subMatrixA first array passed to {@code kernel.put}
 * @param matrixA_NumTerms first range dimension before rounding up
 * @param subMatrixB second array passed to {@code kernel.put}
 * @param matrixB_NumTerms second range dimension before rounding up
 * @param numLongs not read by this method
 * @param subResultMatrix array passed to {@code kernel.put} before and {@code kernel.get} after execution
 * @param kernel kernel to execute
 */
private static void executeKernel(final Device device, final long[] subMatrixA, final int matrixA_NumTerms, final long[] subMatrixB, final int matrixB_NumTerms, final int numLongs, final int[] subResultMatrix, final Kernel kernel) {
    // Power of Two for best performance
    int roundedTermsA = matrixA_NumTerms;
    while (!isPowerOfTwo(roundedTermsA)) {
        roundedTermsA++;
    }
    int roundedTermsB = matrixB_NumTerms;
    while (!isPowerOfTwo(roundedTermsB)) {
        roundedTermsB++;
    }

    final Range range = (device == null)
            ? Range.create2D(roundedTermsA, roundedTermsB)
            : Range.create2D(device, roundedTermsA, roundedTermsB);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Range: " + range);
    }

    // NOTE(review): put/get look like explicit host<->device buffer transfers - confirm against the Kernel API.
    kernel.put(subMatrixA);
    kernel.put(subMatrixB);
    kernel.put(subResultMatrix);
    kernel.execute(range);
    kernel.get(subResultMatrix);
}
示例27
/**
 * Fills two 512-element float arrays with random values in [0, 100), adds them element-wise in a
 * kernel, prints one {@code a + b = sum} line per element and finally disposes the kernel.
 *
 * @param _args unused
 */
public static void main(String[] _args) {
    final int size = 512;
    final float[] a = new float[size];
    final float[] b = new float[size];
    final float[] sum = new float[size];

    int fill = 0;
    while (fill < size) {
        a[fill] = (float) (Math.random() * 100);
        b[fill] = (float) (Math.random() * 100);
        fill++;
    }

    Kernel kernel = new Kernel() {
        @Override
        public void run() {
            int gid = getGlobalId();
            sum[gid] = a[gid] + b[gid];
        }
    };

    final Range range = Range.create(size);
    kernel.execute(range);

    for (int row = 0; row < size; row++) {
        System.out.printf("%6.2f + %6.2f = %8.2f\n", a[row], b[row], sum[row]);
    }

    kernel.dispose();
}
示例28
/**
 * Squares every element of a 512-element float array in a kernel, prints the short description of
 * the device the kernel targeted, prints each value next to its square, and disposes the kernel.
 *
 * @param _args unused
 */
public static void main(String[] _args) {
    final int size = 512;

    // Input float array for which square values need to be computed.
    final float[] values = new float[size];
    for (int i = 0; i < size; i++) {
        values[i] = i;
    }

    // Output array which will be populated with square values of corresponding input array elements.
    final float[] squares = new float[size];

    // Kernel which computes squares of input array elements and stores them in the corresponding
    // elements of the output array.
    Kernel kernel = new Kernel() {
        @Override
        public void run() {
            int gid = getGlobalId();
            squares[gid] = values[gid] * values[gid];
        }
    };

    // Execute Kernel. The range is taken from `size` (previously a duplicated literal 512) so it
    // can never drift apart from the array lengths.
    kernel.execute(Range.create(size));

    // Report target execution mode: GPU or JTP (Java Thread Pool).
    System.out.println("Device = " + kernel.getTargetDevice().getShortDescription());

    // Display computed square values.
    for (int i = 0; i < size; i++) {
        System.out.printf("%6.0f %8.0f\n", values[i], squares[i]);
    }

    // Dispose Kernel resources.
    kernel.dispose();
}
示例29
/**
 * Stores the new value in {@code CacheEnabler.cachesEnabled}. When it differs from the current
 * value, {@code Kernel.invalidateCaches()} and {@code ClassModel.invalidateCaches()} are called
 * before the new value is stored.
 *
 * @param cachesEnabled the value to store
 */
public static void setCachesEnabled(boolean cachesEnabled) {
    final boolean settingChanged = CacheEnabler.cachesEnabled != cachesEnabled;
    if (settingChanged) {
        Kernel.invalidateCaches();
        ClassModel.invalidateCaches();
    }
    CacheEnabler.cachesEnabled = cachesEnabled;
}
示例30
/**
 * Returns the {@code KernelPreferences} registered for the runtime class of {@code kernel},
 * creating and registering a new one when the class has no entry in {@code preferences} yet.
 *
 * <p>Lookup and registration happen while holding the monitor of {@code preferences}.
 *
 * @param kernel kernel whose class identifies the preferences
 * @return the existing or newly created preferences for that class
 */
public KernelPreferences getPreferences(Kernel kernel) {
    synchronized (preferences) {
        final Class<? extends Kernel> kernelClass = kernel.getClass();
        final PreferencesWrapper registered = preferences.get(kernelClass);
        if (registered != null) {
            return registered.getPreferences();
        }
        final KernelPreferences created = new KernelPreferences(this, kernelClass);
        preferences.put(kernelClass, new PreferencesWrapper(kernelClass, created));
        return created;
    }
}