Java源码示例:com.aparapi.Kernel

示例1
/**
 * Appends a profiling report, grouped by kernel class, to the supplied builder.
 *
 * <p>The report starts with a title and the table header from
 * {@code KernelDeviceProfile.getTableHeader()}. For each key of {@code profiles} it then
 * writes a banner line containing the class's simple name, padded with dashes to a
 * width of 132 characters, followed by one averaged table row per device profile.
 *
 * @param builder destination that receives the report text
 */
public void reportProfilingSummary(StringBuilder builder) {
   builder.append("\nProfiles by Kernel Subclass (mean elapsed times in milliseconds)\n\n");
   builder.append(KernelDeviceProfile.getTableHeader());
   builder.append("\n");

   for (Class<? extends Kernel> kernelClass : profiles.keySet()) {
      final String banner = "----------------- [[ " + Reflection.getSimpleName(kernelClass) + " ]] ";
      builder.append(banner);

      // Fill the rest of the banner line with dashes; a banner longer than 132 gets none.
      int remaining = 132 - banner.length();
      while (remaining-- > 0) {
         builder.append('-');
      }
      builder.append("\n");

      for (KernelDeviceProfile row : profiles.get(kernelClass).getDeviceProfiles()) {
         builder.append(row.getAverageAsTableRow());
         builder.append("\n");
      }
   }
}
 
示例2
/**
 * Returns the cached instance of {@code kernelClass}, creating it through the class's
 * public no-argument constructor and caching it on the first request.
 *
 * <p>Lookup and creation both happen while holding the {@code sharedInstances} monitor.
 *
 * @param kernelClass the kernel type whose shared instance is wanted
 * @param <T> the concrete kernel type
 * @return the cached (possibly just created) instance
 * @throws RuntimeException if the instance cannot be constructed; the original failure is the cause
 */
private <T extends Kernel> T getSharedKernelInstance(Class<T> kernelClass) {
   synchronized (sharedInstances) {
      // Class.cast performs a checked conversion, avoiding an unchecked (T) cast; null passes through.
      T shared = kernelClass.cast(sharedInstances.get(kernelClass));
      if (shared == null) {
         try {
            Constructor<T> constructor = kernelClass.getConstructor();
            // Suppress access checks so the constructor is callable even if the class is not accessible from here.
            constructor.setAccessible(true);
            shared = constructor.newInstance();
            sharedInstances.put(kernelClass, shared);
         }
         catch (Exception e) {
            // Name the offending class so the failure can be diagnosed; keep the cause.
            throw new RuntimeException("Unable to create shared instance of " + kernelClass.getName(), e);
         }
      }
      return shared;
   }
}
 
示例3
/**
 * Executes a kernel whose {@code run()} stores the negative constant -800 into
 * {@code RESULT[0]} and asserts that this value is present after execution.
 *
 * <p>The range is created for {@code openCLDevice}, which is declared outside this method,
 * with SIZE (1) passed for both size arguments.
 */
@Test
public void negativeIntegerTestPass()
{
	final Device device = openCLDevice;
    final int SIZE = 1;
    // Two elements are allocated, but only index 0 is written and checked.
    final int[] RESULT = new int[2];
    Kernel kernel = new Kernel()
    {
         @Override
        public void run()
        {
            // The value under test: a negative integer literal assigned inside the kernel.
            RESULT[0] = -800;
        }
    };
    kernel.execute(Range.create(device, SIZE, SIZE));
    assertEquals("Result doesn't match", -800, RESULT[0]);
}
 
示例4
/**
 * Executes a kernel whose {@code run()} reads {@code ints[0]} into a local variable that
 * is never used afterwards.
 *
 * <p>The test is currently disabled through {@code @Ignore} ("Known bug, ignoring test").
 * It contains no assertions; it passes when {@code kernel.execute(1)} completes without throwing.
 */
@Ignore("Known bug, ignoring test")
@Test
public void UnusedInNormalScopeTest()
{
    Kernel kernel = new Kernel() {
        int[] ints = new int[1024];

        public void run() {
            if (ints != null) {
                // Deliberately unused local: this is the construct the test is named after.
                int value = ints[0];
            }

        }
    };
    kernel.execute(1);
}
 
示例5
/**
 * Runs the {@code ZMatMul1D} kernel on two {@code N * N}-element boolean arrays
 * (element (i, j) stored at index {@code i * N + j}) and checks its output against
 * the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Zrun1D() {
   final boolean[] A = new boolean[N * N];
   final boolean[] B = new boolean[N * N];
   // New Java arrays are already all-false, so the output buffer needs no explicit clearing.
   final boolean[] gpu = new boolean[N * N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[(i * N) + j] = ((i % 2) == 0) ^ ((j % 2) == 0);
         B[(i * N) + j] = ((i % 2) == 0) & ((j % 2) == 0);
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new ZMatMul1D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final boolean[] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例6
/**
 * Runs the {@code BMatMul1D} kernel on two {@code N * N}-element byte arrays
 * (element (i, j) stored at index {@code i * N + j}) and checks its output against
 * the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Brun1D() {
   final byte[] A = new byte[N * N];
   final byte[] B = new byte[N * N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final byte[] gpu = new byte[N * N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         // The narrowing casts intentionally wrap values that do not fit in a byte.
         A[(i * N) + j] = (byte) (i + j);
         B[(i * N) + j] = (byte) (i - j);
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new BMatMul1D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final byte[] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例7
/**
 * Runs the {@code SMatMul1D} kernel on two {@code N * N}-element short arrays
 * (element (i, j) stored at index {@code i * N + j}) and checks its output against
 * the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Srun1D() {
   final short[] A = new short[N * N];
   final short[] B = new short[N * N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final short[] gpu = new short[N * N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[(i * N) + j] = (short) (i + j);
         B[(i * N) + j] = (short) (i - j);
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new SMatMul1D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final short[] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例8
/**
 * Runs the {@code IMatMul1D} kernel on two {@code N * N}-element int arrays
 * (element (i, j) stored at index {@code i * N + j}) and checks its output against
 * the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Irun1D() {
   final int[] A = new int[N * N];
   final int[] B = new int[N * N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final int[] gpu = new int[N * N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[(i * N) + j] = i + j;
         B[(i * N) + j] = i - j;
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new IMatMul1D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final int[] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例9
/**
 * Runs the {@code LMatMul1D} kernel on two {@code N * N}-element long arrays
 * (element (i, j) stored at index {@code i * N + j}) and checks its output against
 * the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Lrun1D() {
   final long[] A = new long[N * N];
   final long[] B = new long[N * N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final long[] gpu = new long[N * N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[(i * N) + j] = i + j;
         B[(i * N) + j] = i - j;
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new LMatMul1D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final long[] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例10
/**
 * Runs the {@code FMatMul1D} kernel on two {@code N * N}-element float arrays
 * (element (i, j) stored at index {@code i * N + j}) and checks its output against
 * the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Frun1D() {
   final float[] A = new float[N * N];
   final float[] B = new float[N * N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final float[] gpu = new float[N * N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[(i * N) + j] = i + j;
         B[(i * N) + j] = i - j;
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new FMatMul1D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final float[] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例11
/**
 * Runs the {@code DMatMul1D} kernel on two {@code N * N}-element double arrays
 * (element (i, j) stored at index {@code i * N + j}) and checks its output against
 * the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Drun1D() {
   final double[] A = new double[N * N];
   final double[] B = new double[N * N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final double[] gpu = new double[N * N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[(i * N) + j] = i + j;
         B[(i * N) + j] = i - j;
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new DMatMul1D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final double[] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例12
/**
 * Runs the {@code ZMatMul2D} kernel on two {@code N x N} boolean arrays and checks its
 * output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Zrun2D() {
   final boolean[][] A = new boolean[N][N];
   final boolean[][] B = new boolean[N][N];
   // New Java arrays are already all-false, so the output buffer needs no explicit clearing.
   final boolean[][] gpu = new boolean[N][N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[i][j] = ((i % 2) == 0) ^ ((j % 2) == 0);
         B[i][j] = ((i % 2) == 0) & ((j % 2) == 0);
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new ZMatMul2D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final boolean[][] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例13
/**
 * Runs the {@code BMatMul2D} kernel on two {@code N x N} byte arrays and checks its
 * output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Brun2D() {
   final byte[][] A = new byte[N][N];
   final byte[][] B = new byte[N][N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final byte[][] gpu = new byte[N][N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         // The narrowing casts intentionally wrap values that do not fit in a byte.
         A[i][j] = (byte) (i + j);
         B[i][j] = (byte) (i - j);
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new BMatMul2D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final byte[][] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例14
/**
 * Runs the {@code SMatMul2D} kernel on two {@code N x N} short arrays and checks its
 * output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Srun2D() {
   final short[][] A = new short[N][N];
   final short[][] B = new short[N][N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final short[][] gpu = new short[N][N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[i][j] = (short) (i + j);
         B[i][j] = (short) (i - j);
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new SMatMul2D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final short[][] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例15
/**
 * Runs the {@code IMatMul2D} kernel on two {@code N x N} int arrays and checks its
 * output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Irun2D() {
   final int[][] A = new int[N][N];
   final int[][] B = new int[N][N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final int[][] gpu = new int[N][N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[i][j] = i + j;
         B[i][j] = i - j;
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new IMatMul2D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final int[][] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例16
/**
 * Runs the {@code LMatMul2D} kernel on two {@code N x N} long arrays and checks its
 * output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Lrun2D() {
   final long[][] A = new long[N][N];
   final long[][] B = new long[N][N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final long[][] gpu = new long[N][N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[i][j] = i + j;
         B[i][j] = i - j;
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new LMatMul2D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final long[][] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例17
/**
 * Runs the {@code FMatMul2D} kernel on two {@code N x N} float arrays and checks its
 * output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Frun2D() {
   final float[][] A = new float[N][N];
   final float[][] B = new float[N][N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final float[][] gpu = new float[N][N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[i][j] = i + j;
         B[i][j] = i - j;
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new FMatMul2D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final float[][] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例18
/**
 * Runs the {@code DMatMul2D} kernel on two {@code N x N} double arrays and checks its
 * output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Drun2D() {
   final double[][] A = new double[N][N];
   final double[][] B = new double[N][N];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final double[][] gpu = new double[N][N];

   for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
         A[i][j] = i + j;
         B[i][j] = i - j;
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new DMatMul2D(A, B, gpu, N);
   try {
      kernel.execute(N * N);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final double[][] cpu = matMull(A, B, N);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例19
/**
 * Runs the {@code ZMatMul3D} kernel on two {@code M x M x M} boolean arrays and checks
 * its output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Zrun3D() {
   final boolean[][][] A = new boolean[M][M][M];
   final boolean[][][] B = new boolean[M][M][M];
   // New Java arrays are already all-false, so the output buffer needs no explicit clearing.
   final boolean[][][] gpu = new boolean[M][M][M];

   for (int i = 0; i < M; i++) {
      for (int j = 0; j < M; j++) {
         for (int k = 0; k < M; k++) {
            A[i][j][k] = ((i % 2) == 0) ^ (((j % 2) == 0) & ((k % 2) == 0));
            B[i][j][k] = (((i % 2) == 0) & ((j % 2) == 0)) ^ ((k % 2) == 0);
         }
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new ZMatMul3D(A, B, gpu, M);
   try {
      kernel.execute(M * M * M);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final boolean[][][] cpu = matMull(A, B, M);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例20
/**
 * Runs the {@code BMatMul3D} kernel on two {@code M x M x M} byte arrays and checks
 * its output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Brun3D() {
   final byte[][][] A = new byte[M][M][M];
   final byte[][][] B = new byte[M][M][M];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final byte[][][] gpu = new byte[M][M][M];

   for (int i = 0; i < M; i++) {
      for (int j = 0; j < M; j++) {
         for (int k = 0; k < M; k++) {
            // The narrowing casts intentionally wrap values that do not fit in a byte.
            A[i][j][k] = (byte) (i + j + k);
            B[i][j][k] = (byte) ((i - j) + k);
         }
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new BMatMul3D(A, B, gpu, M);
   try {
      kernel.execute(M * M * M);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final byte[][][] cpu = matMull(A, B, M);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例21
/**
 * Runs the {@code SMatMul3D} kernel on two {@code M x M x M} short arrays and checks
 * its output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Srun3D() {
   final short[][][] A = new short[M][M][M];
   final short[][][] B = new short[M][M][M];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final short[][][] gpu = new short[M][M][M];

   for (int i = 0; i < M; i++) {
      for (int j = 0; j < M; j++) {
         for (int k = 0; k < M; k++) {
            A[i][j][k] = (short) (i + j + k);
            B[i][j][k] = (short) ((i - j) + k);
         }
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new SMatMul3D(A, B, gpu, M);
   try {
      kernel.execute(M * M * M);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final short[][][] cpu = matMull(A, B, M);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例22
/**
 * Runs the {@code IMatMul3D} kernel on two {@code M x M x M} int arrays and checks
 * its output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Irun3D() {
   final int[][][] A = new int[M][M][M];
   final int[][][] B = new int[M][M][M];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final int[][][] gpu = new int[M][M][M];

   for (int i = 0; i < M; i++) {
      for (int j = 0; j < M; j++) {
         for (int k = 0; k < M; k++) {
            A[i][j][k] = i + j + k;
            B[i][j][k] = (i - j) + k;
         }
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new IMatMul3D(A, B, gpu, M);
   try {
      kernel.execute(M * M * M);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final int[][][] cpu = matMull(A, B, M);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例23
/**
 * Runs the {@code LMatMul3D} kernel on two {@code M x M x M} long arrays and checks
 * its output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Lrun3D() {
   final long[][][] A = new long[M][M][M];
   final long[][][] B = new long[M][M][M];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final long[][][] gpu = new long[M][M][M];

   for (int i = 0; i < M; i++) {
      for (int j = 0; j < M; j++) {
         for (int k = 0; k < M; k++) {
            A[i][j][k] = i + j + k;
            B[i][j][k] = (i - j) + k;
         }
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new LMatMul3D(A, B, gpu, M);
   try {
      kernel.execute(M * M * M);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final long[][][] cpu = matMull(A, B, M);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例24
/**
 * Runs the {@code FMatMul3D} kernel on two {@code M x M x M} float arrays and checks
 * its output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Frun3D() {
   final float[][][] A = new float[M][M][M];
   final float[][][] B = new float[M][M][M];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final float[][][] gpu = new float[M][M][M];

   for (int i = 0; i < M; i++) {
      for (int j = 0; j < M; j++) {
         for (int k = 0; k < M; k++) {
            A[i][j][k] = i + j + k;
            B[i][j][k] = (i - j) + k;
         }
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new FMatMul3D(A, B, gpu, M);
   try {
      kernel.execute(M * M * M);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final float[][][] cpu = matMull(A, B, M);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例25
/**
 * Runs the {@code DMatMul3D} kernel on two {@code M x M x M} double arrays and checks
 * its output against the result returned by {@code matMull}.
 *
 * <p>Prints the elapsed wall-clock milliseconds of both computations, then
 * "valid? yes" or "valid? no" according to {@code checkResults}. The kernel is disposed
 * right after it has run so that its resources are released even if execution throws.
 */
public static void Drun3D() {
   final double[][][] A = new double[M][M][M];
   final double[][][] B = new double[M][M][M];
   // New Java arrays are already zero-filled, so the output buffer needs no explicit clearing.
   final double[][][] gpu = new double[M][M][M];

   for (int i = 0; i < M; i++) {
      for (int j = 0; j < M; j++) {
         for (int k = 0; k < M; k++) {
            A[i][j][k] = i + j + k;
            B[i][j][k] = (i - j) + k;
         }
      }
   }

   // The measured interval covers kernel construction and execution, not disposal.
   long gs = System.currentTimeMillis();
   final Kernel kernel = new DMatMul3D(A, B, gpu, M);
   try {
      kernel.execute(M * M * M);
      gs = System.currentTimeMillis() - gs;
   } finally {
      kernel.dispose();
   }

   long cs = System.currentTimeMillis();
   // matMull returns its own result array, so none is allocated up front.
   final double[][][] cpu = matMull(A, B, M);
   cs = System.currentTimeMillis() - cs;

   System.out.println("gpu time: " + gs + "\ncpu time: " + cs);
   System.out.print("valid? ");
   System.out.println(checkResults(cpu, gpu) ? "yes" : "no");
}
 
示例26
/**
 * Executes {@code kernel} over a 2D range derived from the two term counts.
 *
 * <p>Each term count is incremented until {@code isPowerOfTwo} accepts it, and the two
 * rounded values become the range dimensions. The three buffers are handed to the kernel
 * with {@code put} before execution, and {@code subResultMatrix} is fetched with
 * {@code get} afterwards, so the outcome is delivered through that array; nothing is returned.
 *
 * @param device device the range is created for; when {@code null} the range is created without one
 * @param subMatrixA first input buffer passed to the kernel
 * @param matrixA_NumTerms first range dimension before rounding
 * @param subMatrixB second input buffer passed to the kernel
 * @param matrixB_NumTerms second range dimension before rounding
 * @param numLongs not read by this method
 * @param subResultMatrix buffer passed to the kernel and read back after execution
 * @param kernel the kernel to execute
 */
private static void executeKernel(final Device device, final long[] subMatrixA, final int matrixA_NumTerms, final long[] subMatrixB, final int matrixB_NumTerms, final int numLongs, final int[] subResultMatrix, final Kernel kernel) {

   // Power of Two for best performance
   int widthA = matrixA_NumTerms;
   while (!isPowerOfTwo(widthA)) {
      widthA++;
   }

   int widthB = matrixB_NumTerms;
   while (!isPowerOfTwo(widthB)) {
      widthB++;
   }

   // Without a device, use the Range factory that does not take one.
   final Range range = (device == null)
         ? Range.create2D(widthA, widthB)
         : Range.create2D(device, widthA, widthB);

   if (LOG.isDebugEnabled()) {
      LOG.debug("Range: " + range);
   }

   // Hand all three buffers to the kernel before running it.
   kernel.put(subMatrixA);
   kernel.put(subMatrixB);
   kernel.put(subResultMatrix);

   kernel.execute(range);

   // Read the result buffer back once execution has finished.
   kernel.get(subResultMatrix);
}
 
示例27
/**
 * Adds two float arrays of 512 random values element by element inside an Aparapi
 * kernel, prints every operand pair with its sum, and disposes the kernel.
 *
 * @param _args command-line arguments; not used
 */
public static void main(String[] _args) {

   final int size = 512;

   final float[] a = new float[size];
   final float[] b = new float[size];
   final float[] sum = new float[size];

   // Fill both operands with Math.random() values scaled by 100.
   int fill = 0;
   while (fill < size) {
      a[fill] = (float) (Math.random() * 100);
      b[fill] = (float) (Math.random() * 100);
      fill++;
   }

   // Each work item adds the pair of elements at its own global id.
   final Kernel kernel = new Kernel(){
      @Override public void run() {
         int gid = getGlobalId();
         sum[gid] = a[gid] + b[gid];
      }
   };

   final Range range = Range.create(size);
   kernel.execute(range);

   for (int row = 0; row < size; row++) {
      System.out.printf("%6.2f + %6.2f = %8.2f\n", a[row], b[row], sum[row]);
   }

   kernel.dispose();
}
 
示例28
/**
 * Computes the squares of the values 0..511 inside an Aparapi kernel, prints the
 * device the kernel targeted followed by every value with its square, and disposes the kernel.
 *
 * @param _args command-line arguments; not used
 */
public static void main(String[] _args) {

   final int size = 512;

   // Input float array for which square values need to be computed.
   final float[] values = new float[size];

   // Initialize input array.
   for (int i = 0; i < size; i++) {
      values[i] = i;
   }

   // Output array which will be populated with square values of corresponding input array elements.
   final float[] squares = new float[size];

   // Aparapi Kernel which computes squares of input array elements and populates them in
   // corresponding elements of output array.
   Kernel kernel = new Kernel(){
      @Override public void run() {
         int gid = getGlobalId();
         squares[gid] = values[gid] * values[gid];
      }
   };

   // Execute Kernel. The range is tied to `size` so it always matches the array lengths.
   kernel.execute(Range.create(size));

   // Report target execution mode: GPU or JTP (Java Thread Pool).
   System.out.println("Device = " + kernel.getTargetDevice().getShortDescription());

   // Display computed square values.
   for (int i = 0; i < size; i++) {
      System.out.printf("%6.0f %8.0f\n", values[i], squares[i]);
   }

   // Dispose Kernel resources.
   kernel.dispose();
}
 
示例29
/**
 * Sets the {@code CacheEnabler.cachesEnabled} flag.
 *
 * <p>When the requested value differs from the current one, the {@code Kernel} and
 * {@code ClassModel} caches are invalidated before the flag is updated.
 *
 * @param cachesEnabled the new value of the flag
 */
public static void setCachesEnabled(boolean cachesEnabled) {
   final boolean stateChanges = cachesEnabled != CacheEnabler.cachesEnabled;
   if (stateChanges) {
      // Invalidate first, then flip the flag below.
      Kernel.invalidateCaches();
      ClassModel.invalidateCaches();
   }
   CacheEnabler.cachesEnabled = cachesEnabled;
}
 
示例30
/**
 * Returns the preferences registered for the runtime class of {@code kernel}, creating
 * and registering them on first request.
 *
 * <p>Lookup and registration both happen while holding the {@code preferences} monitor.
 *
 * @param kernel the kernel whose class selects the preferences entry
 * @return the existing or newly created preferences for that class
 */
public KernelPreferences getPreferences(Kernel kernel) {
   final Class<? extends Kernel> kernelClass = kernel.getClass();
   synchronized (preferences) {
      final PreferencesWrapper wrapper = preferences.get(kernelClass);
      if (wrapper != null) {
         // Reuse the wrapper already fetched instead of querying the map a second time.
         return wrapper.getPreferences();
      }
      final KernelPreferences kernelPreferences = new KernelPreferences(this, kernelClass);
      preferences.put(kernelClass, new PreferencesWrapper(kernelClass, kernelPreferences));
      return kernelPreferences;
   }
}