diff --git a/.gitignore b/.gitignore
index b509eebe..8e0d5325 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,3 @@ target
project/boot
project/target
*~
-Tests
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index d8bf3b6c..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,16 +0,0 @@
-language: java
-
-jdk:
-- oraclejdk8
-- openjdk6
-
-cache:
- directories:
- - $HOME/.m2
-
-env:
-- MAVEN_OPTS=-Xmx512m CL_LOG_ERRORS=stdout
-
-before_install:
- - sudo apt-get update -qq
- - sudo apt-get install -qq fglrx=2:8.960-0ubuntu1 opencl-headers
diff --git a/Blas/.classpath b/Blas/.classpath
deleted file mode 100644
index 3abb79c1..00000000
--- a/Blas/.classpath
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-
-
-
-
-
-
-
diff --git a/Blas/.project b/Blas/.project
deleted file mode 100644
index 85ec62de..00000000
--- a/Blas/.project
+++ /dev/null
@@ -1,29 +0,0 @@
-
-
- Blas
-
-
-
-
-
- org.eclipse.jdt.core.javabuilder
-
-
-
-
- org.maven.ide.eclipse.maven2Builder
-
-
-
-
- org.eclipse.iam.jdt.core.mavenIncrementalBuilder
-
-
-
-
-
- org.maven.ide.eclipse.maven2Nature
- org.eclipse.iam.jdt.core.mavenNature
- org.eclipse.jdt.core.javanature
-
-
diff --git a/Blas/.settings/org.eclipse.jdt.core.prefs b/Blas/.settings/org.eclipse.jdt.core.prefs
deleted file mode 100644
index 0a83a729..00000000
--- a/Blas/.settings/org.eclipse.jdt.core.prefs
+++ /dev/null
@@ -1,5 +0,0 @@
-#Thu Jan 28 23:48:19 CET 2010
-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
-org.eclipse.jdt.core.compiler.compliance=1.6
-org.eclipse.jdt.core.compiler.source=1.6
diff --git a/Blas/.settings/org.maven.ide.eclipse.prefs b/Blas/.settings/org.maven.ide.eclipse.prefs
deleted file mode 100644
index a03b0328..00000000
--- a/Blas/.settings/org.maven.ide.eclipse.prefs
+++ /dev/null
@@ -1,9 +0,0 @@
-#Thu Jan 28 23:46:13 CET 2010
-activeProfiles=
-eclipse.preferences.version=1
-fullBuildGoals=process-test-resources
-includeModules=false
-resolveWorkspaceProjects=true
-resourceFilterGoals=process-resources resources\:testResources
-skipCompilerPlugin=true
-version=1
diff --git a/Blas/javacl-blas-bridj.iml b/Blas/javacl-blas-bridj.iml
deleted file mode 100644
index 1a89a315..00000000
--- a/Blas/javacl-blas-bridj.iml
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/Blas/javacl-blas.iml b/Blas/javacl-blas.iml
deleted file mode 100644
index 2b7d843d..00000000
--- a/Blas/javacl-blas.iml
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/Blas/pom.xml b/Blas/pom.xml
deleted file mode 100644
index 40eef44e..00000000
--- a/Blas/pom.xml
+++ /dev/null
@@ -1,51 +0,0 @@
-
-
- 4.0.0
- com.nativelibs4java
- javacl-blas
- JavaCL BLAS / BridJ
- http://code.google.com/p/javacl/
- jar
-
-
- com.nativelibs4java
- javacl-parent
- 1.0-SNAPSHOT
- ..
-
-
-
-
-
- com.nativelibs4java
- javacl
-
-
-
- org.ujmp
- ujmp-core
- 0.2.4
-
-
-
-
-
-
-
- com.nativelibs4java
- maven-javacl-plugin
-
-
-
-
-
-
- nativelibs4java-legacy
- NativeLibs4Java Legacy Repository
- http://nativelibs4java.sourceforge.net/maven/
-
-
-
-
-
-
diff --git a/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLDefaultMatrix2D.java b/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLDefaultMatrix2D.java
deleted file mode 100644
index 1f90ce31..00000000
--- a/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLDefaultMatrix2D.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-package com.nativelibs4java.opencl.blas;
-
-import com.nativelibs4java.opencl.CLBuffer;
-import com.nativelibs4java.opencl.CLContext;
-import com.nativelibs4java.opencl.CLEvent;
-import com.nativelibs4java.opencl.CLMem.Usage;
-import com.nativelibs4java.opencl.CLQueue;
-import com.nativelibs4java.opencl.util.Primitive;
-
-import org.bridj.Pointer;
-
-/**
- *
- * @author ochafik
- */
-public class CLDefaultMatrix2D implements CLMatrix2D {
- protected final Primitive primitive;
- protected final Class primitiveClass;
- protected final long rows, columns, stride, length;
- protected final int blockSize;
-
- protected final CLKernels kernels;
- protected final CLBuffer buffer;
- protected final CLQueue queue;
- protected final CLContext context;
- protected CLEvents _events = new CLEvents();
-
- public static final int DEFAULT_BLOCK_SIZE = 16;
-
- public CLDefaultMatrix2D(Primitive primitive, CLBuffer buffer, long rows, long columns, CLKernels kernels) {
- this(primitive, buffer, rows, columns, DEFAULT_BLOCK_SIZE, kernels);
- }
- public CLDefaultMatrix2D(Primitive primitive, CLBuffer buffer, long rows, long columns, int blockSize, CLKernels kernels) {
- this.primitive = primitive;
- this.primitiveClass = (Class)primitive.primitiveType;
- this.stride = CLMatrixUtils.roundUp(columns, blockSize);
- this.length = this.stride * CLMatrixUtils.roundUp(rows, blockSize);
- if (buffer != null) {
- if (buffer.getElementCount() < this.length) {
- throw new IllegalArgumentException("Buffer size too small; buffer of size " + this.length + " expected, size " + buffer.getByteCount() + " was given");
- }
- this.buffer = buffer;
- } else {
- this.buffer = (CLBuffer)kernels.getContext().createBuffer(Usage.InputOutput, primitive.primitiveType, length);
- }
- this.kernels = kernels;
- this.rows = rows;
- this.columns = columns;
- this.queue = kernels.getQueue();
- this.context = kernels.getContext();
- this.blockSize = blockSize;
-
- assert getBuffer().getElementCount() >= stride * rows &&
- getBuffer().getElementCount() <= stride * CLMatrixUtils.roundUp(rows, getBlockSize());
- }
-
- public CLMatrix2D blankClone() {
- return blankMatrix(getRowCount(), getColumnCount());
- }
- public CLMatrix2D blankMatrix(long rows, long columns) {
- return new CLDefaultMatrix2D(primitive, null, rows, columns, blockSize, kernels);
- }
-
- public long getRowCount() {
- return rows;
- }
-
- public long getColumnCount() {
- return columns;
- }
-
- public long getStride() {
- return stride;
- }
-
- public int getBlockSize() {
- return blockSize;
- }
-
- public CLEvents getEvents() {
- return _events;
- }
-
- public void write(final Pointer b) {
- getEvents().performWrite(new CLEvents.Action() {
- public CLEvent perform(CLEvent[] events) {
- return buffer.write(queue, b, false, events);
- }
- });
- }
-
- public void read(final Pointer b) {
- getEvents().performRead(new CLEvents.Action() {
- public CLEvent perform(CLEvent[] events) {
- return buffer.read(queue, b, true, events);
- }
- });
- }
- public Pointer read() {
- Pointer out = Pointer.allocateArray(primitiveClass, length);
- read(out);
- return out;
- }
-
-
- public CLBuffer getBuffer() {
- return buffer;
- }
-
- public CLContext getContext() {
- return context;
- }
-
- public synchronized CLQueue getQueue() {
- return queue;
- }
-
- /*
- public synchronized void setQueue(CLQueue queue) {
- if (this.queue != null && queue != null) {
- if (this.queue.equals(queue))
- return;
- }
- getEvents().waitFor();
- this.queue = queue;
- }
- * */
-
- public Primitive getPrimitive() {
- return primitive;
- }
-
- public Class getPrimitiveClass() {
- return primitiveClass;
- }
-
- public CLKernels getKernels() {
- return kernels;
- }
-
-}
diff --git a/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLEvents.java b/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLEvents.java
deleted file mode 100644
index f78a714f..00000000
--- a/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLEvents.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-package com.nativelibs4java.opencl.blas;
-
-import com.nativelibs4java.opencl.CLEvent;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- *
- * @author ochafik
- */
-public class CLEvents {
- CLEvent lastWriteEvent;
- List readEvents = new ArrayList();
-
- List listeners = new ArrayList();
-
- public interface Listener {
- void writing(CLEvents evts);
- void reading(CLEvents evts);
- }
- public interface Action {
- CLEvent perform(CLEvent[] events);
- }
-
- public synchronized void addListener(Listener l) {
- listeners.add(l);
- }
- public synchronized void removeListener(Listener l) {
- listeners.remove(l);
- }
- static final CLEvent[] EMPTY_EVENTS = new CLEvent[0];
- protected synchronized CLEvent clearEvents(Action action) {
- int nReads = readEvents.size();
- boolean hasWrite = lastWriteEvent != null;
- int n = nReads + (hasWrite ? 1 : 0);
- CLEvent[] evts = n == 0 ? EMPTY_EVENTS : readEvents.toArray(new CLEvent[n]);
- if (hasWrite)
- evts[nReads] = lastWriteEvent;
- CLEvent evt = action.perform(evts);
- lastWriteEvent = null;
- readEvents.clear();
- return evt;
- }
- public synchronized CLEvent performRead(Action action) {
- for (Listener listener : listeners)
- listener.writing(this);
- CLEvent evt = action.perform(lastWriteEvent == null ? EMPTY_EVENTS : new CLEvent[] { lastWriteEvent });
- if (evt != null) {
- readEvents.add(evt);
- lastWriteEvent = null; // read completed only if the optional write also completed
- }
- return evt;
- }
-
- public synchronized void performRead(Runnable action) {
- for (Listener listener : listeners)
- listener.reading(this);
- waitForRead();
- action.run();
- }
-
- public synchronized CLEvent performWrite(Action action) {
- return lastWriteEvent = clearEvents(action);
- }
-
- /**
- * Wait until all write operations are completed so that the data is readable.
- */
- public synchronized void waitForRead() {
- CLEvent.waitFor(lastWriteEvent);
- lastWriteEvent = null;
- }
- /**
- * Wait for all associated operations to complete (read or write).
- */
- public synchronized void waitFor() {
- clearEvents(new Action() {
- public CLEvent perform(CLEvent[] evts) {
- CLEvent.waitFor(evts);
- return null;
- }
- });
- }
-}
diff --git a/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLKernels.java b/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLKernels.java
deleted file mode 100644
index 140f021f..00000000
--- a/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLKernels.java
+++ /dev/null
@@ -1,419 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package com.nativelibs4java.opencl.blas;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-import com.nativelibs4java.opencl.CLBuffer;
-import com.nativelibs4java.opencl.CLBuildException;
-import com.nativelibs4java.opencl.CLContext;
-import com.nativelibs4java.opencl.CLEvent;
-import com.nativelibs4java.opencl.CLKernel;
-import com.nativelibs4java.opencl.CLMem.Usage;
-import com.nativelibs4java.opencl.CLPlatform.DeviceFeature;
-import com.nativelibs4java.opencl.CLProgram;
-import com.nativelibs4java.opencl.CLQueue;
-import com.nativelibs4java.opencl.JavaCL;
-import com.nativelibs4java.opencl.LocalSize;
-import com.nativelibs4java.opencl.util.Fun1;
-import com.nativelibs4java.opencl.util.Fun2;
-import com.nativelibs4java.opencl.util.LinearAlgebraUtils;
-import com.nativelibs4java.opencl.util.ParallelMath;
-import com.nativelibs4java.opencl.util.Primitive;
-
-import static com.nativelibs4java.opencl.blas.CLMatrixUtils.roundUp;
-import static org.bridj.Pointer.pointerToInt;
-
-/**
- *
- * @author ochafik
- */
-public class CLKernels {
- protected final LinearAlgebraUtils kernels;
- protected final ParallelMath math;
- protected final CLContext context;
- protected final CLQueue queue;
-
- private static volatile CLKernels instance;
-
- public static synchronized void setInstance(CLKernels kernels) {
- instance = kernels;
- }
- public static synchronized CLKernels getInstance() {
- if (instance == null) {
- try {
- instance = new CLKernels();
- } catch (Throwable ex) {
- throw new RuntimeException(ex);
- }
- }
- return instance;
- }
-
- public CLKernels() throws IOException, CLBuildException {
- this(
- JavaCL.createBestContext(
- DeviceFeature.DoubleSupport,
- DeviceFeature.MaxComputeUnits
- ).createDefaultQueue()
- );
- }
- public CLKernels(CLQueue queue) throws IOException, CLBuildException {
- kernels = new LinearAlgebraUtils(queue);
- math = new ParallelMath(queue);
- context = queue.getContext();
- this.queue = queue;
- }
-
- public CLEvent op1(Primitive prim, Fun1 fun, CLBuffer a, long rows, long columns, long stride, CLBuffer out, CLEvent... eventsToWaitFor) throws CLBuildException {
- long length = rows * stride;
- if (out == null || out.getElementCount() < length)
- throw new IllegalArgumentException("Expected buffer of length >= " + length + ", got " + out);
- //if (out != null)
- // out = (CLBuffer)context.createBuffer(Usage.Output, prim.primitiveType, length);
-
- CLKernel kernel = math.getKernel(fun, prim);
- synchronized (kernel) {
- kernel.setArgs(a, out, length);
- CLEvent evt = kernel.enqueueNDRange(queue, new int [] { (int)length }, eventsToWaitFor);
- return evt;
- }
- }
-
- public CLEvent op2(Primitive prim, Fun2 fun, CLBuffer a, CLBuffer b, long rows, long columns, long stride, CLBuffer out, CLEvent... eventsToWaitFor) throws CLBuildException {
- long length = rows * stride;
- if (out == null || out.getElementCount() < length)
- throw new IllegalArgumentException("Expected buffer of length >= " + length + ", got " + out.getElementCount());
- //if (out != null)
- // out = (CLBuffer)context.createBuffer(Usage.Output, prim.primitiveType, length);
-
- CLKernel kernel = math.getKernel(fun, prim, false);
- synchronized (kernel) {
- kernel.setArgs(a, b, out, length);
- CLEvent evt = kernel.enqueueNDRange(queue, new int [] { (int)length }, eventsToWaitFor);
- return evt;
- }
- }
-
- public CLEvent op2(Primitive prim, Fun2 fun, CLBuffer a, T b, long rows, long columns, long stride, CLBuffer out, CLEvent... eventsToWaitFor) throws CLBuildException {
- long length = rows * stride;
- if (out == null || out.getElementCount() < length)
- throw new IllegalArgumentException("Expected buffer of length >= " + length + ", got " + out.getElementCount());
- //if (out != null)
- // out = (CLBuffer)context.createBuffer(Usage.Output, prim.primitiveType, length);
-
- CLKernel kernel = math.getKernel(fun, prim, true);
- synchronized (kernel) {
- kernel.setArgs(a, b, out, length);
- CLEvent evt = kernel.enqueueNDRange(queue, new int [] { (int)length }, eventsToWaitFor);
- return evt;
- }
- }
-
- Map containsValueKernels = new HashMap();
- public boolean containsValue(Primitive primitive, CLBuffer buffer, long length, V value, CLEvent... eventsToWaitFor) throws CLBuildException {
- CLKernel kernel;
- synchronized (containsValueKernels) {
- kernel = containsValueKernels.get(primitive);
- if (kernel == null) {
- kernel = context.createProgram((
- primitive.getRequiredPragmas() +
- "__kernel void containsValue( \n" +
- " __global const double* a, \n" +
- " int length, \n" +
- " double value, \n" +
- " __global int* pOut \n" +
- ") { \n" +
- " int i = get_global_id(0);\n" +
- " if (i >= length) \n" +
- " return; \n" +
- " \n" +
- " if (a[i] == value) \n" +
- " *pOut = 1; \n" +
- "} \n"
- ).replaceAll("double", primitive.clTypeName())).createKernel("containsValue");
- containsValueKernels.put(primitive, kernel);
- }
- }
- synchronized(kernel) {
- CLBuffer pOut = context.createBuffer(Usage.Output, pointerToInt(0));
- kernel.setArgs(buffer, (int)length, value, pOut);
- kernel.enqueueNDRange(queue, new int[] { (int)length }, eventsToWaitFor).waitFor();
- return pOut.read(queue).getInt() != 0;
- }
- }
-
- Map clearKernels = new HashMap();
- public CLEvent clear(Primitive primitive, CLBuffer buffer, long length, CLEvent... eventsToWaitFor) throws CLBuildException {
- CLKernel kernel;
- synchronized (clearKernels) {
- kernel = clearKernels.get(primitive);
- if (kernel == null) {
- kernel = context.createProgram((
- primitive.getRequiredPragmas() +
- "__kernel void clear_buffer( \n" +
- " __global double* a, \n" +
- " int length \n" +
- ") { \n" +
- " int i = get_global_id(0); \n" +
- " if (i >= length) \n" +
- " return; \n" +
- " \n" +
- " a[i] = (double)0; \n" +
- "} \n"
- ).replaceAll("double", primitive.clTypeName())).createKernel("clear_buffer");
- clearKernels.put(primitive, kernel);
- }
- }
- synchronized(kernel) {
- kernel.setArgs(buffer, (int)length);
- CLEvent evt = kernel.enqueueNDRange(queue, new int[] { (int)length }, eventsToWaitFor);
- //Object array = buffer.read(queue, evt).getArray();
- return evt;
- }
- }
-
- Map matrixMultiplyKernels = new HashMap();
- public CLEvent matrixMultiply(Primitive prim,
- CLBuffer a, long aRows, long aColumns, long aStride, int aBlockSize,
- CLBuffer b, long bRows, long bColumns, long bStride, int bBlockSize,
- CLBuffer out, CLEvent... eventsToWaitFor) throws CLBuildException {
- boolean useBlocks = false;
- int blockSize = aBlockSize;
- if (blockSize > 1 && blockSize == bBlockSize) {
- long[] maxWorkItemSizes = queue.getDevice().getMaxWorkItemSizes();
- useBlocks = maxWorkItemSizes.length >= 2 &&
- maxWorkItemSizes[0] >= blockSize &&
- maxWorkItemSizes[1] >= blockSize;
- }
- if (useBlocks) {
- return blockMatrixMultiply(
- blockSize, prim,
- a, roundUp(aRows, blockSize), roundUp(aColumns, blockSize),
- b, roundUp(bRows, blockSize), roundUp(bColumns, blockSize),
- out, eventsToWaitFor);
- } else {
- return naiveMatrixMultiply(prim, a, aRows, aColumns, aStride, b, bRows, bColumns, bStride, out, eventsToWaitFor);
- }
- }
- public CLEvent blockMatrixMultiply(int blockSize, Primitive prim, CLBuffer a, long aRows, long aColumns, CLBuffer b, long bRows, long bColumns, CLBuffer out, CLEvent... eventsToWaitFor) throws CLBuildException {
- if (out == null)
- throw new IllegalArgumentException("Null output matrix !");
- //if (out != null)
- // out = (CLBuffer)context.createBuffer(Usage.Output, prim.primitiveType, aRows * bColumns);
-
- CLKernel kernel;
- String key = "block_" + blockSize + "_" + prim;
- synchronized (matrixMultiplyKernels) {
- kernel = matrixMultiplyKernels.get(key);
- if (kernel == null) {
- String src = prim.getRequiredPragmas() +
- "#define BLOCK_SIZE " + blockSize + "\n" +
- "#define AS(i, j) As[j + i * BLOCK_SIZE]\n" +
- "#define BS(i, j) Bs[j + i * BLOCK_SIZE]\n" +
- "\n" +
- "__kernel void mulMat( " +
- " __global const double* A, int aColumns, " +
- " __global const double* B, int bColumns, " +
- " __global double* C, " +
- " __local double* As, " +
- " __local double* Bs " +
- ") { " +
- " // Block index\n" +
- " int bx = get_group_id(0);\n" +
- " int by = get_group_id(1);\n" +
- "\n" +
- " // Thread index\n" +
- " int tx = get_local_id(0);\n" +
- " int ty = get_local_id(1);\n" +
- "\n" +
- " // Index of the first sub-matrix of A processed by the block\n" +
- " int aBegin = aColumns * BLOCK_SIZE * by + aColumns * ty + tx;\n" +
- "\n" +
- " // Index of the last sub-matrix of A processed by the block\n" +
- " int aEnd = aBegin + aColumns;\n" +
- "\n" +
- " // Step size used to iterate through the sub-matrices of A\n" +
- " int aStep = BLOCK_SIZE;\n" +
- "\n" +
- " // Index of the first sub-matrix of B processed by the block\n" +
- " int bBegin = BLOCK_SIZE * bx + bColumns * ty + tx;\n" +
- "\n" +
- " // Step size used to iterate through the sub-matrices of B\n" +
- " int bStep = BLOCK_SIZE * bColumns;\n" +
- "\n" +
- " // total is used to store the element of the block sub-matrix\n" +
- " // that is computed by the thread\n" +
- " float total = 0.0f;\n" +
- "\n" +
- " // Loop over all the sub-matrices of A and B\n" +
- " // required to compute the block sub-matrix\n" +
- " for (int a = aBegin, b = bBegin;\n" +
- " a < aEnd;\n" +
- " a += aStep, b += bStep) {\n" +
- "\n" +
- " // Load the matrices from device memory\n" +
- " // to shared memory; each thread loads\n" +
- " // one element of each matrix\n" +
- " AS(ty, tx) = A[a];\n" +
- " BS(ty, tx) = B[b];\n" +
- "\t\n" +
- " // Synchronize to make sure the matrices are loaded\n" +
- " barrier(CLK_LOCAL_MEM_FENCE);\n" +
- "\n" +
- " // Multiply the two matrices together;\n" +
- " // each thread computes one element\n" +
- " // of the block sub-matrix \n" +
- " #pragma unroll\n" +
- " for (int k = 0; k < BLOCK_SIZE; ++k)\n" +
- " total += AS(ty, k) * BS(k, tx);\n" +
- "\n" +
- " // Synchronize to make sure that the preceding\n" +
- " // computation is done before loading two new\n" +
- " // sub-matrices of A and B in the next iteration\n" +
- " barrier(CLK_LOCAL_MEM_FENCE);\n" +
- " }\n" +
- "\n" +
- " C[get_global_id(1) * get_global_size(0) + get_global_id(0)] = total;\n" +
- "} "
- ;
- String clTypeName = prim.clTypeName();
- src = src.replaceAll("double", clTypeName);
- kernel = context.createProgram(src).createKernel("mulMat");
- matrixMultiplyKernels.put(key, kernel);
- }
- }
- synchronized (kernel) {
- kernel.setArgs(a, (int) aColumns, b, (int) bColumns, out,
- LocalSize.ofFloatArray(blockSize * blockSize),
- LocalSize.ofFloatArray(blockSize * blockSize));
- CLEvent evt = kernel.enqueueNDRange(queue,
- new int[]{(int) aRows, (int) bColumns},
- new int[]{blockSize, blockSize},
- eventsToWaitFor);
- return evt;
- }
- }
-
- public CLEvent naiveMatrixMultiply(Primitive prim,
- CLBuffer a, long aRows, long aColumns, long aStride,
- CLBuffer b, long bRows, long bColumns, long bStride,
- CLBuffer out, CLEvent... eventsToWaitFor) throws CLBuildException {
- if (out == null)
- throw new IllegalArgumentException("Null output matrix !");
- //if (out != null)
- // out = (CLBuffer)context.createBuffer(Usage.Output, prim.primitiveType, aRows * bColumns);
-
- CLKernel kernel;
- String key = "naive_" + prim;
- synchronized (matrixMultiplyKernels) {
- kernel = matrixMultiplyKernels.get(key);
- if (kernel == null) {
- String src = prim.getRequiredPragmas() +
- "__kernel void mulMat( " +
- " __global const double* a, int aRows, int aColumns, int aStride, " +
- " __global const double* b, int bColumns, int bStride, " +
- " __global double* c " +
- ") { " +
- " int i = get_global_id(0); " +
- " int j = get_global_id(1); " +
- " " +
- " if (i >= aRows || j >= bColumns) return; " +
- " double total = 0; " +
- " size_t iOff = i * (size_t)aStride; " +
- " for (long k = 0; k < aColumns; k++) { " +
- " total += a[iOff + k] * b[k * (size_t)bStride + j]; " +
- " } " +
- " c[i * (size_t)bStride + j] = total; " +
- "} "
- ;
- String clTypeName = prim.clTypeName();
- src = src.replaceAll("double", clTypeName);
- kernel = context.createProgram(src).createKernel("mulMat");
- matrixMultiplyKernels.put(key, kernel);
- }
- }
- synchronized (kernel) {
- // assert aStride == aColumns: ("Weird a stride: aStride = " + aStride + ", aColumns = " + aColumns);
- // assert bStride == bColumns: ("Weird b stride: bStride = " + bStride + ", bColumns = " + bColumns);
- kernel.setArgs(a, (int)aRows, (int)aColumns, (int)aStride, b, (int)bColumns, (int)bStride, out);
- CLEvent evt = kernel.enqueueNDRange(queue, new int [] { (int)aRows, (int)bColumns }, eventsToWaitFor);
- return evt;
- }
- }
-
- Map matrixTransposeKernels = new HashMap();
- public CLEvent matrixTranspose(Primitive prim, CLBuffer a, long aRows, long aColumns, long aStride, CLBuffer out, CLEvent... eventsToWaitFor) throws CLBuildException {
- if (out == null)
- throw new IllegalArgumentException("Null output matrix !");
- //if (out != null)
- // out = (CLBuffer)context.createBuffer(Usage.Output, prim.primitiveType, aRows * aColumns);
-
- CLKernel[] kernels;
- synchronized (matrixTransposeKernels) {
- kernels = matrixTransposeKernels.get(prim);
- if (kernels == null) {
- String src =
- prim.getRequiredPragmas() +
- "__kernel void transposeSelf( \n" +
- " __global double* a, int aRows, int aColumns, int aStride \n" +
- ") { \n" +
- " int i = get_global_id(0); \n" +
- " int j = get_global_id(1); \n" +
- " \n" +
- " if (i >= aRows || j >= aColumns || j >= i) return; \n" +
- " \n" +
- " size_t aIndex = i * aStride + j; \n" +
- " size_t outIndex = j * aRows + i; \n" +
- " double temp = a[outIndex]; \n" +
- " a[outIndex] = a[aIndex]; \n" +
- " a[aIndex] = temp; \n" +
- "} \n" +
- "__kernel void transposeOther( \n" +
- " __global const double* a, int aRows, int aColumns, int aStride, \n" +
- " __global double* out \n" +
- ") { \n" +
- " int i = get_global_id(0); \n" +
- " int j = get_global_id(1); \n" +
- " \n" +
- " if (i >= aRows || j >= aColumns) return; \n" +
- " \n" +
- " size_t aIndex = i * aStride + j; \n" +
- " size_t outIndex = j * aRows + i; \n" +
- " out[outIndex] = a[aIndex]; \n" +
- "} \n"
- ;
- String clTypeName = prim.clTypeName();
- src = src.replaceAll("double", clTypeName);
- CLProgram program = context.createProgram(src);
- kernels = new CLKernel[] { program.createKernel("transposeSelf"), program.createKernel("transposeOther") };
- matrixTransposeKernels.put(prim, kernels);
- }
- }
- boolean self = a.equals(out);
- CLKernel kernel = kernels[self ? 0 : 1];
- synchronized (kernel) {
- if (self)
- kernel.setArgs(a, (int)aRows, (int)aColumns, (int)aStride);
- else
- kernel.setArgs(a, (int)aRows, (int)aColumns, (int)aStride, out);
-
- CLEvent evt = kernel.enqueueNDRange(queue, new int [] { (int)aRows, (int)aColumns }, eventsToWaitFor);
- return evt;
- }
- }
-
- public CLContext getContext() {
- return context;
- }
-
- public CLQueue getQueue() {
- return queue;
- }
-
-}
diff --git a/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLMatrix2D.java b/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLMatrix2D.java
deleted file mode 100644
index e1ae8ec6..00000000
--- a/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLMatrix2D.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-package com.nativelibs4java.opencl.blas;
-
-import com.nativelibs4java.opencl.CLBuffer;
-import com.nativelibs4java.opencl.CLContext;
-import com.nativelibs4java.opencl.CLQueue;
-import com.nativelibs4java.opencl.util.Primitive;
-import org.bridj.Pointer;
-
-/**
- *
- * @author ochafik
- */
-public interface CLMatrix2D {
-
- Primitive getPrimitive();
- Class getPrimitiveClass();
- CLEvents getEvents();
- CLBuffer getBuffer();
- CLContext getContext();
- CLQueue getQueue();
- long getRowCount();
- long getColumnCount();
- long getStride();
- int getBlockSize();
- CLMatrix2D blankClone();
- CLMatrix2D blankMatrix(long rows, long columns);
- CLKernels getKernels();
-
- void write(Pointer b);
- void read(Pointer b);
- Pointer read();
-}
diff --git a/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLMatrixUtils.java b/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLMatrixUtils.java
deleted file mode 100644
index 1a4d79b5..00000000
--- a/Blas/src/main/java/com/nativelibs4java/opencl/blas/CLMatrixUtils.java
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-package com.nativelibs4java.opencl.blas;
-
-import com.nativelibs4java.opencl.CLBuildException;
-import com.nativelibs4java.opencl.CLEvent;
-import com.nativelibs4java.opencl.util.Fun1;
-import com.nativelibs4java.opencl.util.Fun2;
-import com.nativelibs4java.opencl.util.Primitive;
-import com.nativelibs4java.opencl.util.ReductionUtils.Reductor;
-
-/**
- *
- * @author ochafik
- */
-public class CLMatrixUtils {
-
- static CLEvent[] join(CLEvent[]... evts) {
- int n = 0;
- for (CLEvent[] e : evts)
- n += e.length;
- CLEvent[] out = new CLEvent[n];
- n = 0;
- for (CLEvent[] e : evts)
- System.arraycopy(e, 0, out, n, e.length);
-
- return out;
- }
-
- public static long roundUp(long size, int blockSize) {
- return ((size + blockSize - 1) / blockSize) * blockSize;
- }
-
- public static void matrixMultiply(
- final CLMatrix2D a,
- final CLMatrix2D b,
- final CLMatrix2D out)
- throws CLBuildException
- {
- final CLKernels kernels = a.getKernels();
- final Primitive primitive = a.getPrimitive();
- a.getEvents().performRead(new CLEvents.Action() {
- public CLEvent perform(final CLEvent[] aevents) {
- return b.getEvents().performRead(new CLEvents.Action() {
- public CLEvent perform(final CLEvent[] bevents) {
- return out.getEvents().performWrite(new CLEvents.Action() {
- public CLEvent perform(final CLEvent[] cevents) {
- CLEvent evt = kernels.matrixMultiply(
- primitive,
- a.getBuffer(), a.getRowCount(), a.getColumnCount(), a.getStride(), a.getBlockSize(),
- b.getBuffer(), b.getRowCount(), b.getColumnCount(), b.getStride(), b.getBlockSize(),
- out.getBuffer(),
- join(aevents, bevents, cevents)
- );
- return evt;
- }
- });
- }
- });
- }
- });
- }
-
- static final int MAX_REDUCTION_SIZE = 32;
-
- public static void reduce(
- final CLMatrix2D in,
- final CLMatrix2D out,
- final Reductor reductor
- ) {
- in.getEvents().performRead(new CLEvents.Action() {
- public CLEvent perform(final CLEvent[] ievents) {
- return out.getEvents().performWrite(new CLEvents.Action() {
- public CLEvent perform(CLEvent[] oevents) {
- return reductor.reduce(in.getQueue(), in.getBuffer(), in.getBuffer().getElementCount(), out.getBuffer(), MAX_REDUCTION_SIZE, join(ievents, oevents));
- }
- });
- }
- });
- }
- public static void matrixTranspose(
- final CLMatrix2D a,
- final CLMatrix2D out)
- throws CLBuildException
- {
- final Primitive primitive = a.getPrimitive();
- final CLKernels kernels = a.getKernels();
- a.getEvents().performRead(new CLEvents.Action() {
- public CLEvent perform(final CLEvent[] aevents) {
- return out.getEvents().performWrite(new CLEvents.Action() {
- public CLEvent perform(final CLEvent[] cevents) {
- CLEvent evt = kernels.matrixTranspose(
- primitive,
- a.getBuffer(),
- a.getRowCount(), a.getColumnCount(), a.getStride(),
- out.getBuffer(),
- join(aevents, cevents)
- );
- return evt;
- }
- });
- }
- });
- }
-
- public static CLMatrix2D clone(final CLMatrix2D matrix) {
- final CLMatrix2D out = matrix.blankClone();
- matrix.getEvents().performRead(new CLEvents.Action() {
- public CLEvent perform(final CLEvent[] aevents) {
- return out.getEvents().performWrite(new CLEvents.Action() {
- public CLEvent perform(CLEvent[] bevents) {
- return matrix.getBuffer().copyTo(matrix.getQueue(), out.getBuffer(), CLMatrixUtils.join(aevents, bevents));
- }
- });
- }
- });
- return out;
- }
-
-
- public static CLMatrix2D createMatrix(long rows, long columns, Class elementClass, CLKernels kernels) {
- if (elementClass == Double.class)
- return (CLMatrix2D)new CLDefaultMatrix2D(Primitive.Double, null, rows, columns, kernels);
-
- throw new UnsupportedOperationException("Cannot build buffers of " + elementClass.getName() + " yet");
- }
-
-
- public static CLMatrix2D op1(final CLMatrix2D in, final Fun1 fun, final CLMatrix2D out) throws CLBuildException {
- in.getEvents().performRead(new CLEvents.Action() {
-
- public CLEvent perform(final CLEvent[] ievents) {
- return out.getEvents().performWrite(new CLEvents.Action() {
-
- public CLEvent perform(CLEvent[] oevents) {
- return in.getKernels().op1(in.getPrimitive(), fun, in.getBuffer(),
- in.getRowCount(), in.getColumnCount(), in.getStride(),
- out.getBuffer(), join(ievents, oevents));
- }
- });
- }
- });
- return out;
- }
-
-
- public static CLMatrix2D op2(final CLMatrix2D in1, final Fun2 fun, final CLMatrix2D in2, final CLMatrix2D out) throws CLBuildException {
- in1.getEvents().performRead(new CLEvents.Action() {
-
- public CLEvent perform(final CLEvent[] i1events) {
- return in2.getEvents().performRead(new CLEvents.Action() {
-
- public CLEvent perform(final CLEvent[] i2events) {
- return out.getEvents().performWrite(new CLEvents.Action() {
-
- public CLEvent perform(CLEvent[] oevents) {
- return in1.getKernels().op2(in1.getPrimitive(), fun,
- in1.getBuffer(), in2.getBuffer(),
- in1.getRowCount(), in1.getColumnCount(), in1.getStride(),
- out.getBuffer(), join(i1events, i2events, oevents));
- }
- });
- }
- });
- }
- });
- return out;
- }
-
- public static CLMatrix2D op2(final CLMatrix2D in, final Fun2 fun, final V s2, final CLMatrix2D out) throws CLBuildException {
- in.getEvents().performRead(new CLEvents.Action() {
- public CLEvent perform(final CLEvent[] ievents) {
- return out.getEvents().performWrite(new CLEvents.Action() {
-
- public CLEvent perform(CLEvent[] oevents) {
- return in.getKernels().op2(
- in.getPrimitive(), fun, in.getBuffer(), s2,
- in.getRowCount(), in.getColumnCount(), in.getStride(),
- out.getBuffer(),
- join(ievents, oevents));
- }
- });
- }
- });
- return out;
- }
-
-}
diff --git a/Blas/src/main/java/com/nativelibs4java/opencl/blas/ujmp/CLDenseDoubleMatrix2D.java b/Blas/src/main/java/com/nativelibs4java/opencl/blas/ujmp/CLDenseDoubleMatrix2D.java
deleted file mode 100644
index 04d41160..00000000
--- a/Blas/src/main/java/com/nativelibs4java/opencl/blas/ujmp/CLDenseDoubleMatrix2D.java
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package com.nativelibs4java.opencl.blas.ujmp;
-
-import com.nativelibs4java.opencl.blas.CLMatrix2D;
-import com.nativelibs4java.opencl.blas.CLMatrixUtils;
-import com.nativelibs4java.opencl.blas.CLDefaultMatrix2D;
-import com.nativelibs4java.opencl.blas.CLKernels;
-import com.nativelibs4java.opencl.blas.CLEvents.Action;
-import java.nio.DoubleBuffer;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.ujmp.core.Matrix;
-import org.ujmp.core.calculation.Calculation.Ret;
-import org.ujmp.core.doublematrix.DoubleMatrix2D;
-import org.ujmp.core.exceptions.MatrixException;
-
-import com.nativelibs4java.opencl.CLBuildException;
-import com.nativelibs4java.opencl.CLEvent;
-import com.nativelibs4java.opencl.CLMem.MapFlags;
-import com.nativelibs4java.opencl.CLMem.Usage;
-import com.nativelibs4java.opencl.CLQueue;
-import com.nativelibs4java.opencl.CLBuffer;
-import com.nativelibs4java.opencl.CLContext;
-import com.nativelibs4java.opencl.util.LinearAlgebraUtils;
-import com.nativelibs4java.opencl.util.Primitive;
-import com.nativelibs4java.util.NIOUtils;
-import java.nio.Buffer;
-import org.bridj.Pointer;
-import org.ujmp.core.doublematrix.stub.AbstractDenseDoubleMatrix2D;
-import org.ujmp.core.matrix.Matrix2D;
-
-/**
- *
- * @author ochafik
- */
-public class CLDenseDoubleMatrix2D extends AbstractDenseDoubleMatrix2D {
-
- protected final CLDenseMatrix2DImpl impl;
-
- public CLDenseMatrix2DImpl getImpl() {
- return impl;
- }
- public CLDenseDoubleMatrix2D(CLDenseMatrix2DImpl impl) {
- this.impl = impl;
- }
- public CLDenseDoubleMatrix2D(CLMatrix2D matrix) {
- this(new CLDenseMatrix2DImpl(matrix));
- }
- public CLDenseDoubleMatrix2D(long rows, long columns, CLKernels kernels) {
- this(new CLDefaultMatrix2D(Primitive.Double, null, rows, columns, kernels));
- }
- public CLDenseDoubleMatrix2D(long rows, long columns, CLKernels clUJMP, int blockSize) {
- this(new CLDefaultMatrix2D(Primitive.Double, null, rows, columns, blockSize, clUJMP));
- }
- public CLDenseDoubleMatrix2D(long rows, long columns) {
- this(rows, columns, CLKernels.getInstance());
- }
- public CLDenseDoubleMatrix2D(long size) {
- this(size, size);
- }
- public CLDenseDoubleMatrix2D(long... size) {
- this(size[0], size[1], CLKernels.getInstance());
- }
-
- public long getStride() {
- return getImpl().getStride();
- }
-
- public void write(Pointer p) {
- getImpl().write(p);
- }
-
- public void read(Pointer p) {
- getImpl().read(p);
- }
-
- public Pointer read() {
- return getImpl().read();
- }
-
- static CLDenseDoubleMatrix2D inst(CLMatrix2D matrix) {
- return new CLDenseDoubleMatrix2D(matrix);
- }
-
- static CLDenseDoubleMatrix2D inst(CLDenseMatrix2DImpl matrix) {
- return new CLDenseDoubleMatrix2D(matrix);
- }
-
- @Override
- public Matrix mtimes(Ret returnType, boolean ignoreNaN, Matrix matrix) throws MatrixException {
- if (matrix instanceof Matrix2D) {
- return inst(getImpl().multiply(returnType, ignoreNaN, (Matrix2D)matrix));
- } else {
- return super.mtimes(returnType, ignoreNaN, matrix);
- }
- }
-
- @Override
- public Matrix mtimes(Matrix matrix) throws MatrixException {
- return mtimes(Ret.NEW, true, matrix);
- }
-
-
-
- @Override
- public Iterable