/*
 * Decompiled with CFR 0.152.
 */
package org.nd4j.linalg.jcublas;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.IntPointer;
import org.bytedeco.javacpp.Loader;
import org.bytedeco.javacpp.LongPointer;
import org.bytedeco.javacpp.Pointer;
import org.bytedeco.javacpp.PointerPointer;
import org.nd4j.common.base.Preconditions;
import org.nd4j.common.primitives.Pair;
import org.nd4j.common.util.ArrayUtil;
import org.nd4j.jita.allocator.enums.CudaConstants;
import org.nd4j.jita.allocator.impl.AllocationPoint;
import org.nd4j.jita.allocator.impl.AtomicAllocator;
import org.nd4j.jita.allocator.pointers.CudaPointer;
import org.nd4j.jita.allocator.pointers.cuda.cudaStream_t;
import org.nd4j.jita.conf.CudaEnvironment;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.buffer.DataTypeEx;
import org.nd4j.linalg.api.memory.MemcpyDirection;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.memory.enums.MemoryKind;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.CustomOp;
import org.nd4j.linalg.api.ops.custom.Flatten;
import org.nd4j.linalg.api.ops.executioner.GridExecutioner;
import org.nd4j.linalg.api.ops.impl.shape.Concat;
import org.nd4j.linalg.api.ops.performance.PerformanceTracker;
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.api.shape.options.ArrayOptionsHelper;
import org.nd4j.linalg.api.shape.options.ArrayType;
import org.nd4j.linalg.cache.TADManager;
import org.nd4j.linalg.compression.CompressedDataBuffer;
import org.nd4j.linalg.compression.CompressionDescriptor;
import org.nd4j.linalg.compression.CompressionType;
import org.nd4j.linalg.compression.CompressionUtils;
import org.nd4j.linalg.exception.ND4JIllegalStateException;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.jcublas.JCublasNDArray;
import org.nd4j.linalg.jcublas.blas.CudaBlas;
import org.nd4j.linalg.jcublas.blas.JcublasLapack;
import org.nd4j.linalg.jcublas.blas.JcublasLevel1;
import org.nd4j.linalg.jcublas.blas.JcublasLevel2;
import org.nd4j.linalg.jcublas.blas.JcublasLevel3;
import org.nd4j.linalg.jcublas.buffer.AddressRetriever;
import org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer;
import org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer;
import org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer;
import org.nd4j.linalg.jcublas.buffer.CudaLongDataBuffer;
import org.nd4j.linalg.jcublas.buffer.CudaUtf8Buffer;
import org.nd4j.linalg.jcublas.context.CudaContext;
import org.nd4j.nativeblas.BaseNativeNDArrayFactory;
import org.nd4j.nativeblas.LongPointerWrapper;
import org.nd4j.nativeblas.OpaqueDataBuffer;
import org.nd4j.nativeblas.PointerPointerWrapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class JCublasNDArrayFactory
extends BaseNativeNDArrayFactory {
    /** SLF4J logger bound to this factory class. */
    private static final Logger log = LoggerFactory.getLogger(JCublasNDArrayFactory.class);

    /**
     * Creates a factory that relies entirely on the superclass no-argument constructor.
     */
    public JCublasNDArrayFactory() {
        super();
    }

    /**
     * Creates a factory, forwarding the data type and (boxed) ordering to the superclass.
     *
     * @param dtype data type handed to the superclass constructor
     * @param order ordering handed to the superclass constructor
     */
    public JCublasNDArrayFactory(DataType dtype, Character order) {
        super(dtype, order);
    }

    /**
     * Creates a factory, forwarding the data type and ordering to the superclass, and then
     * touches the {@link AtomicAllocator} singleton.
     *
     * @param dtype data type handed to the superclass constructor
     * @param order ordering handed to the superclass constructor
     */
    public JCublasNDArrayFactory(DataType dtype, char order) {
        super(dtype, order);
        // Result is discarded; presumably this only forces the allocator singleton to be
        // initialised early. NOTE(review): the Character overload does not do this — confirm intent.
        AtomicAllocator.getInstance();
    }

    /**
     * Instantiates the CUDA BLAS wrapper and passes the native layer a table holding the
     * addresses of the cuBLAS / cuSOLVER symbols listed below.
     *
     * <p>The position of a symbol name in the list is the slot it occupies in the table,
     * so the order of the names must not be changed.
     *
     * @throws RuntimeException carrying the native error message when the native layer
     *                          reports a non-zero error code after initialisation
     */
    public void createBlas() {
        this.blas = new CudaBlas();

        // Slot i of the pointer table receives the address of symbols[i].
        String[] symbols = {
            "cublasSgemv_v2",
            "cublasDgemv_v2",
            "cublasHgemm",
            "cublasSgemm_v2",
            "cublasDgemm_v2",
            "cublasSgemmEx",
            "cublasHgemmBatched",
            "cublasSgemmBatched",
            "cublasDgemmBatched",
            "cusolverDnSgesvd_bufferSize",
            "cusolverDnDgesvd_bufferSize",
            "cusolverDnSgesvd",
            "cusolverDnDgesvd"
        };

        PointerPointer table = new PointerPointer((long) symbols.length);
        for (int slot = 0; slot < symbols.length; ++slot) {
            table.put((long) slot, Loader.addressof(symbols[slot]));
        }

        this.nativeOps.initializeFunctions(table);
        if (this.nativeOps.lastErrorCode() == 0) {
            return;
        }
        throw new RuntimeException(this.nativeOps.lastErrorMessage());
    }

    /**
     * Installs a fresh {@link JcublasLevel1} instance as this factory's level-1 BLAS.
     */
    public void createLevel1() {
        JcublasLevel1 impl = new JcublasLevel1();
        this.level1 = impl;
    }

    /**
     * Installs a fresh {@link JcublasLevel2} instance as this factory's level-2 BLAS.
     */
    public void createLevel2() {
        JcublasLevel2 impl = new JcublasLevel2();
        this.level2 = impl;
    }

    /**
     * Installs a fresh {@link JcublasLevel3} instance as this factory's level-3 BLAS.
     */
    public void createLevel3() {
        JcublasLevel3 impl = new JcublasLevel3();
        this.level3 = impl;
    }

    /**
     * Installs a fresh {@link JcublasLapack} instance as this factory's LAPACK implementation.
     */
    public void createLapack() {
        JcublasLapack impl = new JcublasLapack();
        this.lapack = impl;
    }

    /**
     * Builds a {@link JCublasNDArray} from a shape and an existing data buffer.
     *
     * @param shape  shape passed to the array constructor
     * @param buffer buffer passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(int[] shape, DataBuffer buffer) {
        INDArray created = new JCublasNDArray(shape, buffer);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} from a two-dimensional double array.
     *
     * @param data values passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(double[][] data) {
        INDArray created = new JCublasNDArray(data);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} from a two-dimensional double array and an ordering.
     *
     * @param data     values passed to the array constructor
     * @param ordering ordering character passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(double[][] data, char ordering) {
        INDArray created = new JCublasNDArray(data, ordering);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} around an existing data buffer.
     *
     * @param data buffer passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(DataBuffer data) {
        INDArray created = new JCublasNDArray(data);
        return created;
    }

    /**
     * Builds a two-dimensional {@link JCublasNDArray} over an existing buffer, using the
     * global default ordering from {@code Nd4j.order()} and the buffer's own data type.
     *
     * <p>The {@code offset} argument is accepted but not forwarded to the array constructor.
     *
     * @param data    backing buffer
     * @param rows    first shape element
     * @param columns second shape element
     * @param stride  strides, widened to {@code long[]} before use
     * @param offset  unused by this implementation
     * @return the newly constructed array
     */
    public INDArray create(DataBuffer data, long rows, long columns, int[] stride, long offset) {
        long[] shape = {rows, columns};
        long[] longStrides = ArrayUtil.toLongArray(stride);
        char defaultOrder = Nd4j.order().charValue();
        return new JCublasNDArray(data, shape, longStrides, defaultOrder, data.dataType());
    }

    /**
     * Builds a {@link JCublasNDArray} with the given shape and ordering.
     *
     * @param shape    shape passed to the array constructor
     * @param ordering ordering character passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(int[] shape, char ordering) {
        INDArray created = new JCublasNDArray(shape, ordering);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} with the given shape and ordering, passing
     * {@code false} as the constructor's final flag and {@code 0} as its offset.
     *
     * @param shape    shape of the array; also used to derive the strides
     * @param ordering ordering character used for stride computation and construction
     * @return the newly constructed array
     */
    public INDArray createUninitialized(int[] shape, char ordering) {
        int[] strides = Nd4j.getStrides(shape, ordering);
        return new JCublasNDArray(shape, strides, 0L, ordering, false);
    }

    /**
     * Builds a {@link JCublasNDArray} over an existing buffer with explicit shape, stride,
     * offset and ordering.
     *
     * @param data      backing buffer
     * @param newShape  shape passed to the array constructor
     * @param newStride strides passed to the array constructor
     * @param offset    offset passed to the array constructor
     * @param ordering  ordering character passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(DataBuffer data, int[] newShape, int[] newStride, long offset, char ordering) {
        INDArray created = new JCublasNDArray(data, newShape, newStride, offset, ordering);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} from float data, a shape, an offset and a boxed ordering.
     *
     * @param data   values passed to the array constructor
     * @param shape  shape passed to the array constructor
     * @param offset offset passed to the array constructor
     * @param order  ordering; unboxed before use, so it must not be {@code null}
     * @return the newly constructed array
     */
    public INDArray create(float[] data, int[] shape, long offset, Character order) {
        char ordering = order.charValue();
        return new JCublasNDArray(data, shape, offset, ordering);
    }

    /**
     * Builds a two-dimensional {@link JCublasNDArray} from float data.
     *
     * @param data     values passed to the array constructor
     * @param rows     first shape element
     * @param columns  second shape element
     * @param stride   strides, widened to {@code long[]} before use
     * @param offset   offset passed to the array constructor
     * @param ordering ordering character passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(float[] data, long rows, long columns, int[] stride, long offset, char ordering) {
        long[] shape = {rows, columns};
        long[] longStrides = ArrayUtil.toLongArray(stride);
        return new JCublasNDArray(data, shape, longStrides, offset, ordering);
    }

    /**
     * Builds a {@link JCublasNDArray} from double data, an {@code int[]} shape and an ordering.
     *
     * @param data     values passed to the array constructor
     * @param shape    shape passed to the array constructor
     * @param ordering ordering character passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(double[] data, int[] shape, char ordering) {
        INDArray created = new JCublasNDArray(data, shape, ordering);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} from double data, a {@code long[]} shape and an ordering.
     *
     * @param data     values passed to the array constructor
     * @param shape    shape passed to the array constructor
     * @param ordering ordering character passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(double[] data, long[] shape, char ordering) {
        INDArray created = new JCublasNDArray(data, shape, ordering);
        return created;
    }

    /**
     * Builds a UTF8 string array from a collection of strings.
     *
     * <p>Shape information is requested from the shape-info provider with
     * {@link DataType#UTF8}, the strings are packed into a {@link CudaUtf8Buffer}, and the
     * two are combined via {@code Nd4j.createArrayFromShapeBuffer}.
     *
     * @param strings the string values to store
     * @param shape   shape of the resulting array
     * @param order   ordering character used for the shape information
     * @return the newly constructed string array
     */
    public INDArray create(Collection<String> strings, long[] shape, char order) {
        Pair pairShape = Nd4j.getShapeInfoProvider().createShapeInformation(shape, order, DataType.UTF8);
        CudaUtf8Buffer buffer = new CudaUtf8Buffer(strings);
        // The defensive ArrayList copy of `strings` that used to be made here was never read.
        return Nd4j.createArrayFromShapeBuffer((DataBuffer)buffer, (Pair)pairShape);
    }

    /**
     * Builds a {@link JCublasNDArray} from a list of arrays, a shape and an ordering.
     *
     * @param list     source arrays passed to the array constructor
     * @param shape    shape passed to the array constructor
     * @param ordering ordering character passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(List<INDArray> list, int[] shape, char ordering) {
        INDArray created = new JCublasNDArray(list, shape, ordering);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} from double data, a shape and a buffer offset, using
     * the global default ordering from {@code Nd4j.order()}.
     *
     * <p>The offset must not be narrowed to {@code char}: doing so selects the
     * {@code (data, shape, ordering)} constructor, which drops the offset and treats its
     * low 16 bits as the ordering character. Instead the strides are derived for the
     * default ordering and the offset is forwarded as an offset.
     *
     * @param data   values passed to the array constructor
     * @param shape  shape of the array; also used to derive the strides
     * @param offset offset passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(double[] data, int[] shape, long offset) {
        char defaultOrder = Nd4j.order().charValue();
        int[] strides = Nd4j.getStrides(shape, defaultOrder);
        return new JCublasNDArray(data, shape, strides, offset, defaultOrder);
    }

    /**
     * Builds a {@link JCublasNDArray} from double data with explicit shape, stride, offset
     * and ordering.
     *
     * @param data     values passed to the array constructor
     * @param shape    shape passed to the array constructor
     * @param stride   strides passed to the array constructor
     * @param offset   offset passed to the array constructor
     * @param ordering ordering character passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(double[] data, int[] shape, int[] stride, long offset, char ordering) {
        INDArray created = new JCublasNDArray(data, shape, stride, offset, ordering);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} from float data with explicit shape, stride and offset.
     *
     * @param data   values passed to the array constructor
     * @param shape  shape passed to the array constructor
     * @param stride strides passed to the array constructor
     * @param offset offset passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(float[] data, int[] shape, int[] stride, long offset) {
        INDArray created = new JCublasNDArray(data, shape, stride, offset);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} from double data with explicit shape, stride and offset.
     *
     * @param data   values passed to the array constructor
     * @param shape  shape passed to the array constructor
     * @param stride strides passed to the array constructor
     * @param offset offset passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(double[] data, int[] shape, int[] stride, long offset) {
        INDArray created = new JCublasNDArray(data, shape, stride, offset);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} over an existing buffer with the given shape.
     *
     * @param data  backing buffer
     * @param shape shape passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(DataBuffer data, int[] shape) {
        INDArray created = new JCublasNDArray(data, shape);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} over an existing buffer, using the global default
     * ordering from {@code Nd4j.order()} and the buffer's own data type.
     *
     * <p>The {@code offset} argument is accepted but not forwarded to the array constructor.
     *
     * @param data   backing buffer
     * @param shape  shape, widened to {@code long[]} before use
     * @param stride strides, widened to {@code long[]} before use
     * @param offset unused by this implementation
     * @return the newly constructed array
     */
    public INDArray create(DataBuffer data, int[] shape, int[] stride, long offset) {
        long[] longShape = ArrayUtil.toLongArray(shape);
        long[] longStrides = ArrayUtil.toLongArray(stride);
        char defaultOrder = Nd4j.order().charValue();
        return new JCublasNDArray(data, longShape, longStrides, defaultOrder, data.dataType());
    }

    /**
     * Builds a {@link JCublasNDArray} from a list of arrays and a shape.
     *
     * <p>When this factory's ordering is {@code 'f'}, Fortran strides computed from the
     * shape are passed explicitly; otherwise the two-argument constructor is used.
     *
     * @param list  source arrays passed to the array constructor
     * @param shape shape passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(List<INDArray> list, int[] shape) {
        if (this.order != 'f') {
            return new JCublasNDArray(list, shape);
        }
        int[] fortranStrides = ArrayUtil.calcStridesFortran(shape);
        return new JCublasNDArray(list, shape, fortranStrides);
    }

    /**
     * Builds a {@link JCublasNDArray} from float data, a shape and an offset.
     *
     * @param data   values passed to the array constructor
     * @param shape  shape passed to the array constructor
     * @param offset offset passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(float[] data, int[] shape, long offset) {
        INDArray created = new JCublasNDArray(data, shape, offset);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} from float data converted to a typed buffer of the
     * requested data type; the workspace is forwarded to the buffer creation.
     *
     * @param data      source values
     * @param shape     shape passed to the array constructor
     * @param stride    strides passed to the array constructor
     * @param order     ordering character passed to the array constructor
     * @param dataType  data type of the typed buffer and of the array
     * @param workspace workspace passed to {@code Nd4j.createTypedBuffer}
     * @return the newly constructed array
     */
    public INDArray create(float[] data, long[] shape, long[] stride, char order, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typed = Nd4j.createTypedBuffer(data, dataType, workspace);
        return new JCublasNDArray(typed, shape, stride, order, dataType);
    }

    /**
     * Builds a {@link JCublasNDArray} from long data converted to a typed buffer of the
     * requested data type.
     *
     * <p>The {@code workspace} argument is accepted but not used by this implementation.
     *
     * @param data      source values
     * @param shape     shape passed to the array constructor
     * @param stride    strides passed to the array constructor
     * @param order     ordering character passed to the array constructor
     * @param dataType  data type of the typed buffer and of the array
     * @param workspace unused
     * @return the newly constructed array
     */
    public INDArray create(long[] data, long[] shape, long[] stride, char order, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typed = Nd4j.createTypedBuffer(data, dataType);
        return new JCublasNDArray(typed, shape, stride, order, dataType);
    }

    /**
     * Builds a {@link JCublasNDArray} from int data converted to a typed buffer of the
     * requested data type.
     *
     * <p>The {@code workspace} argument is accepted but not used by this implementation.
     *
     * @param data      source values
     * @param shape     shape passed to the array constructor
     * @param stride    strides passed to the array constructor
     * @param order     ordering character passed to the array constructor
     * @param dataType  data type of the typed buffer and of the array
     * @param workspace unused
     * @return the newly constructed array
     */
    public INDArray create(int[] data, long[] shape, long[] stride, char order, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typed = Nd4j.createTypedBuffer(data, dataType);
        return new JCublasNDArray(typed, shape, stride, order, dataType);
    }

    /**
     * Builds a {@link JCublasNDArray} from short data converted to a typed buffer of the
     * requested data type.
     *
     * <p>The {@code workspace} argument is accepted but not used by this implementation.
     *
     * @param data      source values
     * @param shape     shape passed to the array constructor
     * @param stride    strides passed to the array constructor
     * @param order     ordering character passed to the array constructor
     * @param dataType  data type of the typed buffer and of the array
     * @param workspace unused
     * @return the newly constructed array
     */
    public INDArray create(short[] data, long[] shape, long[] stride, char order, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typed = Nd4j.createTypedBuffer(data, dataType);
        return new JCublasNDArray(typed, shape, stride, order, dataType);
    }

    /**
     * Builds a {@link JCublasNDArray} from byte data converted to a typed buffer of the
     * requested data type.
     *
     * <p>The {@code workspace} argument is accepted but not used by this implementation.
     *
     * @param data      source values
     * @param shape     shape passed to the array constructor
     * @param stride    strides passed to the array constructor
     * @param order     ordering character passed to the array constructor
     * @param dataType  data type of the typed buffer and of the array
     * @param workspace unused
     * @return the newly constructed array
     */
    public INDArray create(byte[] data, long[] shape, long[] stride, char order, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typed = Nd4j.createTypedBuffer(data, dataType);
        return new JCublasNDArray(typed, shape, stride, order, dataType);
    }

    /**
     * Builds a {@link JCublasNDArray} from boolean data converted to a typed buffer of the
     * requested data type.
     *
     * <p>The {@code workspace} argument is accepted but not used by this implementation.
     *
     * @param data      source values
     * @param shape     shape passed to the array constructor
     * @param stride    strides passed to the array constructor
     * @param order     ordering character passed to the array constructor
     * @param dataType  data type of the typed buffer and of the array
     * @param workspace unused
     * @return the newly constructed array
     */
    public INDArray create(boolean[] data, long[] shape, long[] stride, char order, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typed = Nd4j.createTypedBuffer(data, dataType);
        return new JCublasNDArray(typed, shape, stride, order, dataType);
    }

    /**
     * Builds a {@link JCublasNDArray} from a two-dimensional float array.
     *
     * @param floats values passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(float[][] floats) {
        INDArray created = new JCublasNDArray(floats);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} from a two-dimensional float array and an ordering.
     *
     * @param data     values passed to the array constructor
     * @param ordering ordering character passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(float[][] data, char ordering) {
        INDArray created = new JCublasNDArray(data, ordering);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} from float data with explicit shape, stride, offset
     * and ordering.
     *
     * @param data     values passed to the array constructor
     * @param shape    shape passed to the array constructor
     * @param stride   strides passed to the array constructor
     * @param offset   offset passed to the array constructor
     * @param ordering ordering character passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(float[] data, int[] shape, int[] stride, long offset, char ordering) {
        INDArray created = new JCublasNDArray(data, shape, stride, offset, ordering);
        return created;
    }

    /**
     * Builds a {@link JCublasNDArray} over an existing buffer with a shape and an offset.
     *
     * @param buffer backing buffer
     * @param shape  shape passed to the array constructor
     * @param offset offset passed to the array constructor
     * @return the newly constructed array
     */
    public INDArray create(DataBuffer buffer, int[] shape, long offset) {
        INDArray created = new JCublasNDArray(buffer, shape, offset);
        return created;
    }

    /**
     * Flattens the given arrays using this factory's own ordering.
     *
     * @param matrices arrays to flatten
     * @return the result of {@link #toFlattened(char, Collection)} with {@code this.order()}
     */
    public INDArray toFlattened(Collection<INDArray> matrices) {
        char factoryOrder = this.order();
        return this.toFlattened(factoryOrder, matrices);
    }

    /**
     * Flattens the given arrays by executing a {@link Flatten} custom op.
     *
     * <p>If the active executioner is a {@link GridExecutioner}, its queue is flushed
     * before the op is run.
     *
     * @param order    ordering character passed to the {@code Flatten} op
     * @param matrices arrays to flatten
     * @return the first output of the executed op
     */
    public INDArray toFlattened(char order, Collection<INDArray> matrices) {
        if (Nd4j.getExecutioner() instanceof GridExecutioner) {
            GridExecutioner grid = (GridExecutioner) Nd4j.getExecutioner();
            grid.flushQueue();
        }
        INDArray[] inputs = matrices.toArray(new INDArray[0]);
        CustomOp flatten = new Flatten(order, inputs);
        INDArray[] outputs = Nd4j.exec(flatten);
        return outputs[0];
    }

    /**
     * Concatenates the given arrays along a dimension by executing a {@link Concat} custom op.
     *
     * <p>{@code push()} is called on the executioner before the op is run.
     *
     * @param dimension dimension passed to the {@code Concat} op
     * @param toConcat  arrays to concatenate
     * @return the first output of the executed op
     */
    public INDArray concat(int dimension, INDArray ... toConcat) {
        Nd4j.getExecutioner().push();
        CustomOp concatOp = new Concat(dimension, toConcat);
        INDArray[] outputs = Nd4j.exec(concatOp);
        return outputs[0];
    }

    /**
     * Concatenates arrays along {@code dimension} using the native {@code specialConcat}
     * routine, which is given host pointers for every input and for the result, and then
     * copies the result from host to device.
     *
     * <p>A single input is returned as-is (no copy). Compressed inputs are decompressed in
     * place before use.
     *
     * @param dimension dimension along which sizes are summed; all other dimensions must
     *                  match the first array's shape
     * @param toConcat  arrays to concatenate
     * @return a new array with the first input's data type and the global default ordering,
     *         or {@code toConcat[0]} itself when only one array is given
     * @throws IllegalArgumentException if an input differs from the first array's shape in
     *                                  any dimension other than {@code dimension}
     * @throws RuntimeException         if the native layer reports a non-zero error code
     */
    public INDArray specialConcat(int dimension, INDArray ... toConcat) {
        if (toConcat.length == 1) {
            return toConcat[0];
        }
        // Flush any queued ops before touching host memory directly.
        if (Nd4j.getExecutioner() instanceof GridExecutioner) {
            ((GridExecutioner)Nd4j.getExecutioner()).flushQueue();
        }
        PointerPointer shapeInfoPointers = new PointerPointer((long)toConcat.length);
        PointerPointer dataPointers = new PointerPointer((long)toConcat.length);
        AtomicAllocator allocator = AtomicAllocator.getInstance();
        CudaContext context = allocator.getDeviceContext();
        int sumAlongDim = 0;
        // Output shape starts as a copy of the first input's shape; only `dimension` is replaced below.
        long[] outputShape = ArrayUtil.copy((long[])toConcat[0].shape());
        for (int i = 0; i < toConcat.length; ++i) {
            ((BaseCudaDataBuffer)toConcat[i].data()).lazyAllocateHostPointer();
            if (toConcat[i].isCompressed()) {
                Nd4j.getCompressor().decompressi(toConcat[i]);
            }
            // Make the host copy current, then record host pointers for shape info and data.
            allocator.synchronizeHostData(toConcat[i]);
            shapeInfoPointers.put((long)i, allocator.getHostPointer(toConcat[i].shapeInfoDataBuffer()));
            dataPointers.put((long)i, allocator.getHostPointer(toConcat[i].data()));
            // NOTE(review): the running total is narrowed to int on every iteration.
            sumAlongDim = (int)((long)sumAlongDim + toConcat[i].size(dimension));
            // Every non-concat dimension must agree with the first array.
            for (int j = 0; j < toConcat[i].rank(); ++j) {
                if (j == dimension || toConcat[i].size(j) == outputShape[j]) continue;
                throw new IllegalArgumentException("Illegal concatenation at array " + i + " and shape element " + j);
            }
        }
        outputShape[dimension] = sumAlongDim;
        INDArray ret = Nd4j.createUninitialized((DataType)toConcat[0].dataType(), (long[])outputShape, (char)Nd4j.order().charValue());
        ((BaseCudaDataBuffer)ret.data()).lazyAllocateHostPointer();
        // The native call receives the result's address pointers; the device copy is refreshed afterwards.
        this.nativeOps.specialConcat(null, dimension, toConcat.length, dataPointers, shapeInfoPointers, ret.data().addressPointer(), (LongPointer)ret.shapeInfoDataBuffer().addressPointer(), null, null);
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        AllocationPoint point = allocator.getAllocationPoint(ret);
        long perfD = PerformanceTracker.getInstance().helperStartTransaction();
        // Host -> device copy of the whole result on the special stream, followed by a stream sync.
        this.nativeOps.memcpyAsync(point.getDevicePointer(), point.getHostPointer(), ret.length() * (long)Nd4j.sizeOfDataType((DataType)ret.data().dataType()), CudaConstants.cudaMemcpyHostToDevice, (Pointer)context.getSpecialStream());
        context.getSpecialStream().synchronize();
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        PerformanceTracker.getInstance().helperRegisterTransaction(point.getDeviceId(), perfD, point.getNumberOfBytes(), MemcpyDirection.HOST_TO_DEVICE);
        // Update the allocation point's access bookkeeping after the copy.
        point.tickHostRead();
        point.tickDeviceWrite();
        return ret;
    }

    /**
     * Pulls the indexed slices from {@code source} into a new array that uses the global
     * default ordering from {@code Nd4j.order()}.
     *
     * @param source          array to read from
     * @param sourceDimension dimension argument forwarded to the four-argument overload
     * @param indexes         indexes to pull
     * @return the array produced by {@link #pullRows(INDArray, int, int[], char)}
     */
    public INDArray pullRows(INDArray source, int sourceDimension, int[] indexes) {
        char defaultOrder = Nd4j.order().charValue();
        return this.pullRows(source, sourceDimension, indexes, defaultOrder);
    }

    /**
     * Variant of {@link #pullRows(INDArray, int, int[])} taking {@code long} indexes, which
     * are converted with {@code ArrayUtil.toInts} before delegating.
     *
     * @param source          array to read from
     * @param sourceDimension dimension argument forwarded to the {@code int[]} overload
     * @param indexes         indexes to pull
     * @return the array produced by the {@code int[]} overload
     */
    public INDArray pullRows(INDArray source, int sourceDimension, long[] indexes) {
        int[] intIndexes = ArrayUtil.toInts(indexes);
        return this.pullRows(source, sourceDimension, intIndexes);
    }

    /**
     * Allocates an uninitialized destination of the appropriate shape and delegates to
     * {@link #pullRows(INDArray, INDArray, int, int[])}.
     *
     * <p>Destination shape: {@code [indexes.length]} for a rank-1 source;
     * {@code [indexes.length, source.shape()[1]]} when {@code sourceDimension == 1};
     * {@code [source.shape()[0], indexes.length]} when {@code sourceDimension == 0}.
     *
     * @param source          array to read from
     * @param sourceDimension 0 or 1 (ignored for rank-1 sources)
     * @param indexes         indexes to pull; must be non-null and non-empty
     * @param order           ordering of the destination array
     * @return the filled destination array
     * @throws IllegalStateException         if {@code indexes} is null or empty
     * @throws UnsupportedOperationException if the source is not rank 1 and
     *                                       {@code sourceDimension} is neither 0 nor 1
     */
    public INDArray pullRows(INDArray source, int sourceDimension, int[] indexes, char order) {
        boolean noIndexes = indexes == null || indexes.length < 1;
        if (noIndexes) {
            throw new IllegalStateException("Indexes can't be null or zero-length");
        }

        final long[] targetShape;
        if (source.rank() == 1) {
            targetShape = new long[]{indexes.length};
        } else {
            switch (sourceDimension) {
                case 1:
                    targetShape = new long[]{indexes.length, source.shape()[sourceDimension]};
                    break;
                case 0:
                    targetShape = new long[]{source.shape()[sourceDimension], indexes.length};
                    break;
                default:
                    throw new UnsupportedOperationException("2D input is expected");
            }
        }

        INDArray destination = Nd4j.createUninitialized(source.dataType(), targetShape, order);
        return this.pullRows(source, destination, sourceDimension, indexes);
    }

    /**
     * Pulls the indexed slices of {@code source} into {@code destination} through the
     * native {@code pullRows} routine.
     *
     * <p>Expected destination shape: {@code [indexes.length]} for a rank-1 source;
     * {@code [indexes.length, source.shape()[1]]} when {@code sourceDimension == 1};
     * {@code [source.shape()[0], indexes.length]} when {@code sourceDimension == 0}.
     *
     * <p>A {@code null} destination is allowed: a new uninitialized array of the expected
     * shape is allocated with this factory's ordering. The data-type precondition is
     * therefore only evaluated when a destination is actually supplied (checking it
     * unconditionally would fail with a {@code NullPointerException} before the
     * allocation branch could run).
     *
     * @param source          array to read from
     * @param destination     array to write into, or {@code null} to allocate one
     * @param sourceDimension 0 or 1 (ignored for rank-1 sources)
     * @param indexes         indexes to pull; must be non-null and non-empty
     * @return the destination array (supplied or newly allocated)
     * @throws IllegalStateException         if {@code indexes} is null/empty or the supplied
     *                                       destination has an unexpected shape
     * @throws IllegalArgumentException      if source and destination data types differ
     * @throws UnsupportedOperationException if the source is not rank 1 and
     *                                       {@code sourceDimension} is neither 0 nor 1
     * @throws RuntimeException              if the native layer reports a non-zero error code
     */
    public INDArray pullRows(INDArray source, INDArray destination, int sourceDimension, int[] indexes) {
        Nd4j.getExecutioner().push();
        if (indexes == null || indexes.length < 1) {
            throw new IllegalStateException("Indexes can't be null or zero-length");
        }
        // Only compare data types when a destination exists; null means "allocate for me" below.
        if (destination != null) {
            Preconditions.checkArgument(source.dataType() == destination.dataType(), "Source and Destination data types must be the same");
        }
        long[] shape = null;
        if (source.rank() == 1) {
            shape = new long[]{indexes.length};
        } else if (sourceDimension == 1) {
            shape = new long[]{indexes.length, source.shape()[sourceDimension]};
        } else if (sourceDimension == 0) {
            shape = new long[]{source.shape()[sourceDimension], indexes.length};
        } else {
            throw new UnsupportedOperationException("2D input is expected");
        }
        INDArray ret = destination;
        if (ret == null) {
            ret = Nd4j.createUninitialized((DataType)source.dataType(), (long[])shape, (char)this.order);
        } else if (!Arrays.equals(shape, destination.shape())) {
            throw new IllegalStateException("Cannot pull rows into destination array: expected destination array of shape " + Arrays.toString(shape) + " but got destination array of shape " + Arrays.toString(destination.shape()));
        }
        AtomicAllocator allocator = AtomicAllocator.getInstance();
        CudaContext context = allocator.getFlowController().prepareAction(ret, source);
        OpaqueDataBuffer x = ((BaseCudaDataBuffer)source.data()).getOpaqueDataBuffer();
        OpaqueDataBuffer z = ((BaseCudaDataBuffer)ret.data()).getOpaqueDataBuffer();
        Pointer xShape = AtomicAllocator.getInstance().getPointer(source.shapeInfoDataBuffer(), context);
        Pointer zShape = AtomicAllocator.getInstance().getPointer(ret.shapeInfoDataBuffer(), context);
        PointerPointer extras = new PointerPointer(new Pointer[]{AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()), context.getOldStream(), allocator.getDeviceIdPointer()});
        // Indexes are widened to long (8 bytes each) and copied into a temporary buffer for the native call.
        CudaLongDataBuffer tempIndexes = new CudaLongDataBuffer(indexes.length);
        AtomicAllocator.getInstance().memcpyBlocking(tempIndexes, (Pointer)new LongPointer(ArrayUtil.toLongArray((int[])indexes)), indexes.length * 8, 0L);
        Pointer pIndex = AtomicAllocator.getInstance().getPointer(tempIndexes, context);
        // TAD shape info and offsets are looked up along sourceDimension for both source and result.
        TADManager tadManager = Nd4j.getExecutioner().getTADManager();
        Pair tadBuffers = tadManager.getTADOnlyShapeInfo(source, new int[]{sourceDimension});
        Pair zTadBuffers = tadManager.getTADOnlyShapeInfo(ret, new int[]{sourceDimension});
        Pointer tadShapeInfo = AtomicAllocator.getInstance().getPointer((DataBuffer)tadBuffers.getFirst(), context);
        Pointer zTadShapeInfo = AtomicAllocator.getInstance().getPointer((DataBuffer)zTadBuffers.getFirst(), context);
        DataBuffer offsets = (DataBuffer)tadBuffers.getSecond();
        Pointer tadOffsets = AtomicAllocator.getInstance().getPointer(offsets, context);
        Pointer zTadOffsets = AtomicAllocator.getInstance().getPointer((DataBuffer)zTadBuffers.getSecond(), context);
        this.nativeOps.pullRows(extras, x, (LongPointer)source.shapeInfoDataBuffer().addressPointer(), (LongPointer)xShape, z, (LongPointer)ret.shapeInfoDataBuffer().addressPointer(), (LongPointer)zShape, (long)indexes.length, (LongPointer)pIndex, (LongPointer)tadShapeInfo, (LongPointer)new LongPointerWrapper(tadOffsets), (LongPointer)zTadShapeInfo, (LongPointer)new LongPointerWrapper(zTadOffsets));
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        allocator.registerAction(context, ret, source);
        return ret;
    }

    /**
     * Accumulates {@code arrays} into {@code target} via the native {@code accumulate}
     * routine, passing it the device addresses of all inputs.
     *
     * <p>With exactly one input the call degenerates to {@code target.assign(arrays[0])}.
     *
     * @param target array that receives the result
     * @param arrays input arrays; each must have element-wise stride 1 and the same length
     *               as {@code target}
     * @return {@code target}
     * @throws RuntimeException          if {@code arrays} is null/empty or the native layer
     *                                   reports a non-zero error code
     * @throws ND4JIllegalStateException if an input is not contiguous or has a different length
     */
    public INDArray accumulate(INDArray target, INDArray ... arrays) {
        if (arrays == null || arrays.length == 0) {
            throw new RuntimeException("Input arrays are missing");
        }
        if (arrays.length == 1) {
            return target.assign(arrays[0]);
        }
        Nd4j.getExecutioner().push();
        long len = target.length();
        AtomicAllocator allocator = AtomicAllocator.getInstance();
        CudaContext context = allocator.getFlowController().prepareAction(target, arrays);
        PointerPointer extras = new PointerPointer(new Pointer[]{null, context.getOldStream(), allocator.getDeviceIdPointer(), new CudaPointer(0L)});
        Pointer z = AtomicAllocator.getInstance().getPointer(target, context);
        // Raw device addresses of every input, gathered after validating each one.
        long[] xPointers = new long[arrays.length];
        for (int i = 0; i < arrays.length; ++i) {
            // NOTE(review): the messages below say "averaging" although this is accumulate().
            if (arrays[i].elementWiseStride() != 1) {
                throw new ND4JIllegalStateException("Native averaging is applicable only to continuous INDArrays");
            }
            if (arrays[i].length() != len) {
                throw new ND4JIllegalStateException("All arrays should have equal length for averaging");
            }
            AllocationPoint point = allocator.getAllocationPoint(arrays[i]);
            xPointers[i] = point.getDevicePointer().address();
            point.tickDeviceWrite();
        }
        // A double buffer is used here only as storage for the 8-byte addresses
        // (xPointers.length * 8 bytes are copied into it) — presumably because both are 8 bytes wide.
        CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(arrays.length);
        allocator.memcpyBlocking(tempX, (Pointer)new LongPointer(xPointers), xPointers.length * 8, 0L);
        PointerPointer x = new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context));
        this.nativeOps.accumulate(extras, null, (LongPointer)arrays[0].shapeInfoDataBuffer().addressPointer(), x, null, null, (LongPointer)allocator.getHostPointer(target.shapeInfoDataBuffer()), z, (LongPointer)allocator.getPointer(target.shapeInfoDataBuffer()), arrays.length, len);
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        allocator.getFlowController().registerAction(context, target, arrays);
        return target;
    }

    /**
     * Averages {@code arrays} through the native {@code average} routine, optionally
     * writing the result into {@code target}.
     *
     * <p>Two execution paths exist:
     * <ul>
     *   <li>When P2P is available and cross-device access is allowed in the CUDA
     *       configuration, device pointers of the inputs are handed to the native call.</li>
     *   <li>Otherwise host pointers are handed to the native call (after automatic
     *       decompression and lazy host allocation of each input).</li>
     * </ul>
     * With exactly one input the call degenerates to {@code target.assign(arrays[0])}, or
     * returns {@code null} when {@code target} is {@code null}.
     *
     * @param target array that receives the result; may be {@code null}
     * @param arrays input arrays; each must have element-wise stride 1 and equal length
     * @return {@code target} (possibly {@code null})
     * @throws RuntimeException          if {@code arrays} is null/empty or the native layer
     *                                   reports a non-zero error code
     * @throws ND4JIllegalStateException if an input is not contiguous or has a different length
     */
    public INDArray average(INDArray target, INDArray[] arrays) {
        int i;
        if (arrays == null || arrays.length == 0) {
            throw new RuntimeException("Input arrays are missing");
        }
        if (arrays.length == 1) {
            if (target == null) {
                return null;
            }
            return target.assign(arrays[0]);
        }
        // Device-pointer path: requires P2P support and the cross-device-access setting.
        if (this.nativeOps.isP2PAvailable() && CudaEnvironment.getInstance().getConfiguration().isCrossDeviceAccessAllowed()) {
            Nd4j.getExecutioner().push();
            long len = target != null ? target.length() : arrays[0].length();
            AtomicAllocator allocator = AtomicAllocator.getInstance();
            CudaContext context = allocator.getFlowController().prepareAction(target, arrays);
            PointerPointer extras = new PointerPointer(new Pointer[]{null, context.getOldStream(), allocator.getDeviceIdPointer(), new CudaPointer(0L)});
            Pointer z = target == null ? null : AtomicAllocator.getInstance().getPointer(target, context);
            // Raw device addresses of every input, gathered after validating each one.
            long[] xPointers = new long[arrays.length];
            for (int i2 = 0; i2 < arrays.length; ++i2) {
                if (arrays[i2].elementWiseStride() != 1) {
                    throw new ND4JIllegalStateException("Native averaging is applicable only to continuous INDArrays");
                }
                if (arrays[i2].length() != len) {
                    throw new ND4JIllegalStateException("All arrays should have equal length for averaging");
                }
                AllocationPoint point = allocator.getAllocationPoint(arrays[i2]);
                xPointers[i2] = point.getDevicePointer().address();
                point.tickDeviceWrite();
            }
            // Double buffer used only as storage for the 8-byte addresses (length * 8 bytes copied).
            CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(arrays.length);
            allocator.memcpyBlocking(tempX, (Pointer)new LongPointer(xPointers), xPointers.length * 8, 0L);
            PointerPointer x = new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context));
            this.nativeOps.average(extras, null, (LongPointer)arrays[0].shapeInfoDataBuffer().addressPointer(), x, null, null, (LongPointer)(target == null ? null : target.shapeInfoDataBuffer().addressPointer()), target == null ? null : z, null, arrays.length, len, true);
            if (this.nativeOps.lastErrorCode() != 0) {
                throw new RuntimeException(this.nativeOps.lastErrorMessage());
            }
            allocator.getFlowController().registerAction(context, target, arrays);
            return target;
        }
        // Host-pointer path. Note the last `extras` entry is CudaPointer(1L) here versus 0L above.
        long len = target == null ? arrays[0].length() : target.length();
        CudaContext context = AtomicAllocator.getInstance().getDeviceContext();
        PointerPointer dataPointers = new PointerPointer((long)arrays.length);
        PointerPointer extras = new PointerPointer(new Pointer[]{null, context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer(), new CudaPointer(1L)});
        for (i = 0; i < arrays.length; ++i) {
            Nd4j.getCompressor().autoDecompress(arrays[i]);
            if (arrays[i].elementWiseStride() != 1) {
                throw new ND4JIllegalStateException("Native averaging is applicable only to continuous INDArrays");
            }
            if (arrays[i].length() != len) {
                throw new ND4JIllegalStateException("All arrays should have equal length for averaging");
            }
            ((BaseCudaDataBuffer)arrays[i].data()).lazyAllocateHostPointer();
            dataPointers.put((long)i, AtomicAllocator.getInstance().getHostPointer(arrays[i]));
        }
        if (target != null) {
            ((BaseCudaDataBuffer)target.data()).lazyAllocateHostPointer();
        }
        this.nativeOps.average(extras, dataPointers, (LongPointer)arrays[0].shapeInfoDataBuffer().addressPointer(), null, null, target == null ? null : target.data().addressPointer(), (LongPointer)(target == null ? null : target.shapeInfoDataBuffer().addressPointer()), null, null, arrays.length, len, true);
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        // Mark the target and every input as written on the host side.
        if (target != null) {
            AtomicAllocator.getInstance().getAllocationPoint(target).tickHostWrite();
        }
        for (i = 0; i < arrays.length; ++i) {
            AtomicAllocator.getInstance().getAllocationPoint(arrays[i]).tickHostWrite();
        }
        return target;
    }

    /**
     * Averages a collection of arrays by converting it to an array and delegating to
     * {@link #average(INDArray[])}.
     *
     * @param arrays arrays to average
     * @return the array returned by the {@code INDArray[]} overload
     */
    public INDArray average(Collection<INDArray> arrays) {
        INDArray[] inputs = arrays.toArray(new INDArray[0]);
        return this.average(inputs);
    }

    /**
     * Averages the given arrays into a freshly allocated, uninitialized target that copies
     * the first input's data type, shape and ordering.
     *
     * @param arrays arrays to average; must be non-null and non-empty
     * @return the result of {@link #average(INDArray, INDArray[])} with the new target
     * @throws RuntimeException if {@code arrays} is null or empty
     */
    public INDArray average(INDArray[] arrays) {
        boolean missing = arrays == null || arrays.length == 0;
        if (missing) {
            throw new RuntimeException("Input arrays are missing");
        }
        INDArray first = arrays[0];
        INDArray target = Nd4j.createUninitialized(first.dataType(), first.shape(), first.ordering());
        return this.average(target, arrays);
    }

    /**
     * Averages a collection of arrays into {@code target} by converting the collection to
     * an array and delegating to {@link #average(INDArray, INDArray[])}.
     *
     * @param target array that receives the result
     * @param arrays arrays to average
     * @return the array returned by the {@code INDArray[]} overload
     */
    public INDArray average(INDArray target, Collection<INDArray> arrays) {
        INDArray[] inputs = arrays.toArray(new INDArray[0]);
        return this.average(target, inputs);
    }

    /**
     * Shuffles a single array by wrapping it in a singleton list and delegating to the
     * collection-based overload.
     *
     * @param array     array to shuffle
     * @param rnd       random source forwarded to the delegate
     * @param dimension dimensions forwarded to the delegate
     */
    public void shuffle(INDArray array, Random rnd, int ... dimension) {
        List<INDArray> single = Collections.singletonList(array);
        this.shuffle(single, rnd, dimension);
    }

    /**
     * Shuffles several arrays with one shared shuffle map through the native
     * {@code shuffle} routine.
     *
     * <p>The map is built by {@code ArrayUtil.buildInterleavedVector} for the number of
     * TADs of the first array, where the TAD length is the product of the first array's
     * sizes along {@code dimensions.get(0)} (or 1 for rank-1 input).
     *
     * @param arrays     arrays to shuffle; must be non-null and non-empty
     * @param rnd        random source used to build the shuffle map
     * @param dimensions either a single entry applied to every array, or one entry per array
     * @throws RuntimeException          if {@code dimensions} or {@code arrays} is null/empty,
     *                                   or the native layer reports a non-zero error code
     * @throws IllegalStateException     if more than one dimension entry is given and the
     *                                   count differs from the number of arrays
     * @throws ND4JIllegalStateException if an array's TAD count differs from the first
     *                                   array's (checked only when the first array is not rank 1)
     */
    public void shuffle(List<INDArray> arrays, Random rnd, List<int[]> dimensions) {
        if (dimensions == null || dimensions.size() == 0) {
            throw new RuntimeException("Dimension can't be null or 0-length");
        }
        if (arrays == null || arrays.size() == 0) {
            throw new RuntimeException("No input arrays provided");
        }
        if (dimensions.size() > 1 && arrays.size() != dimensions.size()) {
            throw new IllegalStateException("Number of dimensions do not match number of arrays to shuffle");
        }
        Nd4j.getExecutioner().push();
        AtomicAllocator allocator = AtomicAllocator.getInstance();
        CudaContext context = null;
        // prepareAction is invoked per array; the context from the last call is the one used below.
        for (int x = 0; x < arrays.size(); ++x) {
            context = allocator.getFlowController().prepareAction(arrays.get(x), new INDArray[0]);
        }
        INDArray zero = arrays.get(0);
        // TAD length = product of the first array's sizes along dimensions.get(0); stays 1 for rank <= 1.
        int tadLength = 1;
        if (zero.rank() > 1) {
            for (int i = 0; i < dimensions.get(0).length; ++i) {
                tadLength = (int)((long)tadLength * zero.shape()[dimensions.get(0)[i]]);
            }
        }
        long numTads = zero.length() / (long)tadLength;
        int[] map = ArrayUtil.buildInterleavedVector((Random)rnd, (int)((int)numTads));
        CudaIntDataBuffer shuffle = new CudaIntDataBuffer(map);
        Pointer shuffleMap = allocator.getPointer(shuffle, context);
        PointerPointer extras = new PointerPointer(new Pointer[]{null, context.getOldStream(), allocator.getDeviceIdPointer()});
        // Per-array raw addresses: host shape info, data, shape info, TAD shape info, TAD offsets.
        long[] hPointers = new long[arrays.size()];
        long[] xPointers = new long[arrays.size()];
        long[] xShapes = new long[arrays.size()];
        long[] tadShapes = new long[arrays.size()];
        long[] tadOffsets = new long[arrays.size()];
        for (int i = 0; i < arrays.size(); ++i) {
            INDArray array = arrays.get(i);
            AllocationPoint point = allocator.getAllocationPoint(array);
            // If the host copy is the current one, push it to the device first.
            if (point.isActualOnHostSide()) {
                AtomicAllocator.getInstance().getFlowController().synchronizeToDevice(point);
                point.tickDeviceWrite();
            }
            Pointer x = AtomicAllocator.getInstance().getPointer(array, context);
            Pointer xShapeInfo = AtomicAllocator.getInstance().getPointer(array.shapeInfoDataBuffer(), context);
            TADManager tadManager = Nd4j.getExecutioner().getTADManager();
            // A single dimensions entry is shared by all arrays; otherwise use the i-th entry.
            int[] dimension = dimensions.size() > 1 ? dimensions.get(i) : dimensions.get(0);
            Pair tadBuffers = tadManager.getTADOnlyShapeInfo(array, dimension);
            Pointer tadShapeInfo = AtomicAllocator.getInstance().getPointer((DataBuffer)tadBuffers.getFirst(), context);
            DataBuffer offsets = (DataBuffer)tadBuffers.getSecond();
            if (zero.rank() != 1 && offsets.length() != numTads) {
                throw new ND4JIllegalStateException("Can't symmetrically shuffle arrays with non-equal number of TADs");
            }
            Pointer tadOffset = AtomicAllocator.getInstance().getPointer(offsets, context);
            hPointers[i] = AtomicAllocator.getInstance().getHostPointer(array.shapeInfoDataBuffer()).address();
            xPointers[i] = x.address();
            xShapes[i] = xShapeInfo.address();
            tadShapes[i] = tadShapeInfo.address();
            tadOffsets[i] = tadOffset.address();
        }
        LongPointer hostPointers = new LongPointer(hPointers);
        PointerPointerWrapper hosthost = new PointerPointerWrapper(hostPointers);
        // Double buffers are used only as storage for the 8-byte addresses (length * 8 bytes copied each).
        CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(arrays.size());
        CudaDoubleDataBuffer tempShapes = new CudaDoubleDataBuffer(arrays.size());
        CudaDoubleDataBuffer tempTAD = new CudaDoubleDataBuffer(arrays.size());
        CudaDoubleDataBuffer tempOffsets = new CudaDoubleDataBuffer(arrays.size());
        AtomicAllocator.getInstance().memcpyBlocking(tempX, (Pointer)new LongPointer(xPointers), xPointers.length * 8, 0L);
        AtomicAllocator.getInstance().memcpyBlocking(tempShapes, (Pointer)new LongPointer(xShapes), xPointers.length * 8, 0L);
        AtomicAllocator.getInstance().memcpyBlocking(tempTAD, (Pointer)new LongPointer(tadShapes), xPointers.length * 8, 0L);
        AtomicAllocator.getInstance().memcpyBlocking(tempOffsets, (Pointer)new LongPointer(tadOffsets), xPointers.length * 8, 0L);
        // The same data/shape pointer tables are passed twice to the native call.
        this.nativeOps.shuffle(extras, null, (PointerPointer)hosthost, new PointerPointer(allocator.getPointer(tempX, context)), new PointerPointer(allocator.getPointer(tempShapes, context)), null, null, new PointerPointer(allocator.getPointer(tempX, context)), new PointerPointer(allocator.getPointer(tempShapes, context)), arrays.size(), (IntPointer)shuffleMap, new PointerPointer(allocator.getPointer(tempTAD, context)), new PointerPointer(allocator.getPointer(tempOffsets, context)));
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        for (int f = 0; f < arrays.size(); ++f) {
            allocator.getFlowController().registerAction(context, arrays.get(f), new INDArray[0]);
        }
        // Results are discarded; presumably these calls only keep the temporary buffers
        // reachable until the native call has finished — TODO confirm.
        tempX.dataType();
        tempShapes.dataType();
        tempOffsets.dataType();
        tempTAD.dataType();
    }

    /**
     * Shuffles a collection of arrays using one shared set of dimensions.
     * The collection is copied into a new list and the call is delegated to the
     * list-based overload, with {@code dimension} passed as the single entry of
     * the dimensions list.
     *
     * @param sourceArrays arrays to shuffle
     * @param rnd          random source forwarded to the delegate
     * @param dimension    dimensions forwarded as one {@code int[]} entry
     */
    public void shuffle(Collection<INDArray> sourceArrays, Random rnd, int ... dimension) {
        List<INDArray> arrayList = new ArrayList<INDArray>(sourceArrays);
        List<int[]> dimensionList = Collections.singletonList(dimension);
        this.shuffle(arrayList, rnd, dimensionList);
    }

    /**
     * Converts the data of {@code source} in place from {@code typeSrc} to {@code typeDst}.
     * The array's buffer is replaced by the converted buffer and its compressed flag
     * is set according to whether that buffer is a {@link CompressedDataBuffer}.
     *
     * @param typeSrc source extended data type
     * @param source  array to convert; must not be a view
     * @param typeDst target extended data type
     * @return the same {@code source} instance, now backed by the converted buffer
     * @throws UnsupportedOperationException if {@code source} is a view
     */
    public INDArray convertDataEx(DataTypeEx typeSrc, INDArray source, DataTypeEx typeDst) {
        if (source.isView()) {
            throw new UnsupportedOperationException("Impossible to compress View. Consider using dup() before. ");
        }
        DataBuffer converted = this.convertDataEx(typeSrc, source.data(), typeDst);
        source.setData(converted);
        source.markAsCompressed(converted instanceof CompressedDataBuffer);
        return source;
    }

    /**
     * Runs the native type conversion between two raw pointers on the current
     * device context's stream.
     *
     * @param typeSrc source extended data type (its ordinal is passed to the native call)
     * @param source  pointer to the input data
     * @param typeDst target extended data type (its ordinal is passed to the native call)
     * @param target  pointer receiving the converted data
     * @param length  length value forwarded to the native conversion
     * @throws RuntimeException if the native layer reports a non-zero error code
     */
    public void convertDataEx(DataTypeEx typeSrc, Pointer source, DataTypeEx typeDst, Pointer target, long length) {
        cudaStream_t stream = AtomicAllocator.getInstance().getDeviceContext().getOldStream();
        // Slot 0 is intentionally left null; slot 1 carries the stream.
        Pointer[] extraSlots = new Pointer[]{null, stream};
        PointerPointer extras = new PointerPointer(extraSlots);
        int srcOrdinal = typeSrc.ordinal();
        int dstOrdinal = typeDst.ordinal();
        this.nativeOps.convertTypes(extras, srcOrdinal, source, length, dstOrdinal, target);
        int errorCode = this.nativeOps.lastErrorCode();
        if (errorCode != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
    }

    /**
     * Converts data read from a raw {@code source} pointer into a compressed buffer.
     * Two temporary device allocations are made (sized from the buffer's compression
     * descriptor), the source is copied to the first, the conversion writes into the
     * second, and the result is copied to {@code buffer}'s address before both
     * temporaries are freed.
     *
     * @param typeSrc source extended data type
     * @param source  pointer to the input data
     * @param typeDst target extended data type
     * @param buffer  destination; only {@link CompressedDataBuffer} is supported
     * @throws UnsupportedOperationException if {@code buffer} is not a {@link CompressedDataBuffer}
     * @throws RuntimeException if the native layer reports a non-zero error code
     */
    public void convertDataEx(DataTypeEx typeSrc, Pointer source, DataTypeEx typeDst, DataBuffer buffer) {
        cudaStream_t stream = AtomicAllocator.getInstance().getDeviceContext().getOldStream();
        if (!(buffer instanceof CompressedDataBuffer)) {
            throw new UnsupportedOperationException();
        }
        CompressionDescriptor descriptor = ((CompressedDataBuffer) buffer).getCompressionDescriptor();
        long compressedLength = descriptor.getCompressedLength();
        long originalLength = descriptor.getOriginalLength();

        // Temporary device-side staging buffers for input and output.
        Pointer stagingSrc = this.nativeOps.mallocDevice(originalLength, 0, 0);
        Pointer stagingDst = this.nativeOps.mallocDevice(compressedLength, 0, 0);
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        this.nativeOps.memcpyAsync(stagingSrc, source, originalLength, CudaConstants.cudaMemcpyHostToDevice, (Pointer) stream);
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }

        this.convertDataEx(typeSrc, stagingSrc, typeDst, stagingDst, buffer.length());

        // Copy the converted data to the buffer's address and wait for the stream.
        this.nativeOps.memcpyAsync(buffer.addressPointer(), stagingDst, compressedLength, CudaConstants.cudaMemcpyHostToHost, (Pointer) stream);
        stream.synchronize();
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }

        // buffer is known to be a CompressedDataBuffer here, so both temporaries are always released.
        this.nativeOps.freeDevice(stagingSrc, 0);
        this.nativeOps.freeDevice(stagingDst, 0);
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
    }

    /**
     * Converts {@code source} into {@code target}, staging compressed buffers through
     * temporary device memory.
     *
     * <p>When a workspace is active for the current thread, temporaries are allocated from
     * that workspace; otherwise they are allocated with {@code mallocDevice} and freed at
     * the end of the method. Non-compressed buffers are resolved through the allocator
     * (in the non-workspace branch only).
     *
     * @param typeSrc source extended data type
     * @param source  input buffer
     * @param typeDst target extended data type
     * @param target  output buffer
     * @throws RuntimeException if the native layer reports a non-zero error code
     */
    public void convertDataEx(DataTypeEx typeSrc, DataBuffer source, DataTypeEx typeDst, DataBuffer target) {
        cudaStream_t stream = AtomicAllocator.getInstance().getDeviceContext().getOldStream();
        Pointer srcPtr = null;
        Pointer dstPtr = null;
        if (Nd4j.getWorkspaceManager().anyWorkspaceActiveForCurrentThread()) {
            // Workspace path: temporaries come from the current workspace and are not freed here.
            // NOTE(review): in this branch srcPtr/dstPtr stay null when the corresponding buffer
            // is not a CompressedDataBuffer - confirm callers never reach that combination.
            long size;
            MemoryWorkspace ws = Nd4j.getMemoryManager().getCurrentWorkspace();
            if (source instanceof CompressedDataBuffer) {
                size = ((CompressedDataBuffer)source).getCompressionDescriptor().getCompressedLength();
                srcPtr = ws.alloc(size, MemoryKind.DEVICE, DataType.HALF, false);
                // NOTE(review): the copy flag is HostToHost although srcPtr was requested as
                // MemoryKind.DEVICE - confirm this is the intended direction flag.
                this.nativeOps.memcpyAsync(srcPtr, source.addressPointer(), size, CudaConstants.cudaMemcpyHostToHost, (Pointer)stream);
                if (this.nativeOps.lastErrorCode() != 0) {
                    throw new RuntimeException(this.nativeOps.lastErrorMessage());
                }
            }
            if (target instanceof CompressedDataBuffer) {
                size = ((CompressedDataBuffer)target).getCompressionDescriptor().getCompressedLength();
                dstPtr = ws.alloc(size, MemoryKind.DEVICE, DataType.HALF, false);
            }
        } else {
            // No workspace: compressed buffers get explicit device allocations, everything
            // else uses the pointer tracked by the allocator.
            long size;
            if (source instanceof CompressedDataBuffer) {
                log.info("Replacing source ptr");
                size = ((CompressedDataBuffer)source).getCompressionDescriptor().getCompressedLength();
                srcPtr = this.nativeOps.mallocDevice(size, 0, 0);
                this.nativeOps.memcpyAsync(srcPtr, source.addressPointer(), size, CudaConstants.cudaMemcpyHostToHost, (Pointer)stream);
                // Wait for the staging copy to finish before the conversion reads it.
                stream.synchronize();
                if (this.nativeOps.lastErrorCode() != 0) {
                    throw new RuntimeException(this.nativeOps.lastErrorMessage());
                }
            } else {
                srcPtr = AtomicAllocator.getInstance().getPointer(source);
            }
            if (target instanceof CompressedDataBuffer) {
                log.info("Replacing target ptr");
                size = ((CompressedDataBuffer)target).getCompressionDescriptor().getCompressedLength();
                dstPtr = this.nativeOps.mallocDevice(size, 0, 0);
                if (this.nativeOps.lastErrorCode() != 0) {
                    throw new RuntimeException(this.nativeOps.lastErrorMessage());
                }
            } else {
                dstPtr = AtomicAllocator.getInstance().getPointer(target);
            }
        }
        // Pointer-level conversion; the length passed is target.length().
        this.convertDataEx(typeSrc, srcPtr, typeDst, dstPtr, target.length());
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        Nd4j.getExecutioner().commit();
        if (target instanceof CompressedDataBuffer) {
            // Copy the converted data from the temporary into the compressed target.
            this.nativeOps.memcpyAsync(target.addressPointer(), dstPtr, target.capacity(), CudaConstants.cudaMemcpyHostToHost, (Pointer)stream);
            // Only explicitly malloc'ed temporaries are freed; workspace allocations are left alone.
            if (!Nd4j.getWorkspaceManager().anyWorkspaceActiveForCurrentThread()) {
                this.nativeOps.freeDevice(dstPtr, 0);
            }
        }
        if (source instanceof CompressedDataBuffer && !Nd4j.getWorkspaceManager().anyWorkspaceActiveForCurrentThread()) {
            this.nativeOps.freeDevice(srcPtr, 0);
        }
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        Nd4j.getExecutioner().commit();
    }

    /**
     * Converts {@code source} to {@code typeDst} and returns a newly created buffer
     * holding the result.
     *
     * <p>If {@link CompressionUtils#goingToCompress} reports a compression, the result is
     * a {@link CompressedDataBuffer} sized as {@code source.length() * elementSize};
     * otherwise {@code source} is cast to {@link CompressedDataBuffer} and a regular
     * buffer with the descriptor's number of elements is created.
     *
     * @param typeSrc source extended data type
     * @param source  input buffer
     * @param typeDst target extended data type; its ordinal selects the element size
     * @return the buffer that received the converted data
     * @throws UnsupportedOperationException if the ordinal of {@code typeDst} is greater than 7
     */
    public DataBuffer convertDataEx(DataTypeEx typeSrc, DataBuffer source, DataTypeEx typeDst) {
        // Element size in bytes, keyed on the ordinal of the target type.
        final int elementSize;
        switch (typeDst.ordinal()) {
            case 0:
            case 1:
            case 2:
                elementSize = 1;
                break;
            case 3:
            case 4:
            case 5:
                elementSize = 2;
                break;
            case 6:
                elementSize = 4;
                break;
            case 7:
                elementSize = 8;
                break;
            default:
                throw new UnsupportedOperationException("Unknown target TypeEx: " + typeDst.name());
        }

        Nd4j.getExecutioner().commit();
        boolean sourceIsCompressed = source instanceof CompressedDataBuffer;
        if (!sourceIsCompressed) {
            AtomicAllocator.getInstance().synchronizeHostData(source);
        }

        DataBuffer result;
        if (CompressionUtils.goingToCompress(typeSrc, typeDst)) {
            long compressedLength = source.length() * (long) elementSize;
            BytePointer storage = new BytePointer(compressedLength);
            CompressionDescriptor compressionInfo = new CompressionDescriptor(source, typeDst.name());
            compressionInfo.setCompressionType(CompressionType.LOSSY);
            compressionInfo.setCompressedLength(compressedLength);
            result = new CompressedDataBuffer((Pointer) storage, compressionInfo);
        } else {
            CompressionDescriptor compressionInfo = ((CompressedDataBuffer) source).getCompressionDescriptor();
            result = Nd4j.createBuffer((long) compressionInfo.getNumberOfElements(), false);
            AtomicAllocator.getInstance().getAllocationPoint(result).tickDeviceWrite();
        }
        this.convertDataEx(typeSrc, source, typeDst, result);
        return result;
    }

    /**
     * Splits {@code tensor} into separate arrays along the given dimensions by calling
     * the native {@code tear} operation. One output array is created per
     * tensor-along-dimension (TAD); each has the shape of the selected dimensions.
     *
     * @param tensor     array to split; decompressed in place first if it is compressed
     * @param dimensions dimensions that make up each output array; this array is sorted in place
     * @return the newly created output arrays, {@code tensor.length() / tadLength} of them
     * @throws RuntimeException if the native layer reports a non-zero error code
     */
    public INDArray[] tear(INDArray tensor, int ... dimensions) {
        if (tensor.isCompressed()) {
            Nd4j.getCompressor().decompressi(tensor);
        }
        // NOTE: sorts the caller's array in place.
        Arrays.sort(dimensions);
        Pair tadBuffers = Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(tensor, dimensions);
        // tadLength is the product of the selected dimension sizes; shape collects those sizes.
        long tadLength = 1L;
        long[] shape = new long[dimensions.length];
        for (int i = 0; i < dimensions.length; ++i) {
            tadLength *= tensor.shape()[dimensions[i]];
            shape[i] = tensor.shape()[dimensions[i]];
        }
        int numTads = (int)(tensor.length() / tadLength);
        INDArray[] result = new INDArray[numTads];
        long[] xPointers = new long[numTads];
        CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(null, tensor);
        // Allocate every output array and record the address of its pointer.
        // context is reassigned on each iteration; the value from the last one is used below.
        for (int x = 0; x < numTads; ++x) {
            result[x] = Nd4j.createUninitialized((long[])shape);
            context = AtomicAllocator.getInstance().getFlowController().prepareAction(result[x], new INDArray[0]);
            xPointers[x] = AtomicAllocator.getInstance().getPointer(result[x], context).address();
        }
        // The output addresses (8 bytes each) are copied into a temporary buffer handed to the native op.
        CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(numTads);
        AtomicAllocator.getInstance().memcpyBlocking(tempX, (Pointer)new LongPointer(xPointers), xPointers.length * 8, 0L);
        PointerPointer extraz = new PointerPointer(new Pointer[]{null, context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer()});
        OpaqueDataBuffer x = ((BaseCudaDataBuffer)tensor.data()).getOpaqueDataBuffer();
        // result[0]'s shape info is passed for all outputs (they share one shape).
        // NOTE(review): result[0] is indexed unconditionally - confirm numTads is never 0 here.
        this.nativeOps.tear(extraz, x, (LongPointer)tensor.shapeInfoDataBuffer().addressPointer(), (LongPointer)AtomicAllocator.getInstance().getPointer(tensor.shapeInfoDataBuffer(), context), new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context)), (LongPointer)AtomicAllocator.getInstance().getPointer(result[0].shapeInfoDataBuffer(), context), (LongPointer)AtomicAllocator.getInstance().getPointer((DataBuffer)tadBuffers.getFirst(), context), (LongPointer)new LongPointerWrapper(AtomicAllocator.getInstance().getPointer((DataBuffer)tadBuffers.getSecond(), context)));
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        AtomicAllocator.getInstance().getFlowController().registerActionAllWrite(context, result);
        AtomicAllocator.getInstance().getFlowController().registerAction(context, null, result);
        return result;
    }

    /**
     * Sorts {@code x} in place through the native {@code sort} operation.
     * Scalars are returned unchanged. For non-view arrays longer than
     * {@code 0xA00000} elements the executioner is committed before the native call.
     *
     * @param x          array to sort
     * @param descending sort direction flag forwarded to the native call
     * @return the same {@code x} instance
     * @throws RuntimeException if the native layer reports a non-zero error code
     */
    public INDArray sort(INDArray x, boolean descending) {
        if (x.isScalar()) {
            return x;
        }
        Nd4j.getExecutioner().push();
        AtomicAllocator allocator = AtomicAllocator.getInstance();
        CudaContext ctx = allocator.getFlowController().prepareAction(x, new INDArray[0]);
        Pointer hostShape = allocator.getHostPointer(x.shapeInfoDataBuffer());
        Pointer[] extraSlots = new Pointer[]{
            hostShape,
            ctx.getOldStream(),
            allocator.getDeviceIdPointer(),
            null,
            ctx.getBufferReduction(),
            ctx.getBufferScalar(),
            null,
            hostShape,
            allocator.getHostPointer(x.shapeInfoDataBuffer()),
            hostShape, hostShape, hostShape, hostShape, hostShape, hostShape, hostShape, hostShape, hostShape,
            new CudaPointer(0L)
        };
        PointerPointer extras = new PointerPointer(extraSlots);

        // Large, non-view inputs trigger a commit before the native sort.
        boolean viewOrSmall = x.isView() || x.length() <= 0xA00000L;
        if (!viewOrSmall) {
            Nd4j.getExecutioner().commit();
        }

        LongPointer hostShapeInfo = (LongPointer) x.shapeInfoDataBuffer().addressPointer();
        Pointer deviceData = allocator.getPointer(x, ctx);
        LongPointer deviceShapeInfo = (LongPointer) allocator.getPointer(x.shapeInfoDataBuffer(), ctx);
        this.nativeOps.sort(extras, null, hostShapeInfo, deviceData, deviceShapeInfo, descending);
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        allocator.getFlowController().registerAction(ctx, x, new INDArray[0]);
        return x;
    }

    /**
     * Creates an array marked as empty for the given data type. The shape information
     * is built from zero-length shape and stride arrays with the {@code EMPTY} array-type
     * bit and the data-type bits set, and the array is constructed without a data buffer.
     *
     * @param type data type encoded into the shape information
     * @return the new empty array
     */
    public INDArray empty(DataType type) {
        long optionBits = ArrayOptionsHelper.setOptionBit(0L, ArrayType.EMPTY);
        optionBits = ArrayOptionsHelper.setOptionBit(optionBits, type);
        long[] noShape = new long[0];
        long[] noStride = new long[0];
        Pair shapeInfo = Nd4j.getShapeInfoProvider().createShapeInformation(noShape, noStride, 1L, 'c', optionBits);
        return new JCublasNDArray(null, (CudaLongDataBuffer)shapeInfo.getFirst(), (long[])shapeInfo.getSecond());
    }

    /**
     * Sorts {@code x} in place along the given dimensions through the native
     * {@code sortTad} operation. Scalars are returned unchanged.
     *
     * @param x          array to sort
     * @param descending sort direction flag forwarded to the native call
     * @param dimension  dimensions to sort along; this array is sorted in place
     * @return the same {@code x} instance
     * @throws RuntimeException if the native layer reports a non-zero error code
     */
    public INDArray sort(INDArray x, boolean descending, int ... dimension) {
        if (x.isScalar()) {
            return x;
        }
        Arrays.sort(dimension);
        Nd4j.getExecutioner().push();
        Pair tadPair = Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(x, dimension);

        AtomicAllocator allocator = AtomicAllocator.getInstance();
        CudaContext ctx = allocator.getFlowController().prepareAction(x, new INDArray[0]);
        Pointer[] extraSlots = new Pointer[]{
            allocator.getHostPointer(x.shapeInfoDataBuffer()),
            ctx.getOldStream(),
            allocator.getDeviceIdPointer()
        };
        PointerPointer extras = new PointerPointer(extraSlots);
        Pointer hostDimensions = allocator.getHostPointer(allocator.getConstantBuffer(dimension));

        // Arguments are resolved in the same left-to-right order the native call receives them.
        LongPointer hostShapeInfo = (LongPointer) x.shapeInfoDataBuffer().addressPointer();
        Pointer deviceData = allocator.getPointer(x, ctx);
        LongPointer deviceShapeInfo = (LongPointer) allocator.getPointer(x.shapeInfoDataBuffer(), ctx);
        IntPointer dimensionArg = (IntPointer) hostDimensions;
        LongPointer tadShapeInfo = (LongPointer) allocator.getPointer((DataBuffer) tadPair.getFirst(), ctx);
        LongPointer tadOffsets = (LongPointer) new LongPointerWrapper(allocator.getPointer((DataBuffer) tadPair.getSecond(), ctx));
        this.nativeOps.sortTad(extras, null, hostShapeInfo, deviceData, deviceShapeInfo, dimensionArg, dimension.length, tadShapeInfo, tadOffsets, descending);
        if (this.nativeOps.lastErrorCode() != 0) {
            throw new RuntimeException(this.nativeOps.lastErrorMessage());
        }
        allocator.getFlowController().registerAction(ctx, x, new INDArray[0]);
        return x;
    }

    /**
     * Creates an array over {@code data} with an explicit shape, stride and offset,
     * using the default ordering reported by {@code Nd4j.order()}.
     */
    public INDArray create(float[] data, long[] shape, long[] stride, long offset) {
        char defaultOrdering = Nd4j.order().charValue();
        return new JCublasNDArray(data, shape, stride, offset, defaultOrdering);
    }

    /**
     * Creates an array of the requested data type from float values. The values are
     * first wrapped in a typed buffer, which is then given the supplied shape, stride
     * and ordering.
     */
    public INDArray create(float[] data, long[] shape, long[] stride, char order, DataType dataType) {
        DataBuffer typedBuffer = Nd4j.createTypedBuffer(data, dataType);
        return new JCublasNDArray(typedBuffer, shape, stride, order, dataType);
    }

    /**
     * Creates an array over {@code data} with an explicit shape, stride and offset,
     * using the default ordering reported by {@code Nd4j.order()}.
     */
    public INDArray create(double[] data, long[] shape, long[] stride, long offset) {
        char defaultOrdering = Nd4j.order().charValue();
        return new JCublasNDArray(data, shape, stride, offset, defaultOrdering);
    }

    /**
     * Creates an array of the requested data type from double values. The typed buffer
     * is created with the given workspace, and the array uses the default ordering
     * reported by {@code Nd4j.order()}.
     */
    public INDArray create(double[] data, long[] shape, long[] stride, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typedBuffer = Nd4j.createTypedBuffer(data, dataType, workspace);
        char defaultOrdering = Nd4j.order().charValue();
        return new JCublasNDArray(typedBuffer, shape, stride, defaultOrdering, dataType);
    }

    /**
     * Creates an array of the requested data type from float values. The typed buffer
     * is created with the given workspace, and the array uses the default ordering
     * reported by {@code Nd4j.order()}.
     */
    public INDArray create(float[] data, long[] shape, long[] stride, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typedBuffer = Nd4j.createTypedBuffer(data, dataType, workspace);
        char defaultOrdering = Nd4j.order().charValue();
        return new JCublasNDArray(typedBuffer, shape, stride, defaultOrdering, dataType);
    }

    /**
     * Creates an array of the requested data type from long values, using the default
     * ordering reported by {@code Nd4j.order()}.
     * The {@code workspace} argument is accepted but not forwarded to buffer creation here.
     */
    public INDArray create(long[] data, long[] shape, long[] stride, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typedBuffer = Nd4j.createTypedBuffer(data, dataType);
        char defaultOrdering = Nd4j.order().charValue();
        return new JCublasNDArray(typedBuffer, shape, stride, defaultOrdering, dataType);
    }

    /**
     * Creates an array of the requested data type from int values, using the default
     * ordering reported by {@code Nd4j.order()}.
     * The {@code workspace} argument is accepted but not forwarded to buffer creation here.
     */
    public INDArray create(int[] data, long[] shape, long[] stride, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typedBuffer = Nd4j.createTypedBuffer(data, dataType);
        char defaultOrdering = Nd4j.order().charValue();
        return new JCublasNDArray(typedBuffer, shape, stride, defaultOrdering, dataType);
    }

    /**
     * Creates an array of the requested data type from short values, using the default
     * ordering reported by {@code Nd4j.order()}.
     * The {@code workspace} argument is accepted but not forwarded to buffer creation here.
     */
    public INDArray create(short[] data, long[] shape, long[] stride, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typedBuffer = Nd4j.createTypedBuffer(data, dataType);
        char defaultOrdering = Nd4j.order().charValue();
        return new JCublasNDArray(typedBuffer, shape, stride, defaultOrdering, dataType);
    }

    /**
     * Creates an array of the requested data type from byte values, using the default
     * ordering reported by {@code Nd4j.order()}.
     * The {@code workspace} argument is accepted but not forwarded to buffer creation here.
     */
    public INDArray create(byte[] data, long[] shape, long[] stride, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typedBuffer = Nd4j.createTypedBuffer(data, dataType);
        char defaultOrdering = Nd4j.order().charValue();
        return new JCublasNDArray(typedBuffer, shape, stride, defaultOrdering, dataType);
    }

    /**
     * Creates an array of the requested data type from boolean values, using the default
     * ordering reported by {@code Nd4j.order()}.
     * The {@code workspace} argument is accepted but not forwarded to buffer creation here.
     */
    public INDArray create(boolean[] data, long[] shape, long[] stride, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typedBuffer = Nd4j.createTypedBuffer(data, dataType);
        char defaultOrdering = Nd4j.order().charValue();
        return new JCublasNDArray(typedBuffer, shape, stride, defaultOrdering, dataType);
    }

    /**
     * Creates an array of the requested data type from double values with an explicit
     * ordering. The typed buffer is created with the given workspace.
     */
    public INDArray create(double[] data, long[] shape, long[] stride, char order, DataType dataType, MemoryWorkspace workspace) {
        DataBuffer typedBuffer = Nd4j.createTypedBuffer(data, dataType, workspace);
        return new JCublasNDArray(typedBuffer, shape, stride, order, dataType);
    }

    /**
     * Wraps an existing buffer in a new array of the given shape.
     */
    public INDArray create(DataBuffer data, long[] shape) {
        INDArray wrapped = new JCublasNDArray(data, shape);
        return wrapped;
    }

    /**
     * Wraps an existing buffer with an explicit shape, stride and offset. The ordering
     * is the default reported by {@code Nd4j.order()} and the data type is taken from
     * the buffer itself.
     */
    public INDArray create(DataBuffer data, long[] shape, long[] stride, long offset) {
        char defaultOrdering = Nd4j.order().charValue();
        DataType bufferType = data.dataType();
        return new JCublasNDArray(data, shape, stride, offset, defaultOrdering, bufferType);
    }

    /**
     * Creates an array of the given shape from a list of arrays.
     */
    public INDArray create(List<INDArray> list, long[] shape) {
        INDArray combined = new JCublasNDArray(list, shape);
        return combined;
    }

    /**
     * Creates a two-dimensional array of shape {@code [rows, columns]} by delegating to
     * the shape-based overload with the default ordering reported by {@code Nd4j.order()}.
     */
    public INDArray create(long rows, long columns, long[] stride, long offset) {
        long[] matrixShape = new long[]{rows, columns};
        char defaultOrdering = Nd4j.order().charValue();
        return this.create(matrixShape, stride, offset, defaultOrdering);
    }

    /**
     * Creates an array of the given shape and ordering, passing an offset of zero.
     */
    public INDArray create(long[] shape, char ordering) {
        INDArray created = new JCublasNDArray(shape, 0L, ordering);
        return created;
    }

    /**
     * Creates an array of the given type, shape and ordering. Strides are derived from
     * the shape and ordering, then the stride-based overload is called.
     */
    public INDArray create(DataType dataType, long[] shape, char ordering, MemoryWorkspace workspace) {
        long[] derivedStrides = Nd4j.getStrides(shape, ordering);
        return this.create(dataType, shape, derivedStrides, ordering, workspace);
    }

    /**
     * Creates an array of the given type, shape, strides and ordering. The backing
     * buffer holds {@code Shape.lengthOf(shape)} elements and is created in the given
     * workspace with the initialize flag set to {@code true}.
     */
    public INDArray create(DataType dataType, long[] shape, long[] strides, char ordering, MemoryWorkspace workspace) {
        long elementCount = Shape.lengthOf(shape);
        DataBuffer backing = Nd4j.createBuffer(dataType, elementCount, true, workspace);
        return new JCublasNDArray(backing, shape, strides, ordering, dataType);
    }

    /**
     * Creates an array of the given shape and ordering with the constructor's
     * initialize flag set to {@code false}. Strides are derived from the shape and
     * ordering; the offset is zero.
     */
    public INDArray createUninitialized(long[] shape, char ordering) {
        long[] derivedStrides = Nd4j.getStrides(shape, ordering);
        return new JCublasNDArray(shape, derivedStrides, 0L, ordering, false);
    }

    /**
     * Creates an array of the given type, shape and ordering over a buffer created with
     * the initialize flag set to {@code false}. Strides are derived from the shape and ordering.
     * The {@code workspace} argument is accepted but not forwarded to buffer creation here.
     */
    public INDArray createUninitialized(DataType dataType, long[] shape, char ordering, MemoryWorkspace workspace) {
        long elementCount = Shape.lengthOf(shape);
        DataBuffer backing = Nd4j.createBuffer(dataType, elementCount, false);
        long[] derivedStrides = Nd4j.getStrides(shape, ordering);
        return new JCublasNDArray(backing, shape, derivedStrides, ordering, dataType);
    }

    /**
     * Creates an array of the given type and shape over the buffer returned by
     * {@code Nd4j.createBufferDetached}.
     *
     * <p>The requested {@code ordering} is used both to derive the strides and as the
     * array's ordering, matching the behaviour of the {@code createUninitialized}
     * overloads in this factory.
     *
     * @param dataType data type of the new array
     * @param ordering ordering to use for strides and layout
     * @param shape    shape of the new array
     * @return the newly created array
     */
    public INDArray createUninitializedDetached(DataType dataType, char ordering, long ... shape) {
        DataBuffer detachedBuffer = Nd4j.createBufferDetached(shape, dataType);
        long[] derivedStrides = Nd4j.getStrides(shape, ordering);
        return new JCublasNDArray(detachedBuffer, shape, derivedStrides, ordering, dataType);
    }

    /**
     * Wraps an existing buffer with an explicit shape, stride, offset and ordering.
     * The data type is taken from the buffer itself.
     */
    public INDArray create(DataBuffer data, long[] newShape, long[] newStride, long offset, char ordering) {
        DataType bufferType = data.dataType();
        return new JCublasNDArray(data, newShape, newStride, offset, ordering, bufferType);
    }

    /**
     * Wraps an existing buffer with an explicit shape, stride, offset, {@code ews}
     * value and ordering. The data type is taken from the buffer itself.
     */
    public INDArray create(DataBuffer data, long[] newShape, long[] newStride, long offset, long ews, char ordering) {
        DataType bufferType = data.dataType();
        return new JCublasNDArray(data, newShape, newStride, offset, ews, ordering, bufferType);
    }

    /**
     * Wraps an existing buffer with an explicit shape, stride, offset, ordering and
     * data type.
     */
    public INDArray create(DataBuffer data, long[] newShape, long[] newStride, long offset, char ordering, DataType dataType) {
        INDArray wrapped = new JCublasNDArray(data, newShape, newStride, offset, ordering, dataType);
        return wrapped;
    }

    /**
     * Creates an array of the given shape and ordering from a list of arrays.
     */
    public INDArray create(List<INDArray> list, long[] shape, char ordering) {
        INDArray combined = new JCublasNDArray(list, shape, ordering);
        return combined;
    }

    /**
     * Creates an array over float data with an explicit shape, stride, ordering and
     * offset. The constructor takes the offset before the ordering, so the two
     * arguments are swapped relative to this method's parameter order.
     */
    public INDArray create(float[] data, long[] shape, long[] stride, char order, long offset) {
        INDArray created = new JCublasNDArray(data, shape, stride, offset, order);
        return created;
    }

    /**
     * Creates an array over float data with an explicit shape, stride, offset and ordering.
     */
    public INDArray create(float[] data, long[] shape, long[] stride, long offset, char ordering) {
        INDArray created = new JCublasNDArray(data, shape, stride, offset, ordering);
        return created;
    }

    /**
     * Creates an array over double data with an explicit shape, stride, offset and ordering.
     */
    public INDArray create(double[] data, long[] shape, long[] stride, long offset, char ordering) {
        INDArray created = new JCublasNDArray(data, shape, stride, offset, ordering);
        return created;
    }

    /**
     * Creates an array over float data with the given shape, offset and ordering.
     * Strides are derived from the shape and the unboxed ordering.
     */
    public INDArray create(float[] data, long[] shape, long offset, Character order) {
        char ordering = order.charValue();
        long[] derivedStrides = Nd4j.getStrides(shape, ordering);
        return new JCublasNDArray(data, shape, derivedStrides, offset, ordering);
    }

    /**
     * Creates an array over double data with the given shape, offset and ordering.
     * Strides are derived from the shape and the unboxed ordering.
     */
    public INDArray create(double[] data, long[] shape, long offset, Character order) {
        char ordering = order.charValue();
        long[] derivedStrides = Nd4j.getStrides(shape, ordering);
        return new JCublasNDArray(data, shape, derivedStrides, offset, ordering);
    }

    /**
     * Creates an array over float data with the given shape and ordering, at offset zero.
     *
     * <p>Strides are derived from the requested {@code ordering}, so they always agree
     * with the ordering the array is tagged with. Deriving them from the factory's
     * default order instead would produce strides for a different layout whenever the
     * two orderings differ.
     *
     * @param data     values backing the array
     * @param shape    shape of the new array
     * @param ordering ordering used for both strides and layout
     * @return the newly created array
     */
    public INDArray create(float[] data, long[] shape, char ordering) {
        long[] derivedStrides = Nd4j.getStrides(shape, ordering);
        return new JCublasNDArray(data, shape, derivedStrides, 0L, ordering);
    }

    /**
     * Not supported by this factory.
     *
     * @param x ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public INDArray sortCooIndices(INDArray x) {
        throw new UnsupportedOperationException();
    }

    /**
     * Creates an array of the given type, shape and ordering with the supplied padding
     * and padding-offset arrays, in the given workspace. All arguments are passed
     * straight through to the array constructor.
     */
    public INDArray create(DataType dataType, long[] shape, long[] paddings, long[] paddingOffsets, char ordering, MemoryWorkspace workspace) {
        INDArray padded = new JCublasNDArray(dataType, shape, paddings, paddingOffsets, ordering, workspace);
        return padded;
    }
}

