/*
 * Decompiled with CFR 0.152.
 */
package org.nd4j.linalg.api.parallel.tasks.cpu.misc;

import io.netty.buffer.ByteBuf;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Future;
import java.util.concurrent.RecursiveTask;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.parallel.tasks.Task;
import org.nd4j.linalg.api.parallel.tasks.TaskExecutorProvider;
import org.nd4j.linalg.convolution.Convolution;
import org.nd4j.linalg.factory.Nd4j;

/**
 * Parallel CPU implementation of the im2col operation.
 *
 * <p>The input {@code img} is indexed as {@code [example, depth, height, width]} and the
 * output {@code out} as
 * {@code [example, depth, kernelHeight, kernelWidth, outHeight, outWidth]}. For every output
 * position {@code (y, x)} the {@code kernelHeight x kernelWidth} patch of the input starting at
 * {@code (y * strideY - padHeight, x * strideX - padWidth)} is copied into the output; input
 * positions that fall outside the image are written as zero when padding is in use.
 *
 * <p>A task covers a rectangular sub-range of (example, depth, yOut, xOut). When the amount of
 * work in that range exceeds {@code parallelThreshold} the task splits itself in two, trying the
 * example dimension first, then depth, then yOut, then xOut. Two execution modes are supported:
 * <ul>
 *   <li>fork/join, entered through {@link #compute()}, where children are forked and joined;</li>
 *   <li>executor based, entered through {@link #call()} (normally via {@link #invokeAsync()}),
 *       where children are submitted asynchronously and recorded in {@link #subTasks} so that
 *       {@link #blockUntilComplete()} can wait for them.</li>
 * </ul>
 *
 * <p>NOTE(review): when {@code padHeight} and {@code padWidth} are both zero the kernels use an
 * unchecked copy path. If {@code Convolution.outSize} with {@code coverAll == true} can yield
 * output positions whose patch extends past the input edge, that path would read outside the
 * image -- confirm against {@code Convolution.outSize}.
 */
public class CPUIm2ColTask
extends RecursiveTask<INDArray>
implements Task<INDArray> {
    /** Handle returned by the task executor for the executor-based mode; set by {@link #invokeAsync()}. */
    protected Future<INDArray> future;
    /** Children launched in executor-based mode; {@code null} until {@link #call()} has run. */
    protected List<CPUIm2ColTask> subTasks;
    /** Input array, indexed as [example, depth, height, width]. */
    protected final INDArray img;
    /** Output array, shared by this task and all of its children. */
    protected INDArray out;
    protected final int kernelHeight;
    protected final int kernelWidth;
    protected final int strideY;
    protected final int strideX;
    protected final int padHeight;
    protected final int padWidth;
    protected final boolean coverAll;
    /** Work size above which the task splits; {@code Integer.MAX_VALUE} disables splitting. */
    protected final int parallelThreshold;
    // Half-open ranges [from, to) of the sub-problem handled by this task.
    protected final int exampleFrom;
    protected final int exampleTo;
    protected final int depthFrom;
    protected final int depthTo;
    protected final int xOutFrom;
    protected final int xOutTo;
    protected final int yOutFrom;
    protected final int yOutTo;

    /**
     * Creates a task covering the whole input and allocates a new output array for it.
     *
     * @param img               input array, indexed as [example, depth, height, width]
     * @param kernelHeight      kernel height
     * @param kernelWidth       kernel width
     * @param strideY           stride along the height dimension
     * @param strideX           stride along the width dimension
     * @param padHeight         padding along the height dimension
     * @param padWidth          padding along the width dimension
     * @param coverAll          forwarded to {@code Convolution.outSize} when sizing the output
     * @param parallelThreshold work size above which the task splits itself
     */
    public CPUIm2ColTask(INDArray img, int kernelHeight, int kernelWidth, int strideY, int strideX, int padHeight, int padWidth, boolean coverAll, int parallelThreshold) {
        this(img,
                CPUIm2ColTask.getNewOutputArray(img, kernelHeight, kernelWidth, strideY, strideX, padHeight, padWidth, coverAll),
                kernelHeight, kernelWidth, strideY, strideX, padHeight, padWidth,
                0, img.size(0),
                0, img.size(1),
                0, Convolution.outSize(img.size(2), kernelHeight, strideY, padHeight, coverAll),
                0, Convolution.outSize(img.size(3), kernelWidth, strideX, padWidth, coverAll),
                coverAll, parallelThreshold);
    }

    /**
     * Creates a task covering an explicit sub-range of the problem and writing into an existing
     * output array. All {@code *From}/{@code *To} pairs are half-open ranges.
     *
     * @param img               input array, indexed as [example, depth, height, width]
     * @param out               output array to write into
     * @param kernelHeight      kernel height
     * @param kernelWidth       kernel width
     * @param strideY           stride along the height dimension
     * @param strideX           stride along the width dimension
     * @param padHeight         padding along the height dimension
     * @param padWidth          padding along the width dimension
     * @param exampleFrom       first example index (inclusive)
     * @param exampleTo         last example index (exclusive)
     * @param depthFrom         first depth index (inclusive)
     * @param depthTo           last depth index (exclusive)
     * @param yOutFrom          first output row (inclusive)
     * @param yOutTo            last output row (exclusive)
     * @param xOutFrom          first output column (inclusive)
     * @param xOutTo            last output column (exclusive)
     * @param coverAll          stored and passed on to child tasks
     * @param parallelThreshold work size above which the task splits itself
     */
    public CPUIm2ColTask(INDArray img, INDArray out, int kernelHeight, int kernelWidth, int strideY, int strideX, int padHeight, int padWidth, int exampleFrom, int exampleTo, int depthFrom, int depthTo, int yOutFrom, int yOutTo, int xOutFrom, int xOutTo, boolean coverAll, int parallelThreshold) {
        this.img = img;
        this.out = out;
        this.kernelHeight = kernelHeight;
        this.kernelWidth = kernelWidth;
        this.strideY = strideY;
        this.strideX = strideX;
        this.padHeight = padHeight;
        this.padWidth = padWidth;
        this.coverAll = coverAll;
        this.parallelThreshold = parallelThreshold;
        this.exampleFrom = exampleFrom;
        this.exampleTo = exampleTo;
        this.depthFrom = depthFrom;
        this.depthTo = depthTo;
        this.xOutFrom = xOutFrom;
        this.xOutTo = xOutTo;
        this.yOutFrom = yOutFrom;
        this.yOutTo = yOutTo;
    }

    /**
     * Allocates the output array with shape
     * {@code [n, c, kernelHeight, kernelWidth, outHeight, outWidth]}, where {@code n}, {@code c},
     * height and width are sizes 0..3 of {@code img} and the output sizes come from
     * {@code Convolution.outSize}.
     */
    private static INDArray getNewOutputArray(INDArray img, int kernelHeight, int kernelWidth, int strideY, int strideX, int padHeight, int padWidth, boolean coverAll) {
        int n = img.size(0);
        int c = img.size(1);
        int h = img.size(2);
        int w = img.size(3);
        int outHeight = Convolution.outSize(h, kernelHeight, strideY, padHeight, coverAll);
        int outWidth = Convolution.outSize(w, kernelWidth, strideX, padWidth, coverAll);
        return Nd4j.create(n, c, kernelHeight, kernelWidth, outHeight, outWidth);
    }

    /**
     * Fork/join entry point: splits (forking and joining children) or executes directly.
     *
     * @return the output array
     */
    @Override
    protected INDArray compute() {
        this.splitOrExecute(true);
        return this.out;
    }

    /**
     * Executor entry point: splits by submitting children asynchronously, or executes directly.
     * Children are not awaited here; use {@link #blockUntilComplete()} to wait for them and
     * obtain the result.
     *
     * @return always {@code null}
     */
    @Override
    public INDArray call() {
        // Must not use the fork/join path here: children have to be submitted through
        // invokeAsync() and tracked in subTasks so that blockUntilComplete() can wait for them.
        this.splitOrExecute(false);
        return null;
    }

    /**
     * Either executes this task's range directly or splits it into two halves.
     *
     * <p>Splitting happens only when {@code parallelThreshold} is not {@code Integer.MAX_VALUE}
     * and {@link #opSize()} exceeds it. The first dimension (in the order example, depth, yOut,
     * xOut) whose range is longer than one element is halved; if none is, the range is executed
     * directly.
     *
     * @param forkJoin {@code true} to fork and join the children; {@code false} to submit them
     *                 asynchronously and record them in {@link #subTasks}
     */
    private void splitOrExecute(boolean forkJoin) {
        if (!forkJoin) {
            this.subTasks = new ArrayList<>();
        }
        if (this.parallelThreshold == Integer.MAX_VALUE || this.opSize() <= this.parallelThreshold) {
            this.execute();
            return;
        }

        int exampleSpan = this.exampleTo - this.exampleFrom;
        int depthSpan = this.depthTo - this.depthFrom;
        int ySpan = this.yOutTo - this.yOutFrom;
        int xSpan = this.xOutTo - this.xOutFrom;

        CPUIm2ColTask first;
        CPUIm2ColTask second;
        if (exampleSpan > 1) {
            int mid = this.exampleFrom + exampleSpan / 2;
            first = this.subTask(this.exampleFrom, mid, this.depthFrom, this.depthTo, this.yOutFrom, this.yOutTo, this.xOutFrom, this.xOutTo);
            second = this.subTask(mid, this.exampleTo, this.depthFrom, this.depthTo, this.yOutFrom, this.yOutTo, this.xOutFrom, this.xOutTo);
        } else if (depthSpan > 1) {
            int mid = this.depthFrom + depthSpan / 2;
            first = this.subTask(this.exampleFrom, this.exampleTo, this.depthFrom, mid, this.yOutFrom, this.yOutTo, this.xOutFrom, this.xOutTo);
            second = this.subTask(this.exampleFrom, this.exampleTo, mid, this.depthTo, this.yOutFrom, this.yOutTo, this.xOutFrom, this.xOutTo);
        } else if (ySpan > 1) {
            int mid = this.yOutFrom + ySpan / 2;
            first = this.subTask(this.exampleFrom, this.exampleTo, this.depthFrom, this.depthTo, this.yOutFrom, mid, this.xOutFrom, this.xOutTo);
            second = this.subTask(this.exampleFrom, this.exampleTo, this.depthFrom, this.depthTo, mid, this.yOutTo, this.xOutFrom, this.xOutTo);
        } else if (xSpan > 1) {
            int mid = this.xOutFrom + xSpan / 2;
            first = this.subTask(this.exampleFrom, this.exampleTo, this.depthFrom, this.depthTo, this.yOutFrom, this.yOutTo, this.xOutFrom, mid);
            second = this.subTask(this.exampleFrom, this.exampleTo, this.depthFrom, this.depthTo, this.yOutFrom, this.yOutTo, mid, this.xOutTo);
        } else {
            // Nothing left to split on: a single output position for a single example/depth.
            this.execute();
            return;
        }

        this.launch(first, forkJoin);
        this.launch(second, forkJoin);
        if (forkJoin) {
            first.join();
            second.join();
        }
    }

    /** Creates a child task over the given half-open ranges, sharing all other settings and {@code out}. */
    private CPUIm2ColTask subTask(int exFrom, int exTo, int dFrom, int dTo, int yFrom, int yTo, int xFrom, int xTo) {
        return new CPUIm2ColTask(this.img, this.out, this.kernelHeight, this.kernelWidth, this.strideY, this.strideX, this.padHeight, this.padWidth, exFrom, exTo, dFrom, dTo, yFrom, yTo, xFrom, xTo, this.coverAll, this.parallelThreshold);
    }

    /** Starts a child: forks it in fork/join mode, otherwise submits it asynchronously and records it. */
    private void launch(CPUIm2ColTask child, boolean forkJoin) {
        if (forkJoin) {
            child.fork();
        } else {
            child.invokeAsync();
            this.subTasks.add(child);
        }
    }

    /**
     * Number of element copies in this task's range: the product of the four range lengths and
     * the kernel area. Computed in {@code long} so that large problems cannot overflow.
     */
    private long opSize() {
        return (long) (this.exampleTo - this.exampleFrom)
                * (this.depthTo - this.depthFrom)
                * (this.xOutTo - this.xOutFrom)
                * (this.yOutTo - this.yOutFrom)
                * this.kernelHeight
                * this.kernelWidth;
    }

    /**
     * Runs the copy kernel matching the input buffer's allocation mode (heap or not) and data
     * type (float, otherwise treated as double).
     */
    private void execute() {
        DataBuffer dbIn = this.img.data();
        boolean heap = dbIn.allocationMode() == DataBuffer.AllocationMode.HEAP;
        boolean isFloat = dbIn.dataType() == DataBuffer.Type.FLOAT;
        if (heap) {
            if (isFloat) {
                this.doHeapFloat();
            } else {
                this.doHeapDouble();
            }
        } else if (isFloat) {
            this.doDirectFloat();
        } else {
            this.doDirectDouble();
        }
    }

    /** im2col over this task's range for buffers backed by {@code float[]} arrays. */
    private void doHeapFloat() {
        DataBuffer dbIn = this.img.data();
        DataBuffer dbOut = this.out.data();
        int outArrayOffset = this.out.offset();
        int[] outShape = this.out.shape();
        int[] outStride = this.out.stride();
        int inArrayOffset = this.img.offset();
        int[] inShape = this.img.shape();
        int[] inStride = this.img.stride();
        int[] outIndices = new int[6];
        int[] inIndices = new int[4];
        int inStride2 = inStride[2];
        int inStride3 = inStride[3];
        int outStride2 = outStride[2];
        int outStride3 = outStride[3];
        int inShape2 = inShape[2];
        int inShape3 = inShape[3];
        boolean padding = this.padHeight > 0 || this.padWidth > 0;
        float[] fIn = (float[]) dbIn.array();
        float[] fOut = (float[]) dbOut.array();
        for (int ex = this.exampleFrom; ex < this.exampleTo; ++ex) {
            for (int d = this.depthFrom; d < this.depthTo; ++d) {
                inIndices[0] = ex;
                inIndices[1] = d;
                outIndices[0] = ex;
                outIndices[1] = d;
                for (int x = this.xOutFrom; x < this.xOutTo; ++x) {
                    for (int y = this.yOutFrom; y < this.yOutTo; ++y) {
                        outIndices[4] = y;
                        outIndices[5] = x;
                        // Offset of kernel position (0, 0) for this output position.
                        int baseOffsetOut = CPUIm2ColTask.getOffsetUnsafe6(outArrayOffset, outShape, outStride, outIndices);
                        if (padding) {
                            // Top-left input coordinate of the patch; may be negative.
                            int i = y * this.strideY - this.padHeight;
                            int j = x * this.strideX - this.padWidth;
                            inIndices[2] = i;
                            inIndices[3] = j;
                            int baseOffsetIn = CPUIm2ColTask.getOffsetUnsafe4(inArrayOffset, inShape, inStride, inIndices);
                            // Put the kernel dimension with the smaller output stride in the inner loop.
                            if (outStride2 <= outStride3) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    int outBufferIdxX = baseOffsetOut + patchX * outStride3;
                                    int inBufferIdxX = baseOffsetIn + patchX * inStride3;
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        fOut[outBufferIdxX + patchY * outStride2] = i + patchY < 0 || j + patchX < 0 || i + patchY >= inShape2 || j + patchX >= inShape3 ? 0.0f : fIn[inBufferIdxX + patchY * inStride2];
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    int outBufferIdxY = baseOffsetOut + patchY * outStride2;
                                    int inBufferIdxY = baseOffsetIn + patchY * inStride2;
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        fOut[outBufferIdxY + patchX * outStride3] = i + patchY < 0 || j + patchX < 0 || i + patchY >= inShape2 || j + patchX >= inShape3 ? 0.0f : fIn[inBufferIdxY + patchX * inStride3];
                                    }
                                }
                            }
                        } else {
                            // No padding: straight copy without bounds checks.
                            int i = y * this.strideY;
                            int j = x * this.strideX;
                            inIndices[2] = i;
                            inIndices[3] = j;
                            int baseOffsetIn = CPUIm2ColTask.getOffsetUnsafe4(inArrayOffset, inShape, inStride, inIndices);
                            if (outStride2 <= outStride3) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    int outBufferIdxX = baseOffsetOut + patchX * outStride3;
                                    int inBufferIdxX = baseOffsetIn + patchX * inStride3;
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        fOut[outBufferIdxX + patchY * outStride2] = fIn[inBufferIdxX + patchY * inStride2];
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    int outBufferIdxY = baseOffsetOut + patchY * outStride2;
                                    int inBufferIdxY = baseOffsetIn + patchY * inStride2;
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        fOut[outBufferIdxY + patchX * outStride3] = fIn[inBufferIdxY + patchX * inStride3];
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /** im2col over this task's range for buffers backed by {@code double[]} arrays. */
    private void doHeapDouble() {
        DataBuffer dbIn = this.img.data();
        DataBuffer dbOut = this.out.data();
        int outArrayOffset = this.out.offset();
        int[] outShape = this.out.shape();
        int[] outStride = this.out.stride();
        int inArrayOffset = this.img.offset();
        int[] inShape = this.img.shape();
        int[] inStride = this.img.stride();
        int[] outIndices = new int[6];
        int[] inIndices = new int[4];
        int inStride2 = inStride[2];
        int inStride3 = inStride[3];
        int outStride2 = outStride[2];
        int outStride3 = outStride[3];
        int inShape2 = inShape[2];
        int inShape3 = inShape[3];
        boolean padding = this.padHeight > 0 || this.padWidth > 0;
        double[] dIn = (double[]) dbIn.array();
        double[] dOut = (double[]) dbOut.array();
        for (int ex = this.exampleFrom; ex < this.exampleTo; ++ex) {
            for (int d = this.depthFrom; d < this.depthTo; ++d) {
                inIndices[0] = ex;
                inIndices[1] = d;
                outIndices[0] = ex;
                outIndices[1] = d;
                for (int x = this.xOutFrom; x < this.xOutTo; ++x) {
                    for (int y = this.yOutFrom; y < this.yOutTo; ++y) {
                        outIndices[4] = y;
                        outIndices[5] = x;
                        // Offset of kernel position (0, 0) for this output position.
                        int baseOffsetOut = CPUIm2ColTask.getOffsetUnsafe6(outArrayOffset, outShape, outStride, outIndices);
                        if (padding) {
                            // Top-left input coordinate of the patch; may be negative.
                            int i = y * this.strideY - this.padHeight;
                            int j = x * this.strideX - this.padWidth;
                            inIndices[2] = i;
                            inIndices[3] = j;
                            int baseOffsetIn = CPUIm2ColTask.getOffsetUnsafe4(inArrayOffset, inShape, inStride, inIndices);
                            // Put the kernel dimension with the smaller output stride in the inner loop.
                            if (outStride2 <= outStride3) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    int outBufferIdxX = baseOffsetOut + patchX * outStride3;
                                    int inBufferIdxX = baseOffsetIn + patchX * inStride3;
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        dOut[outBufferIdxX + patchY * outStride2] = i + patchY < 0 || j + patchX < 0 || i + patchY >= inShape2 || j + patchX >= inShape3 ? 0.0 : dIn[inBufferIdxX + patchY * inStride2];
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    int outBufferIdxY = baseOffsetOut + patchY * outStride2;
                                    int inBufferIdxY = baseOffsetIn + patchY * inStride2;
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        dOut[outBufferIdxY + patchX * outStride3] = i + patchY < 0 || j + patchX < 0 || i + patchY >= inShape2 || j + patchX >= inShape3 ? 0.0 : dIn[inBufferIdxY + patchX * inStride3];
                                    }
                                }
                            }
                        } else {
                            // No padding: straight copy without bounds checks.
                            int i = y * this.strideY;
                            int j = x * this.strideX;
                            inIndices[2] = i;
                            inIndices[3] = j;
                            int baseOffsetIn = CPUIm2ColTask.getOffsetUnsafe4(inArrayOffset, inShape, inStride, inIndices);
                            if (outStride2 <= outStride3) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    int outBufferIdxX = baseOffsetOut + patchX * outStride3;
                                    int inBufferIdxX = baseOffsetIn + patchX * inStride3;
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        dOut[outBufferIdxX + patchY * outStride2] = dIn[inBufferIdxX + patchY * inStride2];
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    int outBufferIdxY = baseOffsetOut + patchY * outStride2;
                                    int inBufferIdxY = baseOffsetIn + patchY * inStride2;
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        dOut[outBufferIdxY + patchX * outStride3] = dIn[inBufferIdxY + patchX * inStride3];
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * im2col over this task's range for float buffers accessed through their Netty
     * {@code ByteBuf} view. All buffer positions are byte offsets (element offset times 4).
     */
    private void doDirectFloat() {
        DataBuffer dbIn = this.img.data();
        DataBuffer dbOut = this.out.data();
        int outArrayOffset = this.out.offset();
        int[] outShape = this.out.shape();
        int[] outStride = this.out.stride();
        int inArrayOffset = this.img.offset();
        int[] inShape = this.img.shape();
        int[] inStride = this.img.stride();
        int[] outIndices = new int[6];
        int[] inIndices = new int[4];
        int inStride2_times4 = inStride[2] * 4;
        int inStride3_times4 = inStride[3] * 4;
        int outStride2_times4 = outStride[2] * 4;
        int outStride3_times4 = outStride[3] * 4;
        int inShape2 = inShape[2];
        int inShape3 = inShape[3];
        boolean padding = this.padHeight > 0 || this.padWidth > 0;
        ByteBuf nbbIn = dbIn.asNetty();
        ByteBuf nbbOut = dbOut.asNetty();
        for (int ex = this.exampleFrom; ex < this.exampleTo; ++ex) {
            for (int d = this.depthFrom; d < this.depthTo; ++d) {
                inIndices[0] = ex;
                inIndices[1] = d;
                outIndices[0] = ex;
                outIndices[1] = d;
                for (int x = this.xOutFrom; x < this.xOutTo; ++x) {
                    for (int y = this.yOutFrom; y < this.yOutTo; ++y) {
                        outIndices[4] = y;
                        outIndices[5] = x;
                        // Byte offset of kernel position (0, 0) for this output position.
                        int baseOffsetOutBytes = 4 * CPUIm2ColTask.getOffsetUnsafe6(outArrayOffset, outShape, outStride, outIndices);
                        if (padding) {
                            // Top-left input coordinate of the patch; may be negative.
                            int i = y * this.strideY - this.padHeight;
                            int j = x * this.strideX - this.padWidth;
                            inIndices[2] = i;
                            inIndices[3] = j;
                            int baseOffsetInBytes = 4 * CPUIm2ColTask.getOffsetUnsafe4(inArrayOffset, inShape, inStride, inIndices);
                            // Put the kernel dimension with the smaller output stride in the inner loop.
                            if (outStride2_times4 <= outStride3_times4) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    int outBufferIdxXBytes = baseOffsetOutBytes + patchX * outStride3_times4;
                                    int inBufferIdxXBytes = baseOffsetInBytes + patchX * inStride3_times4;
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        if (i + patchY < 0 || j + patchX < 0 || i + patchY >= inShape2 || j + patchX >= inShape3) {
                                            nbbOut.setFloat(outBufferIdxXBytes + patchY * outStride2_times4, 0.0f);
                                        } else {
                                            nbbOut.setFloat(outBufferIdxXBytes + patchY * outStride2_times4, nbbIn.getFloat(inBufferIdxXBytes + patchY * inStride2_times4));
                                        }
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    int outBufferIdxYBytes = baseOffsetOutBytes + patchY * outStride2_times4;
                                    int inBufferIdxYBytes = baseOffsetInBytes + patchY * inStride2_times4;
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        if (i + patchY < 0 || j + patchX < 0 || i + patchY >= inShape2 || j + patchX >= inShape3) {
                                            nbbOut.setFloat(outBufferIdxYBytes + patchX * outStride3_times4, 0.0f);
                                        } else {
                                            nbbOut.setFloat(outBufferIdxYBytes + patchX * outStride3_times4, nbbIn.getFloat(inBufferIdxYBytes + patchX * inStride3_times4));
                                        }
                                    }
                                }
                            }
                        } else {
                            // No padding: straight copy without bounds checks.
                            int i = y * this.strideY;
                            int j = x * this.strideX;
                            inIndices[2] = i;
                            inIndices[3] = j;
                            int baseOffsetInBytes = 4 * CPUIm2ColTask.getOffsetUnsafe4(inArrayOffset, inShape, inStride, inIndices);
                            if (outStride2_times4 <= outStride3_times4) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    int outBufferIdxXBytes = baseOffsetOutBytes + patchX * outStride3_times4;
                                    int inBufferIdxXBytes = baseOffsetInBytes + patchX * inStride3_times4;
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        nbbOut.setFloat(outBufferIdxXBytes + patchY * outStride2_times4, nbbIn.getFloat(inBufferIdxXBytes + patchY * inStride2_times4));
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    int outBufferIdxYBytes = baseOffsetOutBytes + patchY * outStride2_times4;
                                    int inBufferIdxYBytes = baseOffsetInBytes + patchY * inStride2_times4;
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        nbbOut.setFloat(outBufferIdxYBytes + patchX * outStride3_times4, nbbIn.getFloat(inBufferIdxYBytes + patchX * inStride3_times4));
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * im2col over this task's range for double buffers accessed through their Netty
     * {@code ByteBuf} view. All buffer positions are byte offsets (element offset times 8).
     */
    private void doDirectDouble() {
        DataBuffer dbIn = this.img.data();
        DataBuffer dbOut = this.out.data();
        int outArrayOffset = this.out.offset();
        int[] outShape = this.out.shape();
        int[] outStride = this.out.stride();
        int inArrayOffset = this.img.offset();
        int[] inShape = this.img.shape();
        int[] inStride = this.img.stride();
        int[] outIndices = new int[6];
        int[] inIndices = new int[4];
        int inStride2_times8 = inStride[2] * 8;
        int inStride3_times8 = inStride[3] * 8;
        int outStride2_times8 = outStride[2] * 8;
        int outStride3_times8 = outStride[3] * 8;
        int inShape2 = inShape[2];
        int inShape3 = inShape[3];
        boolean padding = this.padHeight > 0 || this.padWidth > 0;
        ByteBuf nbbIn = dbIn.asNetty();
        ByteBuf nbbOut = dbOut.asNetty();
        for (int ex = this.exampleFrom; ex < this.exampleTo; ++ex) {
            for (int d = this.depthFrom; d < this.depthTo; ++d) {
                inIndices[0] = ex;
                inIndices[1] = d;
                outIndices[0] = ex;
                outIndices[1] = d;
                for (int x = this.xOutFrom; x < this.xOutTo; ++x) {
                    for (int y = this.yOutFrom; y < this.yOutTo; ++y) {
                        outIndices[4] = y;
                        outIndices[5] = x;
                        // Byte offset of kernel position (0, 0) for this output position.
                        int baseOffsetOutBytes = 8 * CPUIm2ColTask.getOffsetUnsafe6(outArrayOffset, outShape, outStride, outIndices);
                        if (padding) {
                            // Top-left input coordinate of the patch; may be negative.
                            int i = y * this.strideY - this.padHeight;
                            int j = x * this.strideX - this.padWidth;
                            inIndices[2] = i;
                            inIndices[3] = j;
                            int baseOffsetInBytes = 8 * CPUIm2ColTask.getOffsetUnsafe4(inArrayOffset, inShape, inStride, inIndices);
                            // Put the kernel dimension with the smaller output stride in the inner loop.
                            if (outStride2_times8 <= outStride3_times8) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    int outBufferIdxXBytes = baseOffsetOutBytes + patchX * outStride3_times8;
                                    int inBufferIdxXBytes = baseOffsetInBytes + patchX * inStride3_times8;
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        if (i + patchY < 0 || j + patchX < 0 || i + patchY >= inShape2 || j + patchX >= inShape3) {
                                            nbbOut.setDouble(outBufferIdxXBytes + patchY * outStride2_times8, 0.0);
                                        } else {
                                            nbbOut.setDouble(outBufferIdxXBytes + patchY * outStride2_times8, nbbIn.getDouble(inBufferIdxXBytes + patchY * inStride2_times8));
                                        }
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    int outBufferIdxYBytes = baseOffsetOutBytes + patchY * outStride2_times8;
                                    int inBufferIdxYBytes = baseOffsetInBytes + patchY * inStride2_times8;
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        if (i + patchY < 0 || j + patchX < 0 || i + patchY >= inShape2 || j + patchX >= inShape3) {
                                            nbbOut.setDouble(outBufferIdxYBytes + patchX * outStride3_times8, 0.0);
                                        } else {
                                            nbbOut.setDouble(outBufferIdxYBytes + patchX * outStride3_times8, nbbIn.getDouble(inBufferIdxYBytes + patchX * inStride3_times8));
                                        }
                                    }
                                }
                            }
                        } else {
                            // No padding: straight copy without bounds checks.
                            int i = y * this.strideY;
                            int j = x * this.strideX;
                            inIndices[2] = i;
                            inIndices[3] = j;
                            int baseOffsetInBytes = 8 * CPUIm2ColTask.getOffsetUnsafe4(inArrayOffset, inShape, inStride, inIndices);
                            if (outStride2_times8 <= outStride3_times8) {
                                for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                    int outBufferIdxXBytes = baseOffsetOutBytes + patchX * outStride3_times8;
                                    int inBufferIdxXBytes = baseOffsetInBytes + patchX * inStride3_times8;
                                    for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                        nbbOut.setDouble(outBufferIdxXBytes + patchY * outStride2_times8, nbbIn.getDouble(inBufferIdxXBytes + patchY * inStride2_times8));
                                    }
                                }
                            } else {
                                for (int patchY = 0; patchY < this.kernelHeight; ++patchY) {
                                    int outBufferIdxYBytes = baseOffsetOutBytes + patchY * outStride2_times8;
                                    int inBufferIdxYBytes = baseOffsetInBytes + patchY * inStride2_times8;
                                    for (int patchX = 0; patchX < this.kernelWidth; ++patchX) {
                                        nbbOut.setDouble(outBufferIdxYBytes + patchX * outStride3_times8, nbbIn.getDouble(inBufferIdxYBytes + patchX * inStride3_times8));
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * Linear element offset for a 4-d index, with no bounds checking. Dimensions of size 1
     * contribute nothing regardless of their index or stride.
     */
    private static int getOffsetUnsafe4(int baseOffset, int[] shape, int[] stride, int[] indices) {
        int offset = baseOffset;
        for (int dim = 0; dim < 4; ++dim) {
            if (shape[dim] != 1) {
                offset += indices[dim] * stride[dim];
            }
        }
        return offset;
    }

    /**
     * Linear element offset into a 6-d array using only dimensions 0, 1, 4 and 5 of the index
     * (dimensions 2 and 3 are added by the callers), with no bounds checking. Dimensions of
     * size 1 contribute nothing.
     */
    private static int getOffsetUnsafe6(int baseOffset, int[] shape, int[] stride, int[] indices) {
        int offset = baseOffset;
        if (shape[0] != 1) {
            offset += indices[0] * stride[0];
        }
        if (shape[1] != 1) {
            offset += indices[1] * stride[1];
        }
        if (shape[4] != 1) {
            offset += indices[4] * stride[4];
        }
        if (shape[5] != 1) {
            offset += indices[5] * stride[5];
        }
        return offset;
    }

    /** Submits this task to the task executor and waits for it and all of its children. */
    @Override
    public INDArray invokeBlocking() {
        this.invokeAsync();
        return this.blockUntilComplete();
    }

    /** Submits this task to the executor from {@code TaskExecutorProvider} and stores the returned future. */
    @Override
    public void invokeAsync() {
        this.future = TaskExecutorProvider.getTaskExecutor().executeAsync(this);
    }

    /**
     * Waits for this task's future and then, recursively, for every recorded child task.
     *
     * @return the output array
     * @throws RuntimeException wrapping any exception raised while waiting; if the wait was
     *                          interrupted the thread's interrupt flag is restored first
     */
    @Override
    public INDArray blockUntilComplete() {
        try {
            this.future.get();
        }
        catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Preserve the interruption for callers further up the stack.
                Thread.currentThread().interrupt();
            }
            throw new RuntimeException(e);
        }
        if (this.subTasks != null) {
            for (CPUIm2ColTask task : this.subTasks) {
                task.blockUntilComplete();
            }
        }
        return this.out;
    }
}

