/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.nn.multilayer;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.math3.random.RandomGenerator;
import org.deeplearning4j.berkeley.Pair;
import org.deeplearning4j.datasets.iterator.DataSetIterator;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.api.Classifier;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.OutputPreProcessor;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.OutputLayer;
import org.deeplearning4j.optimize.solvers.StochasticHessianFree;
import org.deeplearning4j.util.MultiLayerUtil;
import org.nd4j.linalg.api.activation.ActivationFunction;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.api.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.ops.transforms.Transforms;
import org.nd4j.linalg.sampling.Sampling;
import org.nd4j.linalg.transformation.MatrixTransform;
import org.nd4j.linalg.util.FeatureUtil;
import org.nd4j.linalg.util.LinAlgExceptions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MultiLayerNetwork
implements Serializable,
Classifier {
    /** Class-wide SLF4J logger. */
    private static Logger log = LoggerFactory.getLogger(MultiLayerNetwork.class);
    private static final long serialVersionUID = -5029161847383716484L;
    /** Layers of the network; {@code init()} stores the layer built from the last configuration in the final slot. */
    protected Layer[] layers;
    /** Most recent input assigned to the network (see {@code setInput} / {@code feedForward(INDArray)}). */
    protected INDArray input;
    /** Labels used by fine-tuning and scoring. */
    protected INDArray labels;
    // The three transform maps below are only copied by update(MultiLayerNetwork) in the visible part of this class.
    protected Map<Integer, MatrixTransform> weightTransforms = new HashMap<Integer, MatrixTransform>();
    protected Map<Integer, MatrixTransform> hiddenBiasTransforms = new HashMap<Integer, MatrixTransform>();
    protected Map<Integer, MatrixTransform> visibleBiasTransforms = new HashMap<Integer, MatrixTransform>();
    /** Set to {@code true} at the end of {@code init()}; {@code initializeLayers} skips {@code init()} once it is set. */
    protected boolean initCalled = false;
    /** Configuration at index 0 of the layer-wise configuration (assigned in the constructor). */
    protected NeuralNetConfiguration defaultConfiguration;
    /** Per-layer configurations plus network-wide settings such as pretraining and damping factor. */
    protected MultiLayerConfiguration layerWiseConfigurations;
    /** Row vector of ones sized to the packed parameter vector (see {@code initMask()}); multiplied into the L2 terms. */
    protected INDArray mask;

    /**
     * Creates a network backed by the given layer-wise configuration.
     * The configuration at index 0 is also kept as the default configuration.
     *
     * @param conf layer-wise configuration; dereferenced here, so it must not be null
     */
    public MultiLayerNetwork(MultiLayerConfiguration conf) {
        NeuralNetConfiguration firstLayerConf = conf.getConf(0);
        this.layerWiseConfigurations = conf;
        this.defaultConfiguration = firstLayerConf;
    }

    /**
     * Fills in any missing configuration state: the layer-wise configuration,
     * the layer array and the default configuration. When the layer-wise
     * configuration holds no per-layer entries, one clone of the default
     * configuration is added per hidden layer size plus one.
     */
    protected void intializeConfigurations() {
        if (this.layerWiseConfigurations == null) {
            this.layerWiseConfigurations = new MultiLayerConfiguration.Builder().build();
        }
        if (this.layers == null) {
            this.layers = new Layer[this.getnLayers() + 1];
        }
        if (this.defaultConfiguration == null) {
            this.defaultConfiguration = new NeuralNetConfiguration.Builder().build();
        }
        boolean confsMissing = this.layerWiseConfigurations == null
                || this.layerWiseConfigurations.getConfs().isEmpty();
        if (!confsMissing) {
            return;
        }
        int added = 0;
        // The bound is re-read on every pass, exactly as a classic for-loop condition would be.
        while (added < this.layerWiseConfigurations.getHiddenLayerSizes().length + 1) {
            this.layerWiseConfigurations.getConfs().add(this.defaultConfiguration.clone());
            ++added;
        }
    }

    /**
     * Layer-wise pretraining driven by a data set iterator. Does nothing when
     * pretraining is disabled in the layer-wise configuration.
     *
     * For every layer the iterator is consumed once and then reset. The first
     * layer is fitted on the raw feature matrix (initializing the network on
     * the first batch if needed); every later layer is fitted on the features
     * pushed through all preceding layers.
     *
     * @param iter source of training batches
     */
    public void pretrain(DataSetIterator iter) {
        if (!this.layerWiseConfigurations.isPretrain()) {
            return;
        }
        for (int layerIdx = 0; layerIdx < this.getnLayers(); ++layerIdx) {
            while (iter.hasNext()) {
                org.nd4j.linalg.dataset.DataSet batch = (org.nd4j.linalg.dataset.DataSet)iter.next();
                if (layerIdx == 0) {
                    this.input = batch.getFeatureMatrix();
                    // Decide before setInput whether the layers still need to be built.
                    boolean needsInit = this.getInput() == null || this.getLayers() == null;
                    this.setInput(this.input);
                    if (needsInit) {
                        this.initializeLayers(this.input);
                    }
                    this.getLayers()[layerIdx].fit(batch.getFeatureMatrix());
                } else {
                    INDArray layerInput = batch.getFeatureMatrix();
                    // Propagate the batch through layers 0 .. layerIdx - 1.
                    for (int below = 0; below < layerIdx; ++below) {
                        layerInput = this.activationFromPrevLayer(below, layerInput);
                    }
                    log.info("Training on layer " + (layerIdx + 1));
                    this.getLayers()[layerIdx].fit(layerInput);
                }
            }
            iter.reset();
        }
    }

    /**
     * Layer-wise pretraining on a single input matrix. Does nothing when
     * pretraining is disabled. Every layer except the last one is fitted in
     * order, each on the activation produced by the layer below it.
     *
     * @param input training input for the first layer
     */
    public void pretrain(INDArray input) {
        if (!this.layerWiseConfigurations.isPretrain()) {
            return;
        }
        // Decide before setInput whether the layers still need to be built.
        boolean needsInit = this.getInput() == null || this.getLayers() == null;
        this.setInput(input);
        if (needsInit) {
            this.initializeLayers(input);
        }
        INDArray layerInput = null;
        for (int layerIdx = 0; layerIdx < this.getnLayers() - 1; ++layerIdx) {
            if (layerIdx == 0) {
                layerInput = this.getInput();
            } else {
                layerInput = this.activationFromPrevLayer(layerIdx - 1, layerInput);
            }
            log.info("Training on layer " + (layerIdx + 1));
            this.getLayers()[layerIdx].fit(layerInput);
        }
    }

    /**
     * @return the number of rows of the current input
     */
    @Override
    public int batchSize() {
        INDArray current = this.input;
        return current.rows();
    }

    /**
     * Not supported by this class.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public NeuralNetConfiguration conf() {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported by this class.
     *
     * @param conf ignored
     * @throws UnsupportedOperationException always
     */
    @Override
    public void setConf(NeuralNetConfiguration conf) {
        throw new UnsupportedOperationException();
    }

    /**
     * @return the input currently held by the network (may be null if none was set)
     */
    @Override
    public INDArray input() {
        INDArray current = this.input;
        return current;
    }

    /**
     * No-op in this class.
     */
    @Override
    public void validateInput() {
        // intentionally empty
    }

    /**
     * Delegates to {@code output(INDArray)}.
     *
     * @param data input to run through the network
     * @return whatever {@code output(data)} returns
     */
    @Override
    public INDArray transform(INDArray data) {
        INDArray transformed = this.output(data);
        return transformed;
    }

    /**
     * @return the default configuration held by this network
     */
    public NeuralNetConfiguration getDefaultConfiguration() {
        NeuralNetConfiguration current = this.defaultConfiguration;
        return current;
    }

    /**
     * Replaces the default configuration.
     *
     * @param defaultConfiguration new default configuration (stored as-is)
     */
    public void setDefaultConfiguration(NeuralNetConfiguration defaultConfiguration) {
        this.defaultConfiguration = defaultConfiguration;
    }

    /**
     * @return the layer-wise configuration held by this network
     */
    public MultiLayerConfiguration getLayerWiseConfigurations() {
        MultiLayerConfiguration current = this.layerWiseConfigurations;
        return current;
    }

    /**
     * Replaces the layer-wise configuration.
     *
     * @param layerWiseConfigurations new layer-wise configuration (stored as-is)
     */
    public void setLayerWiseConfigurations(MultiLayerConfiguration layerWiseConfigurations) {
        this.layerWiseConfigurations = layerWiseConfigurations;
    }

    /**
     * Stores the input and runs {@code init()} if it has not been run yet.
     * For two-dimensional input every configured hidden layer size is
     * checked to be at least 1 first.
     *
     * @param input input used to initialize the network
     * @throws IllegalArgumentException if {@code input} is null or a hidden layer size is below 1
     */
    public void initializeLayers(INDArray input) {
        if (input == null) {
            throw new IllegalArgumentException("Unable to initialize neuralNets with empty input");
        }
        int[] sizes = this.getLayerWiseConfigurations().getHiddenLayerSizes();
        if (input.shape().length == 2) {
            for (int size : sizes) {
                if (size < 1) {
                    throw new IllegalArgumentException("All hidden layer sizes must be >= 1");
                }
            }
        }
        this.input = input;
        if (this.initCalled) {
            return;
        }
        this.init();
    }

    /**
     * Builds the layer array from the layer-wise configuration, wires the
     * last configuration to the one before it, marks the network as
     * initialized and creates the parameter mask.
     *
     * @throws IllegalStateException if {@code getnLayers()} reports fewer than one layer
     */
    public void init() {
        if (this.layerWiseConfigurations == null || this.layers == null) {
            this.intializeConfigurations();
        }
        INDArray layerInput = this.input;
        if (this.getnLayers() < 1) {
            throw new IllegalStateException("Unable to createComplex network neuralNets; number specified is less than 1");
        }
        int[] hiddenLayerSizes = this.layerWiseConfigurations.getHiddenLayerSizes();
        // Only (re)build the layers when none exist yet.
        if (this.layers == null || this.layers[0] == null) {
            this.layers = new Layer[hiddenLayerSizes.length + 1];
            for (int i = 0; i < this.getnLayers(); ++i) {
                // First layer keeps its configured nIn; later layers take the previous hidden size.
                int inputSize = i == 0 ? this.layerWiseConfigurations.getConf(0).getnIn() : hiddenLayerSizes[i - 1];
                if (i == 0) {
                    this.layerWiseConfigurations.getConf(i).setnIn(inputSize);
                    this.layerWiseConfigurations.getConf(i).setnOut(hiddenLayerSizes[i]);
                    this.layers[i] = this.layerWiseConfigurations.getConf(i).getLayerFactory().create(this.layerWiseConfigurations.getConf(i));
                    continue;
                }
                // The final slot is filled after the loop from the last configuration.
                if (i >= this.getLayers().length - 1) continue;
                if (this.input != null) {
                    // layerInput is not read again inside this method after this assignment.
                    layerInput = this.activationFromPrevLayer(i - 1, layerInput);
                }
                this.layerWiseConfigurations.getConf(i).setnIn(inputSize);
                this.layerWiseConfigurations.getConf(i).setnOut(hiddenLayerSizes[i]);
                this.layers[i] = this.layerWiseConfigurations.getConf(i).getLayerFactory().create(this.layerWiseConfigurations.getConf(i));
            }
        }
        // NOTE(review): size() - 2 assumes at least two configurations are present - confirm with callers.
        NeuralNetConfiguration last = this.layerWiseConfigurations.getConf(this.layerWiseConfigurations.getConfs().size() - 1);
        NeuralNetConfiguration secondToLast = this.layerWiseConfigurations.getConf(this.layerWiseConfigurations.getConfs().size() - 2);
        last.setnIn(secondToLast.getnOut());
        // The last layer is always (re)created, even when the other layers already existed.
        this.layers[this.layers.length - 1] = last.getLayerFactory().create(last);
        this.initCalled = true;
        this.initMask();
    }

    /**
     * @return the result of {@code activate()} on the last layer
     */
    public INDArray activate() {
        int lastIdx = this.getLayers().length - 1;
        return this.getLayers()[lastIdx].activate();
    }

    /**
     * @param layer index of the layer to activate
     * @return the result of {@code activate()} on that layer
     */
    public INDArray activate(int layer) {
        Layer target = this.getLayers()[layer];
        return target.activate();
    }

    /**
     * @param layer index of the layer to activate
     * @param input input handed to that layer
     * @return the result of {@code activate(input)} on that layer
     */
    public INDArray activate(int layer, INDArray input) {
        Layer target = this.getLayers()[layer];
        return target.activate(input);
    }

    /**
     * Sets the data set's features as input, runs a forward pass, stores the
     * labels and, when the output layer is an {@code OutputLayer}, hands the
     * labels to it.
     *
     * @param data data set supplying features and labels
     */
    public void initialize(org.nd4j.linalg.dataset.DataSet data) {
        this.setInput(data.getFeatureMatrix());
        this.feedForward(data.getFeatureMatrix());
        this.labels = data.getLabels();
        if (!(this.getOutputLayer() instanceof OutputLayer)) {
            return;
        }
        ((OutputLayer)this.getOutputLayer()).setLabels(this.labels);
    }

    /**
     * Activates the layer at index {@code curr} on the given input.
     *
     * @param curr  index into the layer array
     * @param input input handed to that layer
     * @return the layer's activation
     */
    public INDArray activationFromPrevLayer(int curr, INDArray input) {
        Layer target = this.layers[curr];
        return target.activate(input);
    }

    /**
     * Runs the stored input through every layer.
     *
     * @return the input followed by one activation per layer; each activation
     *         has the layer's output pre-processor (if configured) and
     *         drop-connect (if enabled) applied
     * @throws IllegalStateException if the input is a matrix whose column count
     *         differs from the default configuration's {@code nIn}
     */
    public List<INDArray> feedForward() {
        INDArray current = this.input;
        boolean widthMismatch = this.input.isMatrix()
                && this.input.columns() != this.defaultConfiguration.getnIn();
        if (widthMismatch) {
            throw new IllegalStateException("Illegal input length");
        }
        List<INDArray> activations = new ArrayList<INDArray>();
        activations.add(current);
        for (int layerIdx = 0; layerIdx < this.layers.length; ++layerIdx) {
            INDArray activated = this.activationFromPrevLayer(layerIdx, current);
            OutputPreProcessor processor = this.getLayerWiseConfigurations().getPreProcessor(layerIdx);
            current = processor == null ? activated : processor.preProcess(activated);
            this.applyDropConnectIfNecessary(current);
            activations.add(current);
        }
        return activations;
    }

    /**
     * Stores {@code input} as the network input and runs a forward pass.
     *
     * @param input input to propagate
     * @return the list produced by {@code feedForward()}
     * @throws IllegalStateException if {@code input} is null
     */
    public List<INDArray> feedForward(INDArray input) {
        if (input != null) {
            this.input = input;
            return this.feedForward();
        }
        throw new IllegalStateException("Unable to perform feed forward; no input found");
    }

    /**
     * Collects the gradient of every layer into one lookup table, keyed by the
     * layer index rendered as a string.
     *
     * @return a gradient whose lookup table has one entry per layer
     */
    @Override
    public Gradient getGradient() {
        DefaultGradient ret = new DefaultGradient();
        // Visit every layer; stepping by two would silently drop the odd-indexed layers.
        for (int i = 0; i < this.layers.length; ++i) {
            ret.gradientLookupTable().put(String.valueOf(i), this.layers[i].getGradient().gradient());
        }
        return ret;
    }

    /**
     * @return the network gradient paired with the output layer's score
     */
    @Override
    public Pair<Gradient, Double> gradientAndScore() {
        Gradient gradient = this.getGradient();
        double outputScore = this.getOutputLayer().score();
        return new Pair<Gradient, Double>(gradient, outputScore);
    }

    /**
     * When drop-connect is enabled, multiplies {@code input} in place by a
     * binomial sample drawn with probability 0.5 per entry and, if the default
     * configuration's L2 value is positive, by that L2 value as well.
     *
     * @param input array to modify in place
     */
    protected void applyDropConnectIfNecessary(INDArray input) {
        if (!this.layerWiseConfigurations.isUseDropConnect()) {
            return;
        }
        INDArray probabilities = Nd4j.valueArrayOf((int)input.rows(), (int)input.columns(), (double)0.5);
        INDArray dropMask = Sampling.binomial((INDArray)probabilities, (int)1, (RandomGenerator)this.defaultConfiguration.getRng());
        input.muli(dropMask);
        if (this.defaultConfiguration.getL2() > 0.0) {
            input.muli((Number)this.defaultConfiguration.getL2());
        }
    }

    /**
     * Computes per-layer delta matrices from the activations of a forward
     * pass and the matching values returned by {@code feedForwardR} for
     * {@code v}.
     *
     * @param v vector handed to {@code feedForwardR} (presumably the direction of an R-operator pass - confirm)
     * @return one delta per entry of the internal array except its last slot,
     *         normalized by its 2-norm when unit-norm gradients are configured
     *         and the delta's element sum is positive
     */
    protected List<INDArray> computeDeltasR(INDArray v) {
        int i;
        ArrayList<INDArray> deltaRet = new ArrayList<INDArray>();
        INDArray[] deltas = new INDArray[this.getnLayers() + 1];
        List<INDArray> activations = this.feedForward();
        List<INDArray> rActivations = this.feedForwardR(activations, v);
        ArrayList<INDArray> weights = new ArrayList<INDArray>();
        ArrayList<INDArray> biases = new ArrayList<INDArray>();
        ArrayList<ActivationFunction> activationFunctions = new ArrayList<ActivationFunction>();
        // Snapshot W, b and the activation function of every layer, in layer order.
        for (int j = 0; j < this.getLayers().length; ++j) {
            weights.add(this.getLayers()[j].getParam("W"));
            biases.add(this.getLayers()[j].getParam("b"));
            activationFunctions.add(this.getLayers()[j].conf().getActivationFunction());
        }
        // Start from the last R-activation divided by the number of input rows.
        INDArray rix = rActivations.get(rActivations.size() - 1).divi((Number)this.input.rows());
        LinAlgExceptions.assertValidNum((INDArray)rix);
        // Walk from the top layer down, forming each delta and then pushing rix one layer lower.
        for (i = this.getnLayers() - 1; i >= 0; --i) {
            deltas[i] = activations.get(i).transpose().mmul(rix);
            this.applyDropConnectIfNecessary(deltas[i]);
            if (i <= 0) continue;
            rix = rix.mmul(((INDArray)weights.get(i)).addRowVector((INDArray)biases.get(i)).transpose()).muli(((ActivationFunction)activationFunctions.get(i - 1)).applyDerivative(activations.get(i)));
        }
        // The last slot of deltas is never assigned above, hence length - 1.
        for (i = 0; i < deltas.length - 1; ++i) {
            if (this.defaultConfiguration.isConstrainGradientToUnitNorm()) {
                double sum = deltas[i].sum(Integer.MAX_VALUE).getDouble(0);
                if (sum > 0.0) {
                    deltaRet.add(deltas[i].div(deltas[i].norm2(Integer.MAX_VALUE)));
                } else {
                    deltaRet.add(deltas[i]);
                }
            } else {
                deltaRet.add(deltas[i]);
            }
            LinAlgExceptions.assertValidNum((INDArray)((INDArray)deltaRet.get(i)));
        }
        return deltaRet;
    }

    /**
     * Rescales the damping factor based on the reduction ratio {@code rho}:
     * multiplied by {@code boost} when {@code rho} is below 0.25 or NaN,
     * multiplied by {@code decrease} when {@code rho} is above 0.75,
     * left unchanged otherwise.
     *
     * @param rho      reduction ratio
     * @param boost    multiplier used for a poor ratio
     * @param decrease multiplier used for a good ratio
     */
    public void dampingUpdate(double rho, double boost, double decrease) {
        final double multiplier;
        if (rho < 0.25 || Double.isNaN(rho)) {
            multiplier = boost;
        } else if (rho > 0.75) {
            multiplier = decrease;
        } else {
            return;
        }
        double rescaled = this.getLayerWiseConfigurations().getDampingFactor() * multiplier;
        this.layerWiseConfigurations.setDampingFactor(rescaled);
    }

    /**
     * Computes the ratio between the observed score change and a denominator
     * derived from {@code getBackPropRGradient(p)}, with the damping factor
     * temporarily set to 0 while the denominator is computed.
     *
     * @param p         step vector
     * @param currScore score before the step
     * @param score     score after the step
     * @param gradient  gradient multiplied with {@code p} for the denominator
     * @return negative infinity when {@code score} exceeds {@code currScore}, otherwise the ratio
     */
    public double reductionRatio(INDArray p, double currScore, double score, INDArray gradient) {
        // Remember the damping factor so it can be restored after the undamped product.
        double currentDamp = this.layerWiseConfigurations.getDampingFactor();
        this.layerWiseConfigurations.setDampingFactor(0.0);
        INDArray denom = this.getBackPropRGradient(p);
        // NOTE(review): the value returned by .sum(0) is discarded; only the muli calls
        // (presumably in-place) can affect denom - confirm this is intended.
        denom.muli((Number)0.5).muli(p.mul(denom)).sum(0);
        denom.subi(gradient.mul(p).sum(0));
        double rho = (currScore - score) / (Double)denom.getScalar(0).element();
        this.layerWiseConfigurations.setDampingFactor(currentDamp);
        // A score that went up is reported as negative infinity.
        if (score - currScore > 0.0) {
            return Double.NEGATIVE_INFINITY;
        }
        return rho;
    }

    /**
     * Computes, per layer, a delta matrix together with a second matrix built
     * from squared activations and squared errors (named {@code preCons} here;
     * presumably a preconditioner - confirm).
     *
     * @return one (delta, preCon) pair per layer, with the delta divided in
     *         place by its 2-norm when unit-norm gradients are configured
     */
    protected List<Pair<INDArray, INDArray>> computeDeltas2() {
        int i;
        ArrayList<Pair<INDArray, INDArray>> deltaRet = new ArrayList<Pair<INDArray, INDArray>>();
        List<INDArray> activations = this.feedForward();
        // activations holds the input plus one entry per layer, hence size() - 1 slots.
        INDArray[] deltas = new INDArray[activations.size() - 1];
        INDArray[] preCons = new INDArray[activations.size() - 1];
        // Output error: (final activation - labels) divided by the number of label rows.
        INDArray ix = activations.get(activations.size() - 1).sub(this.labels).div((Number)this.labels.rows());
        ArrayList<INDArray> weights = new ArrayList<INDArray>();
        ArrayList<INDArray> biases = new ArrayList<INDArray>();
        ArrayList<ActivationFunction> activationFunctions = new ArrayList<ActivationFunction>();
        for (int j = 0; j < this.getLayers().length; ++j) {
            weights.add(this.getLayers()[j].getParam("W"));
            biases.add(this.getLayers()[j].getParam("b"));
            activationFunctions.add(this.getLayers()[j].conf().getActivationFunction());
        }
        // Walk from the top layer down, propagating ix through the transposed weights.
        for (i = weights.size() - 1; i >= 0; --i) {
            deltas[i] = activations.get(i).transpose().mmul(ix);
            preCons[i] = Transforms.pow((INDArray)activations.get(i).transpose(), (Number)2).mmul(Transforms.pow((INDArray)ix, (Number)2)).muli((Number)this.labels.rows());
            this.applyDropConnectIfNecessary(deltas[i]);
            if (i <= 0) continue;
            ix = ix.mmul(((INDArray)weights.get(i)).transpose()).muli(((ActivationFunction)activationFunctions.get(i - 1)).applyDerivative(activations.get(i)));
        }
        for (i = 0; i < deltas.length; ++i) {
            if (this.defaultConfiguration.isConstrainGradientToUnitNorm()) {
                deltaRet.add(new Pair<INDArray, INDArray>(deltas[i].divi(deltas[i].norm2(Integer.MAX_VALUE)), preCons[i]));
                continue;
            }
            deltaRet.add(new Pair<INDArray, INDArray>(deltas[i], preCons[i]));
        }
        return deltaRet;
    }

    /**
     * Computes the deltas used by {@code backPropGradient()} from a fresh
     * forward pass and the stored labels.
     *
     * @return {@code getnLayers() + 2} entries; the top entry is the output
     *         error itself, the others are activation-transpose times error
     *         products, each divided in place by its 2-norm when unit-norm
     *         gradients are configured
     */
    protected List<INDArray> computeDeltas() {
        int i;
        ArrayList<INDArray> deltaRet = new ArrayList<INDArray>();
        INDArray[] deltas = new INDArray[this.getnLayers() + 2];
        List<INDArray> activations = this.feedForward();
        // Output error: labels minus final activation, minus the output activation's derivative.
        INDArray ix = this.labels.sub(activations.get(activations.size() - 1)).subi(this.getOutputLayer().conf().getActivationFunction().applyDerivative(activations.get(activations.size() - 1)));
        ArrayList<INDArray> weights = new ArrayList<INDArray>();
        ArrayList<INDArray> biases = new ArrayList<INDArray>();
        ArrayList<ActivationFunction> activationFunctions = new ArrayList<ActivationFunction>();
        for (int j = 0; j < this.getLayers().length; ++j) {
            weights.add(this.getLayers()[j].getParam("W"));
            biases.add(this.getLayers()[j].getParam("b"));
            activationFunctions.add(this.getLayers()[j].conf().getActivationFunction());
        }
        // The output layer's parameters are appended after the per-layer entries.
        // NOTE(review): if getLayers() already contains the output layer, it is listed twice - confirm.
        weights.add(this.getOutputLayer().getParam("W"));
        biases.add(this.getOutputLayer().getParam("b"));
        activationFunctions.add(this.getOutputLayer().conf().getActivationFunction());
        for (i = this.getnLayers() + 1; i >= 0; --i) {
            INDArray delta;
            // Top slot simply stores the output error.
            if (i >= this.getnLayers() + 1) {
                deltas[i] = ix;
                continue;
            }
            deltas[i] = delta = activations.get(i).transpose().mmul(ix);
            this.applyDropConnectIfNecessary(deltas[i]);
            INDArray weightsPlusBias = ((INDArray)weights.get(i)).transpose();
            INDArray activation = activations.get(i);
            if (i <= 0) continue;
            // Push the error one layer down.
            ix = ix.mmul(weightsPlusBias).muli(((ActivationFunction)activationFunctions.get(i - 1)).applyDerivative(activation));
        }
        for (i = 0; i < deltas.length; ++i) {
            if (this.defaultConfiguration.isConstrainGradientToUnitNorm()) {
                deltaRet.add(deltas[i].divi(deltas[i].norm2(Integer.MAX_VALUE)));
                continue;
            }
            deltaRet.add(deltas[i]);
        }
        return deltaRet;
    }

    /**
     * Adds the weight and bias changes from {@code backPropGradient()} in
     * place to the {@code "W"} and {@code "b"} parameters of every layer.
     */
    public void backPropStep() {
        List<Pair<INDArray, INDArray>> changes = this.backPropGradient();
        int idx = 0;
        for (Layer layer : this.layers) {
            INDArray weightParam = layer.getParam("W");
            weightParam.addi(changes.get(idx).getFirst());
            INDArray biasParam = layer.getParam("b");
            biasParam.addi(changes.get(idx).getSecond());
            ++idx;
        }
    }

    /**
     * @param v vector passed on to {@code backPropGradientR}
     * @return the result of {@code backPropGradientR(v)} flattened by {@code pack}
     */
    public INDArray getBackPropRGradient(INDArray v) {
        INDArray packed = this.pack(this.backPropGradientR(v));
        return packed;
    }

    /**
     * Splits the result of {@code backPropGradient2()} into its first and
     * second components and packs each into a flat array.
     *
     * @return (packed first components, packed second components)
     */
    public Pair<INDArray, INDArray> getBackPropGradient2() {
        List<Pair<INDArray, INDArray>> firsts = new ArrayList<Pair<INDArray, INDArray>>();
        List<Pair<INDArray, INDArray>> seconds = new ArrayList<Pair<INDArray, INDArray>>();
        for (Pair<Pair<INDArray, INDArray>, Pair<INDArray, INDArray>> entry : this.backPropGradient2()) {
            firsts.add(entry.getFirst());
            seconds.add(entry.getSecond());
        }
        INDArray packedFirsts = this.pack(firsts);
        INDArray packedSeconds = this.pack(seconds);
        return new Pair<INDArray, INDArray>(packedFirsts, packedSeconds);
    }

    /**
     * Creates a new instance of this object's runtime class and copies this
     * network's state into it via {@code update(MultiLayerNetwork)}.
     *
     * NOTE(review): instantiation goes through the runtime class's
     * no-argument constructor; the only constructor visible in this part of
     * the file takes a {@code MultiLayerConfiguration} - confirm one exists.
     *
     * @return the copy
     * @throws IllegalStateException if instantiation or the state copy fails;
     *         the original exception is attached as the cause
     */
    public MultiLayerNetwork clone() {
        try {
            MultiLayerNetwork ret = (MultiLayerNetwork)this.getClass().newInstance();
            ret.update(this);
            return ret;
        }
        catch (Exception e) {
            // Keep the cause so the real failure (e.g. a missing constructor) stays diagnosable.
            throw new IllegalStateException("Unable to clone network", e);
        }
    }

    /**
     * @return the parameters of all layers, in layer order, flattened into one array
     */
    @Override
    public INDArray params() {
        List<INDArray> perLayer = new ArrayList<INDArray>();
        for (Layer layer : this.layers) {
            perLayer.add(layer.params());
        }
        return Nd4j.toFlattened(perLayer);
    }

    /**
     * Delegates to {@code setParameters(INDArray)}.
     *
     * @param params parameters to install
     */
    @Override
    public void setParams(INDArray params) {
        this.setParameters(params);
    }

    /**
     * @return the sum of {@code numParams()} over all layers
     */
    @Override
    public int numParams() {
        int total = 0;
        for (Layer layer : this.layers) {
            total += layer.numParams();
        }
        return total;
    }

    /**
     * @return the same value as {@code params()}
     */
    public INDArray pack() {
        INDArray flattened = this.params();
        return flattened;
    }

    /**
     * Flattens a list of pairs into one array, taking for each pair its first
     * element followed by its second.
     *
     * @param layers pairs to flatten, in order
     * @return the flattened array
     */
    public INDArray pack(List<Pair<INDArray, INDArray>> layers) {
        List<INDArray> pieces = new ArrayList<INDArray>();
        for (Pair<INDArray, INDArray> pair : layers) {
            pieces.add(pair.getFirst());
            pieces.add(pair.getSecond());
        }
        return Nd4j.toFlattened(pieces);
    }

    /**
     * Delegates to {@code score(INDArray, INDArray)} with the data set's
     * feature matrix and labels.
     *
     * @param data data set to score
     * @return the score
     */
    @Override
    public double score(DataSet data) {
        double result = this.score(data.getFeatureMatrix(), data.getLabels());
        return result;
    }

    /**
     * Turns the deltas from {@code computeDeltas()} into (weight change, bias
     * change) pairs for the first {@code getnLayers() + 1} entries. The bias
     * change is the delta itself when it is a vector, otherwise its mean
     * along dimension 0. The parameter mask is created if it is still missing.
     *
     * @return one (weight change, bias change) pair per processed entry
     * @throws IllegalStateException if a delta's length differs from the
     *         length of the matching layer's {@code "W"} parameter
     */
    public List<Pair<INDArray, INDArray>> backPropGradient() {
        List<INDArray> deltas = this.computeDeltas();
        // The previous version also built a list of the current (W, b) pairs that was never read.
        List<Pair<INDArray, INDArray>> list = new ArrayList<Pair<INDArray, INDArray>>();
        for (int l = 0; l < this.getnLayers() + 1; ++l) {
            INDArray gradientChange = deltas.get(l);
            if (gradientChange.length() != this.getLayers()[l].getParam("W").length()) {
                throw new IllegalStateException("Gradient change not equal to weight change");
            }
            INDArray deltaColumnSums = gradientChange.isVector() ? gradientChange : gradientChange.mean(0);
            list.add(new Pair<INDArray, INDArray>(gradientChange, deltaColumnSums));
        }
        if (this.mask == null) {
            this.initMask();
        }
        return list;
    }

    /**
     * Splits a flat parameter array back into per-layer (weights, bias)
     * pairs, using the length and the row/column counts of each layer's
     * current {@code "W"} and {@code "b"} parameters.
     *
     * @param param flat parameters; reshaped to a single row when it has more than one row
     * @return one (weights, bias) pair per layer, in layer order
     * @throws IllegalStateException if a slice does not match the expected bias or weight length
     */
    public List<Pair<INDArray, INDArray>> unPack(INDArray param) {
        if (param.rows() != 1) {
            param = param.reshape(1, param.length());
        }
        List<Pair<INDArray, INDArray>> unpacked = new ArrayList<Pair<INDArray, INDArray>>();
        int offset = 0;
        for (int i = 0; i < this.layers.length; ++i) {
            Layer layer = this.layers[i];
            int layerLength = layer.getParam("W").length() + layer.getParam("b").length();
            // Slice this layer's parameters out of the flat vector.
            INDArray layerSlice = param.get(new NDArrayIndex[]{NDArrayIndex.interval((int)offset, (int)(offset + layerLength))});
            INDArray weightPart = layerSlice.get(new NDArrayIndex[]{NDArrayIndex.interval((int)0, (int)layer.getParam("W").length())});
            int biasStart = layer.getParam("W").length();
            int biasEnd = layerSlice.length();
            INDArray biasPart = layerSlice.get(new NDArrayIndex[]{NDArrayIndex.interval((int)biasStart, (int)biasEnd)});
            boolean lengthsAgree = weightPart.length() + biasPart.length() == layerLength;
            if (!lengthsAgree) {
                if (biasPart.length() != layer.getParam("b").length()) {
                    throw new IllegalStateException("Hidden bias on layer " + i + " was off");
                }
                if (weightPart.length() != layer.getParam("W").length()) {
                    throw new IllegalStateException("Weight portion on layer " + i + " was off");
                }
            }
            INDArray shapedWeights = weightPart.reshape(layer.getParam("W").rows(), layer.getParam("W").columns());
            INDArray shapedBias = biasPart.reshape(layer.getParam("b").rows(), layer.getParam("b").columns());
            unpacked.add(new Pair<INDArray, INDArray>(shapedWeights, shapedBias));
            offset += layerLength;
        }
        return unpacked;
    }

    /**
     * Builds, per layer, a gradient pair and a matching second pair (named
     * {@code preCon} here) from {@code computeDeltas2()}, then adds an
     * L2-based term to the packed gradient and an (L2 + damping)^0.75 term to
     * the packed {@code preCon} values before unpacking both again.
     *
     * @return one (gradient pair, preCon pair) entry per unpacked layer
     * @throws IllegalStateException if a delta or its column mean does not
     *         match the length of the corresponding layer parameter
     */
    protected List<Pair<Pair<INDArray, INDArray>, Pair<INDArray, INDArray>>> backPropGradient2() {
        List<Pair<INDArray, INDArray>> deltas = this.computeDeltas2();
        ArrayList<Pair<Pair<INDArray, INDArray>, Pair<INDArray, INDArray>>> list = new ArrayList<Pair<Pair<INDArray, INDArray>, Pair<INDArray, INDArray>>>();
        ArrayList<Pair<INDArray, INDArray>> grad = new ArrayList<Pair<INDArray, INDArray>>();
        ArrayList<Pair<INDArray, INDArray>> preCon = new ArrayList<Pair<INDArray, INDArray>>();
        for (int l = 0; l < deltas.size(); ++l) {
            INDArray gradientChange = deltas.get(l).getFirst();
            INDArray preConGradientChange = deltas.get(l).getSecond();
            if (l < this.layers.length && gradientChange.length() != this.layers[l].getParam("W").length()) {
                throw new IllegalStateException("Gradient change not equal to weight change");
            }
            // Bias-sized companions: mean along dimension 0 of each matrix.
            INDArray deltaColumnSums = deltas.get(l).getFirst().mean(0);
            INDArray preConColumnSums = deltas.get(l).getSecond().mean(0);
            grad.add(new Pair<INDArray, INDArray>(gradientChange, deltaColumnSums));
            preCon.add(new Pair<INDArray, INDArray>(preConGradientChange, preConColumnSums));
            if (l < this.layers.length && deltaColumnSums.length() != this.layers[l].getParam("b").length()) {
                throw new IllegalStateException("Bias change not equal to weight change");
            }
            // Only an entry at index getLayers().length is checked against the output layer's bias.
            if (l != this.getLayers().length || deltaColumnSums.length() == this.getOutputLayer().getParam("b").length()) continue;
            throw new IllegalStateException("Bias change not equal to weight change");
        }
        INDArray g = this.pack(grad);
        INDArray con = this.pack(preCon);
        INDArray theta = this.params();
        if (this.mask == null) {
            this.initMask();
        }
        // g += theta * L2 * mask
        g.addi(theta.mul((Number)this.defaultConfiguration.getL2()).muli(this.mask));
        // con += (mask * L2 + dampingFactor) ^ 0.75
        INDArray conAdd = Transforms.pow((INDArray)this.mask.mul((Number)this.defaultConfiguration.getL2()).add(Nd4j.valueArrayOf((int)g.rows(), (int)g.columns(), (double)this.layerWiseConfigurations.getDampingFactor())), (Number)0.75);
        con.addi(conAdd);
        List<Pair<INDArray, INDArray>> gUnpacked = this.unPack(g);
        List<Pair<INDArray, INDArray>> conUnpacked = this.unPack(con);
        for (int i = 0; i < gUnpacked.size(); ++i) {
            list.add(new Pair<Pair<INDArray, INDArray>, Pair<INDArray, INDArray>>(gUnpacked.get(i), conUnpacked.get(i)));
        }
        return list;
    }

    /**
     * Pretrains on the iterator, resets it, then fine-tunes on it.
     *
     * @param iter source of training batches
     */
    @Override
    public void fit(DataSetIterator iter) {
        // Phase 1: layer-wise pretraining.
        this.pretrain(iter);
        // Rewind so fine-tuning sees the data from the start.
        iter.reset();
        // Phase 2: fine-tuning.
        this.finetune(iter);
    }

    /**
     * Fine-tunes on every batch of the iterator after resetting it. The loop
     * stops at the first batch whose feature matrix or labels are null.
     *
     * With any optimizer other than {@code HESSIAN_FREE} a forward pass is
     * run and the output layer is fitted when it is an {@code OutputLayer};
     * with {@code HESSIAN_FREE} a {@code StochasticHessianFree} solver is run
     * on this network.
     *
     * @param iter source of training batches
     */
    public void finetune(DataSetIterator iter) {
        iter.reset();
        while (iter.hasNext()) {
            org.nd4j.linalg.dataset.DataSet batch = (org.nd4j.linalg.dataset.DataSet)iter.next();
            if (batch.getFeatureMatrix() == null || batch.getLabels() == null) {
                break;
            }
            this.setInput(batch.getFeatureMatrix());
            this.setLabels(batch.getLabels());
            boolean useHessianFree = this.getOutputLayer().conf().getOptimizationAlgo() == OptimizationAlgorithm.HESSIAN_FREE;
            if (useHessianFree) {
                StochasticHessianFree solver = new StochasticHessianFree(this.getOutputLayer().conf(), this.getOutputLayer().conf().getStepFunction(), this.getOutputLayer().conf().getListeners(), this);
                solver.optimize();
            } else {
                this.feedForward();
                if (this.getOutputLayer() instanceof OutputLayer) {
                    OutputLayer out = (OutputLayer)this.getOutputLayer();
                    out.fit();
                }
            }
        }
    }

    /**
     * Fine-tunes the output layer. A non-null {@code labels} argument
     * replaces the stored labels; a null argument falls back to the labels
     * already stored on this network. Returns after a warning when the output
     * layer is not an {@code OutputLayer}.
     *
     * With any optimizer other than {@code HESSIAN_FREE} the output layer is
     * fitted on its own input after a forward pass; with {@code HESSIAN_FREE}
     * the labels are handed to the output layer and a
     * {@code StochasticHessianFree} solver is run on this network.
     *
     * @param labels labels to train against, or null to reuse the stored labels
     */
    public void finetune(INDArray labels) {
        if (labels != null) {
            this.labels = labels;
        }
        if (!(this.getOutputLayer() instanceof OutputLayer)) {
            log.warn("Output layer not instance of output layer returning.");
            return;
        }
        // Use the stored labels so a null argument does not reach the output layer as null.
        INDArray effectiveLabels = this.labels;
        OutputLayer o = (OutputLayer)this.getOutputLayer();
        boolean useHessianFree = this.getOutputLayer().conf().getOptimizationAlgo() == OptimizationAlgorithm.HESSIAN_FREE;
        this.feedForward();
        if (!useHessianFree) {
            o.fit(this.getOutputLayer().getInput(), effectiveLabels);
        } else {
            o.setLabels(effectiveLabels);
            StochasticHessianFree hessianFree = new StochasticHessianFree(this.getOutputLayer().conf(), this.getOutputLayer().conf().getStepFunction(), this.getOutputLayer().conf().getListeners(), this);
            hessianFree.optimize();
        }
    }

    /**
     * Runs {@code d} through the network and, for each row of {@code d},
     * returns the index reported by the BLAS wrapper's {@code iamax} on the
     * matching output row.
     *
     * @param d examples, one per row
     * @return one index per row of {@code d}
     */
    @Override
    public int[] predict(INDArray d) {
        INDArray scores = this.output(d);
        int exampleCount = d.rows();
        int[] predictions = new int[exampleCount];
        int row = 0;
        while (row < predictions.length) {
            predictions[row] = Nd4j.getBlasWrapper().iamax(scores.getRow(row));
            ++row;
        }
        return predictions;
    }

    /**
     * Runs a forward pass on {@code examples} and asks the output layer for
     * label probabilities of the final activation.
     *
     * @param examples input examples
     * @return the output layer's label probabilities
     */
    @Override
    public INDArray labelProbabilities(INDArray examples) {
        List<INDArray> activations = this.feedForward(examples);
        OutputLayer out = (OutputLayer)this.getOutputLayer();
        INDArray finalActivation = activations.get(activations.size() - 1);
        return out.labelProbabilities(finalActivation);
    }

    /**
     * Pretrains on {@code examples} and then fine-tunes against
     * {@code labels}. When pretraining is disabled the examples are stored
     * directly as the network input before fine-tuning.
     *
     * @param examples training input
     * @param labels   training labels
     */
    @Override
    public void fit(INDArray examples, INDArray labels) {
        this.pretrain(examples);
        boolean pretrainEnabled = this.layerWiseConfigurations.isPretrain();
        if (!pretrainEnabled) {
            this.input = examples;
        }
        this.finetune(labels);
    }

    /**
     * Runs pretraining only, via {@code pretrain(INDArray)}.
     *
     * @param data training input
     */
    @Override
    public void fit(INDArray data) {
        // No labels are available here, so there is no fine-tuning step.
        this.pretrain(data);
    }

    /**
     * Delegates to {@code pretrain(INDArray)}.
     *
     * @param input training input
     */
    @Override
    public void iterate(INDArray input) {
        // Same behaviour as fit(INDArray).
        this.pretrain(input);
    }

    /**
     * Delegates to {@code fit(INDArray, INDArray)} with the data set's
     * feature matrix and labels.
     *
     * @param data data set to train on
     */
    @Override
    public void fit(DataSet data) {
        // Argument expressions are evaluated left to right: features first, then labels.
        this.fit(data.getFeatureMatrix(), data.getLabels());
    }

    /**
     * Converts the integer labels to an outcome matrix, with the output
     * layer's {@code nOut} as the number of outcomes, and delegates to
     * {@code fit(INDArray, INDArray)}.
     *
     * @param examples training input
     * @param labels   one integer label per example
     */
    @Override
    public void fit(INDArray examples, int[] labels) {
        INDArray outcomeMatrix = FeatureUtil.toOutcomeMatrix((int[])labels, (int)this.getOutputLayer().conf().getnOut());
        this.fit(examples, outcomeMatrix);
    }

    /**
     * @param x input to propagate
     * @return the last activation of a forward pass on {@code x}
     */
    public INDArray output(INDArray x) {
        List<INDArray> all = this.feedForward(x);
        int lastIdx = all.size() - 1;
        return all.get(lastIdx);
    }

    /**
     * Runs a forward pass on {@code x} and returns the entry at index
     * {@code layerNum - 1} of the resulting list (index 0 of that list is the
     * input itself).
     *
     * @param x        input to propagate
     * @param layerNum one-based position in the forward-pass list
     * @return the selected entry
     */
    public INDArray reconstruct(INDArray x, int layerNum) {
        List<INDArray> activations = this.feedForward(x);
        int position = layerNum - 1;
        return activations.get(position);
    }

    /**
     * Logs, at info level, one line listing every per-layer configuration
     * together with its position.
     */
    public void printConfiguration() {
        StringBuilder text = new StringBuilder();
        int position = 0;
        for (NeuralNetConfiguration layerConf : this.getLayerWiseConfigurations().getConfs()) {
            text.append(" Layer ").append(position).append(" conf ").append(layerConf);
            ++position;
        }
        log.info(text.toString());
    }

    /**
     * Copies state from another network into this one: default
     * configuration, input, labels and the three transform maps are shared by
     * reference; the layer array is copied with {@code ArrayUtils.clone}.
     *
     * @param network network to copy from
     */
    public void update(MultiLayerNetwork network) {
        // Configuration and data references.
        this.defaultConfiguration = network.defaultConfiguration;
        this.input = network.input;
        this.labels = network.labels;
        // Transform maps (shared, not copied).
        this.weightTransforms = network.weightTransforms;
        this.visibleBiasTransforms = network.visibleBiasTransforms;
        this.hiddenBiasTransforms = network.hiddenBiasTransforms;
        // New array for the layers.
        Layer[] copiedLayers = (Layer[])ArrayUtils.clone((Object[])network.layers);
        this.layers = copiedLayers;
    }

    /**
     * Runs a forward pass on {@code input}, stores {@code labels}, evaluates
     * the label probabilities against the labels and returns the F1 value of
     * that evaluation.
     *
     * @param input  examples to score
     * @param labels expected labels
     * @return the evaluation's F1 value
     */
    @Override
    public double score(INDArray input, INDArray labels) {
        this.feedForward(input);
        this.setLabels(labels);
        Evaluation evaluation = new Evaluation();
        INDArray predicted = this.labelProbabilities(input);
        evaluation.eval(labels, predicted);
        return evaluation.f1();
    }

    /**
     * Returns the column count of the currently stored labels.
     *
     * <p>Labels must have been set beforehand; the stored labels are
     * dereferenced without a null check.
     *
     * @return {@code labels.columns()}
     */
    @Override
    public int numLabels() {
        return labels.columns();
    }

    /**
     * Scores the network on a data set: feeds the feature matrix forward,
     * stores the data set's labels on this network and returns
     * {@link #score()}.
     *
     * @param data the data set supplying features and labels
     * @return the result of the no-argument {@code score()}
     */
    public double score(org.nd4j.linalg.dataset.DataSet data) {
        feedForward(data.getFeatureMatrix());
        setLabels(data.getLabels());
        return score();
    }

    /**
     * Fits the network using the input and labels already stored on this
     * instance, by calling {@link #fit(INDArray, INDArray)} with them.
     */
    @Override
    public void fit() {
        fit(input, labels);
    }

    /**
     * Gradient update hook. This implementation has an empty body and
     * ignores the supplied gradient.
     *
     * @param gradient not used by this implementation
     */
    @Override
    public void update(Gradient gradient) {
    }

    /**
     * Returns the output layer's score.
     *
     * <p>If the output layer has no input yet, a no-argument
     * {@code feedForward()} is run first.
     *
     * @return {@code getOutputLayer().score()}
     */
    @Override
    public double score() {
        boolean outputLayerHasInput = getOutputLayer().getInput() != null;
        if (!outputLayerHasInput) {
            feedForward();
        }
        return getOutputLayer().score();
    }

    /**
     * Scores the network as if its parameters were {@code param}, adds an L2
     * penalty, and then reinstates the parameters the network had before the
     * call.
     *
     * <p>The penalty is {@code 0.5 * L2 * sum((mask * param)^2)}, where
     * {@code L2} comes from the default configuration.
     *
     * @param param the candidate parameter vector to score
     * @return the network score under {@code param} plus the L2 penalty
     */
    public double score(INDArray param) {
        // Same lazy initialisation as backPropGradientR: without it the mask
        // dereference below fails when no mask has been set yet.
        if (this.mask == null) {
            this.initMask();
        }
        INDArray previous = this.params();
        try {
            this.setParameters(param);
            double ret = this.score();
            // Read the scalar through Number so that a backend returning a
            // Float does not trigger a ClassCastException.
            Object sumOfSquares = Transforms.pow((INDArray)this.mask.mul(param), (Number)2).sum(Integer.MAX_VALUE).element();
            double regCost = 0.5 * this.defaultConfiguration.getL2() * ((Number)sumOfSquares).doubleValue();
            return ret + regCost;
        } finally {
            // Always put the original parameters back, even if scoring threw.
            this.setParameters(previous);
        }
    }

    /**
     * Merges another network into this one, layer by layer.
     *
     * <p>Each of the first {@code getnLayers()} layers is merged with the
     * layer at the same index in {@code network}; afterwards the two output
     * layers are merged.
     *
     * <p>NOTE(review): {@code getOutputLayer()} returns the last element of
     * the layer array, so if {@code getnLayers()} equals the array length the
     * output layer has already been merged inside the loop and is merged a
     * second time by the final call. Confirm this is intended.
     *
     * @param network the network to merge into this one
     * @param batchSize passed through to every {@code Layer.merge} call
     * @throws IllegalArgumentException if the two networks have a different
     *         number of layers
     */
    public void merge(MultiLayerNetwork network, int batchSize) {
        boolean sameDepth = network.layers.length == this.layers.length;
        if (!sameDepth) {
            throw new IllegalArgumentException("Unable to merge networks that are not of equal length");
        }

        int index = 0;
        while (index < getnLayers()) {
            Layer mine = layers[index];
            Layer theirs = network.layers[index];
            mine.merge(theirs, batchSize);
            index++;
        }

        Layer myOutput = getOutputLayer();
        myOutput.merge(network.getOutputLayer(), batchSize);
    }

    /**
     * Stores the network input.
     *
     * <p>If the input is non-null and no layer array exists yet,
     * {@code initializeLayers(input)} is called before the input is stored.
     *
     * @param input the new input; may be {@code null}
     */
    public void setInput(INDArray input) {
        boolean needsLayerInit = input != null && this.layers == null;
        if (needsLayerInit) {
            initializeLayers(input);
        }
        this.input = input;
    }

    /**
     * Sets the mask to a single row of ones with one column per element of
     * {@code pack()}.
     */
    private void initMask() {
        int packedLength = (int) pack().length();
        INDArray allOnes = Nd4j.ones(1, packedLength);
        setMask(allOnes);
    }

    /**
     * Returns the first layer of the network.
     *
     * @return the element at index 0 of {@link #getLayers()}
     */
    public Layer getInputLayer() {
        Layer[] all = getLayers();
        return all[0];
    }

    /**
     * Returns the last layer of the network.
     *
     * @return the final element of {@link #getLayers()}
     */
    public Layer getOutputLayer() {
        Layer[] all = getLayers();
        return all[all.length - 1];
    }

    /**
     * Distributes a packed parameter vector across the layers.
     *
     * <p>Layers are visited in order; each receives the next
     * {@code numParams()} entries of {@code params}, selected with an
     * {@code NDArrayIndex.interval} starting where the previous layer's
     * slice ended.
     *
     * @param params the packed parameters for all layers
     */
    public void setParameters(INDArray params) {
        int offset = 0;
        for (Layer layer : getLayers()) {
            int count = layer.numParams();
            NDArrayIndex slice = NDArrayIndex.interval(offset, count + offset);
            layer.setParams(params.get(new NDArrayIndex[]{slice}));
            offset += count;
        }
    }

    /**
     * Computes where a layer's {@code "W"} and {@code "b"} parameters sit in
     * a flat layout that lists each layer's weights followed by its bias.
     *
     * <p>The start offset is the sum of the {@code "W"} and {@code "b"}
     * lengths of all layers before {@code layer}. The weight range then spans
     * the target layer's {@code "W"} length and the bias range follows it
     * immediately. When {@code layer} is not a valid index into the layer
     * array, the output layer's parameter lengths are used for the ranges.
     *
     * @param layer index of the layer to locate
     * @return the weight and bias offsets for that layer
     */
    public ParamRange startIndexForLayer(int layer) {
        int start = 0;
        for (int i = 0; i < layer; ++i) {
            Layer preceding = getLayers()[i];
            start += preceding.getParam("W").length();
            start += preceding.getParam("b").length();
        }

        Layer target = layer < getLayers().length ? getLayers()[layer] : getOutputLayer();
        int wEnd = start + target.getParam("W").length();
        int biasEnd = wEnd + target.getParam("b").length();
        return new ParamRange(start, wEnd, wEnd, biasEnd);
    }

    /**
     * Builds a list of per-layer "R" activations from existing activations
     * and a packed vector {@code v}.
     *
     * <p>The list starts with a zero matrix shaped like the stored input and
     * gains one entry per layer, so it holds {@code layers.length + 1}
     * entries. NOTE(review): this looks like the R-operator forward pass used
     * by Hessian-free optimisation; confirm against the solver.
     *
     * @param acts activations indexed so that {@code acts.get(i)} feeds layer
     *        {@code i} and {@code acts.get(i + 1)} is its output (as used below)
     * @param v packed vector that {@code unPack} splits into one pair per layer
     * @return the list of R activations, beginning with the zero matrix
     */
    public List<INDArray> feedForwardR(List<INDArray> acts, INDArray v) {
        ArrayList<INDArray> R = new ArrayList<INDArray>();
        // Seed entry: zeros with the same rows/columns as the stored input.
        R.add(Nd4j.zeros((int)this.input.rows(), (int)this.input.columns()));
        // One (first, second) pair per layer; presumably (weights, bias) parts of v.
        List<Pair<INDArray, INDArray>> vWvB = this.unPack(v);
        List<INDArray> W = MultiLayerUtil.weightMatrices(this);
        for (int i = 0; i < this.layers.length; ++i) {
            // Despite its name, this local holds the layer's activation function;
            // only its applyDerivative is used.
            ActivationFunction derivative = this.getLayers()[i].conf().getActivationFunction();
            // R[i+1] = (R[i] * W[i] + acts[i] * (first_i + row vector second_i)) .* f'(acts[i+1]);
            // addi/muli act in place on the fresh result of R[i].mmul(W[i]).
            R.add(((INDArray)R.get(i)).mmul(W.get(i)).addi(acts.get(i).mmul(vWvB.get(i).getFirst().addRowVector(vWvB.get(i).getSecond()))).muli(derivative.applyDerivative(acts.get(i + 1))));
        }
        return R;
    }

    /**
     * Convenience overload: obtains the activations from a no-argument
     * {@code feedForward()} and passes them, with {@code v}, to
     * {@link #feedForwardR(List, INDArray)}.
     *
     * @param v the packed vector forwarded to the two-argument overload
     * @return the result of the two-argument overload
     */
    public List<INDArray> feedForwardR(INDArray v) {
        List<INDArray> activations = feedForward();
        return feedForwardR(activations, v);
    }

    /**
     * Builds per-layer (weight, bias) pairs from {@code computeDeltasR(v)}
     * and applies L2 and damping terms to the packed result.
     *
     * <p>For each layer the delta is used as the weight part and its mean
     * along dimension 0 as the bias part; both are checked against the
     * lengths of the layer's {@code "W"} and {@code "b"} parameters. The
     * pairs are packed, {@code mask * L2 * v} and {@code v * dampingFactor}
     * are added in place, and the sum is unpacked again. The mask is created
     * on demand if it has not been set.
     *
     * @param v the packed vector the deltas are computed for
     * @return one (weight, bias) pair per layer
     * @throws IllegalStateException if a delta or its column mean does not
     *         match the corresponding parameter length
     */
    protected List<Pair<INDArray, INDArray>> backPropGradientR(INDArray v) {
        if (mask == null) {
            initMask();
        }

        List<INDArray> deltas = computeDeltasR(v);
        ArrayList<Pair<INDArray, INDArray>> perLayer = new ArrayList<Pair<INDArray, INDArray>>();
        for (int layerIdx = 0; layerIdx < getnLayers(); ++layerIdx) {
            INDArray weightDelta = deltas.get(layerIdx);
            Layer layer = getLayers()[layerIdx];
            if (weightDelta.length() != layer.getParam("W").length()) {
                throw new IllegalStateException("Gradient change not equal to weight change");
            }

            INDArray biasDelta = weightDelta.mean(0);
            if (biasDelta.length() != layer.getParam("b").length()) {
                throw new IllegalStateException("Bias change not equal to weight change");
            }

            perLayer.add(new Pair<INDArray, INDArray>(weightDelta, biasDelta));
        }

        // Same evaluation order as a single chained expression: pack first,
        // then the L2 term, then the damping term.
        INDArray packed = pack(perLayer);
        INDArray l2Term = mask.mul((Number) defaultConfiguration.getL2()).muli(v);
        INDArray regularized = packed.addi(l2Term);
        INDArray dampingTerm = v.mul((Number) layerWiseConfigurations.getDampingFactor());
        INDArray damped = regularized.addi(dampingTerm);
        return unPack(damped);
    }

    /**
     * @return the labels currently stored on this network
     */
    public INDArray getLabels() {
        return labels;
    }

    /**
     * @return the input currently stored on this network
     */
    public INDArray getInput() {
        return input;
    }

    /**
     * @return the weight transform map itself (not a copy), keyed by Integer
     */
    public Map<Integer, MatrixTransform> getWeightTransforms() {
        return weightTransforms;
    }

    /**
     * Stores the given labels on this network.
     *
     * @param labels the labels to store
     */
    public void setLabels(INDArray labels) {
        this.labels = labels;
    }

    /**
     * @return the hidden-bias transform map itself (not a copy), keyed by Integer
     */
    public Map<Integer, MatrixTransform> getHiddenBiasTransforms() {
        return hiddenBiasTransforms;
    }

    /**
     * @return the visible-bias transform map itself (not a copy), keyed by Integer
     */
    public Map<Integer, MatrixTransform> getVisibleBiasTransforms() {
        return visibleBiasTransforms;
    }

    /**
     * Returns the layer count derived from the configuration: the number of
     * configured hidden layer sizes plus one.
     *
     * @return {@code getHiddenLayerSizes().length + 1}
     */
    public int getnLayers() {
        int hiddenLayerCount = layerWiseConfigurations.getHiddenLayerSizes().length;
        return hiddenLayerCount + 1;
    }

    /**
     * @return the internal layer array itself (not a copy); may be
     *         {@code null} if no layers have been set
     */
    public Layer[] getLayers() {
        return layers;
    }

    /**
     * Replaces the layer array. The array is stored as given, without copying.
     *
     * @param layers the new layer array
     */
    public void setLayers(Layer[] layers) {
        this.layers = layers;
    }

    /**
     * @return the mask currently stored on this network, or {@code null} if
     *         none has been set
     */
    public INDArray getMask() {
        return mask;
    }

    /**
     * Stores the given mask on this network.
     *
     * @param mask the mask to store
     */
    public void setMask(INDArray mask) {
        this.mask = mask;
    }

    /**
     * Mutable holder for four integer offsets describing where one layer's
     * weights and bias sit in a flat parameter layout: weight start, weight
     * end, bias start and bias end.
     *
     * <p>Instances can only be created from within the enclosing class
     * because the constructor is private.
     */
    public static class ParamRange
    implements Serializable {
        private int wStart;
        private int wEnd;
        private int biasStart;
        private int biasEnd;

        /**
         * @param wStart offset at which the weights begin
         * @param wEnd offset at which the weights end
         * @param biasStart offset at which the bias begins
         * @param biasEnd offset at which the bias ends
         */
        private ParamRange(int wStart, int wEnd, int biasStart, int biasEnd) {
            this.wStart = wStart;
            this.wEnd = wEnd;
            this.biasStart = biasStart;
            this.biasEnd = biasEnd;
        }

        /** @return the offset at which the weights begin */
        public int getwStart() {
            return wStart;
        }

        /** @param wStart the new weight start offset */
        public void setwStart(int wStart) {
            this.wStart = wStart;
        }

        /** @return the offset at which the weights end */
        public int getwEnd() {
            return wEnd;
        }

        /** @param wEnd the new weight end offset */
        public void setwEnd(int wEnd) {
            this.wEnd = wEnd;
        }

        /** @return the offset at which the bias begins */
        public int getBiasStart() {
            return biasStart;
        }

        /** @param biasStart the new bias start offset */
        public void setBiasStart(int biasStart) {
            this.biasStart = biasStart;
        }

        /** @return the offset at which the bias ends */
        public int getBiasEnd() {
            return biasEnd;
        }

        /** @param biasEnd the new bias end offset */
        public void setBiasEnd(int biasEnd) {
            this.biasEnd = biasEnd;
        }
    }
}

