001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.util;
020    
021    import java.io.DataInputStream;
022    import java.io.DataOutputStream;
023    import java.io.IOException;
024    import java.nio.ByteBuffer;
025    import java.util.zip.CRC32;
026    import java.util.zip.Checksum;
027    
028    import org.apache.hadoop.classification.InterfaceAudience;
029    import org.apache.hadoop.classification.InterfaceStability;
030    import org.apache.hadoop.fs.ChecksumException;
031    
032    /**
033     * This class provides interface and utilities for processing checksums for
034     * DFS data transfers.
035     */
036    @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
037    @InterfaceStability.Evolving
038    public class DataChecksum implements Checksum {
039      
040      // checksum types
041      public static final int CHECKSUM_NULL    = 0;
042      public static final int CHECKSUM_CRC32   = 1;
043      public static final int CHECKSUM_CRC32C  = 2;
044      public static final int CHECKSUM_DEFAULT = 3; 
045      public static final int CHECKSUM_MIXED   = 4;
046     
047      /** The checksum types */
048      public static enum Type {
049        NULL  (CHECKSUM_NULL, 0),
050        CRC32 (CHECKSUM_CRC32, 4),
051        CRC32C(CHECKSUM_CRC32C, 4),
052        DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
053        MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
054    
055        public final int id;
056        public final int size;
057        
058        private Type(int id, int size) {
059          this.id = id;
060          this.size = size;
061        }
062    
063        /** @return the type corresponding to the id. */
064        public static Type valueOf(int id) {
065          if (id < 0 || id >= values().length) {
066            throw new IllegalArgumentException("id=" + id
067                + " out of range [0, " + values().length + ")");
068          }
069          return values()[id];
070        }
071      }
072    
073      /**
074       * Create a Crc32 Checksum object. The implementation of the Crc32 algorithm
075       * is chosen depending on the platform.
076       */
077      public static Checksum newCrc32() {
078        return Shell.isJava7OrAbove()? new CRC32(): new PureJavaCrc32();
079      }
080    
081      public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
082        if ( bytesPerChecksum <= 0 ) {
083          return null;
084        }
085        
086        switch ( type ) {
087        case NULL :
088          return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
089        case CRC32 :
090          return new DataChecksum(type, newCrc32(), bytesPerChecksum );
091        case CRC32C:
092          return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
093        default:
094          return null;  
095        }
096      }
097      
098      /**
099       * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
100       * @return DataChecksum of the type in the array or null in case of an error.
101       */
102      public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
103        if (offset < 0 || bytes.length < offset + getChecksumHeaderSize()) {
104          return null;
105        }
106        
107        // like readInt():
108        int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 
109                               ( (bytes[offset+2] & 0xff) << 16 ) |
110                               ( (bytes[offset+3] & 0xff) << 8 )  |
111                               ( (bytes[offset+4] & 0xff) );
112        return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
113      }
114      
115      /**
116       * This constructs a DataChecksum by reading HEADER_LEN bytes from input
117       * stream <i>in</i>
118       */
119      public static DataChecksum newDataChecksum( DataInputStream in )
120                                     throws IOException {
121        int type = in.readByte();
122        int bpc = in.readInt();
123        DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
124        if ( summer == null ) {
125          throw new IOException( "Could not create DataChecksum of type " +
126                                 type + " with bytesPerChecksum " + bpc );
127        }
128        return summer;
129      }
130      
131      /**
132       * Writes the checksum header to the output stream <i>out</i>.
133       */
134      public void writeHeader( DataOutputStream out ) 
135                               throws IOException { 
136        out.writeByte( type.id );
137        out.writeInt( bytesPerChecksum );
138      }
139    
140      public byte[] getHeader() {
141        byte[] header = new byte[getChecksumHeaderSize()];
142        header[0] = (byte) (type.id & 0xff);
143        // Writing in buffer just like DataOutput.WriteInt()
144        header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
145        header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
146        header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
147        header[1+3] = (byte) (bytesPerChecksum & 0xff);
148        return header;
149      }
150      
151      /**
152       * Writes the current checksum to the stream.
153       * If <i>reset</i> is true, then resets the checksum.
154       * @return number of bytes written. Will be equal to getChecksumSize();
155       */
156       public int writeValue( DataOutputStream out, boolean reset )
157                              throws IOException {
158         if ( type.size <= 0 ) {
159           return 0;
160         }
161    
162         if ( type.size == 4 ) {
163           out.writeInt( (int) summer.getValue() );
164         } else {
165           throw new IOException( "Unknown Checksum " + type );
166         }
167         
168         if ( reset ) {
169           reset();
170         }
171         
172         return type.size;
173       }
174       
175       /**
176        * Writes the current checksum to a buffer.
177        * If <i>reset</i> is true, then resets the checksum.
178        * @return number of bytes written. Will be equal to getChecksumSize();
179        */
180        public int writeValue( byte[] buf, int offset, boolean reset )
181                               throws IOException {
182          if ( type.size <= 0 ) {
183            return 0;
184          }
185    
186          if ( type.size == 4 ) {
187            int checksum = (int) summer.getValue();
188            buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
189            buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
190            buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
191            buf[offset+3] = (byte) (checksum & 0xff);
192          } else {
193            throw new IOException( "Unknown Checksum " + type );
194          }
195          
196          if ( reset ) {
197            reset();
198          }
199          
200          return type.size;
201        }
202       
203       /**
204        * Compares the checksum located at buf[offset] with the current checksum.
205        * @return true if the checksum matches and false otherwise.
206        */
207       public boolean compare( byte buf[], int offset ) {
208         if ( type.size == 4 ) {
209           int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 
210                          ( (buf[offset+1] & 0xff) << 16 ) |
211                          ( (buf[offset+2] & 0xff) << 8 )  |
212                          ( (buf[offset+3] & 0xff) );
213           return checksum == (int) summer.getValue();
214         }
215         return type.size == 0;
216       }
217       
218      private final Type type;
219      private final Checksum summer;
220      private final int bytesPerChecksum;
221      private int inSum = 0;
222      
223      private DataChecksum( Type type, Checksum checksum, int chunkSize ) {
224        this.type = type;
225        summer = checksum;
226        bytesPerChecksum = chunkSize;
227      }
228      
229      /** @return the checksum algorithm type. */
230      public Type getChecksumType() {
231        return type;
232      }
233      /** @return the size for a checksum. */
234      public int getChecksumSize() {
235        return type.size;
236      }
237      /** @return the required checksum size given the data length. */
238      public int getChecksumSize(int dataSize) {
239        return ((dataSize - 1)/getBytesPerChecksum() + 1) * getChecksumSize(); 
240      }
241      public int getBytesPerChecksum() {
242        return bytesPerChecksum;
243      }
244      public int getNumBytesInSum() {
245        return inSum;
246      }
247      
248      public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
249      static public int getChecksumHeaderSize() {
250        return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
251      }
252      //Checksum Interface. Just a wrapper around member summer.
253      @Override
254      public long getValue() {
255        return summer.getValue();
256      }
257      @Override
258      public void reset() {
259        summer.reset();
260        inSum = 0;
261      }
262      @Override
263      public void update( byte[] b, int off, int len ) {
264        if ( len > 0 ) {
265          summer.update( b, off, len );
266          inSum += len;
267        }
268      }
269      @Override
270      public void update( int b ) {
271        summer.update( b );
272        inSum += 1;
273      }
274      
275      /**
276       * Verify that the given checksums match the given data.
277       * 
278       * The 'mark' of the ByteBuffer parameters may be modified by this function,.
279       * but the position is maintained.
280       *  
281       * @param data the DirectByteBuffer pointing to the data to verify.
282       * @param checksums the DirectByteBuffer pointing to a series of stored
283       *                  checksums
284       * @param fileName the name of the file being read, for error-reporting
285       * @param basePos the file position to which the start of 'data' corresponds
286       * @throws ChecksumException if the checksums do not match
287       */
288      public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
289          String fileName, long basePos)
290      throws ChecksumException {
291        if (type.size == 0) return;
292        
293        if (data.hasArray() && checksums.hasArray()) {
294          verifyChunkedSums(
295              data.array(), data.arrayOffset() + data.position(), data.remaining(),
296              checksums.array(), checksums.arrayOffset() + checksums.position(),
297              fileName, basePos);
298          return;
299        }
300        if (NativeCrc32.isAvailable()) {
301          NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
302              fileName, basePos);
303          return;
304        }
305        
306        int startDataPos = data.position();
307        data.mark();
308        checksums.mark();
309        try {
310          byte[] buf = new byte[bytesPerChecksum];
311          byte[] sum = new byte[type.size];
312          while (data.remaining() > 0) {
313            int n = Math.min(data.remaining(), bytesPerChecksum);
314            checksums.get(sum);
315            data.get(buf, 0, n);
316            summer.reset();
317            summer.update(buf, 0, n);
318            int calculated = (int)summer.getValue();
319            int stored = (sum[0] << 24 & 0xff000000) |
320              (sum[1] << 16 & 0xff0000) |
321              (sum[2] << 8 & 0xff00) |
322              sum[3] & 0xff;
323            if (calculated != stored) {
324              long errPos = basePos + data.position() - startDataPos - n;
325              throw new ChecksumException(
326                  "Checksum error: "+ fileName + " at "+ errPos +
327                  " exp: " + stored + " got: " + calculated, errPos);
328            }
329          }
330        } finally {
331          data.reset();
332          checksums.reset();
333        }
334      }
335      
336      /**
337       * Implementation of chunked verification specifically on byte arrays. This
338       * is to avoid the copy when dealing with ByteBuffers that have array backing.
339       */
340      private void verifyChunkedSums(
341          byte[] data, int dataOff, int dataLen,
342          byte[] checksums, int checksumsOff, String fileName,
343          long basePos) throws ChecksumException {
344        if (type.size == 0) return;
345    
346        if (NativeCrc32.isAvailable()) {
347          NativeCrc32.verifyChunkedSumsByteArray(bytesPerChecksum, type.id,
348              checksums, checksumsOff, data, dataOff, dataLen, fileName, basePos);
349          return;
350        }
351        
352        int remaining = dataLen;
353        int dataPos = 0;
354        while (remaining > 0) {
355          int n = Math.min(remaining, bytesPerChecksum);
356          
357          summer.reset();
358          summer.update(data, dataOff + dataPos, n);
359          dataPos += n;
360          remaining -= n;
361          
362          int calculated = (int)summer.getValue();
363          int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
364            (checksums[checksumsOff + 1] << 16 & 0xff0000) |
365            (checksums[checksumsOff + 2] << 8 & 0xff00) |
366            checksums[checksumsOff + 3] & 0xff;
367          checksumsOff += 4;
368          if (calculated != stored) {
369            long errPos = basePos + dataPos - n;
370            throw new ChecksumException(
371                "Checksum error: "+ fileName + " at "+ errPos +
372                " exp: " + stored + " got: " + calculated, errPos);
373          }
374        }
375      }
376    
377      /**
378       * Calculate checksums for the given data.
379       * 
380       * The 'mark' of the ByteBuffer parameters may be modified by this function,
381       * but the position is maintained.
382       * 
383       * @param data the DirectByteBuffer pointing to the data to checksum.
384       * @param checksums the DirectByteBuffer into which checksums will be
385       *                  stored. Enough space must be available in this
386       *                  buffer to put the checksums.
387       */
388      public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
389        if (type.size == 0) return;
390        
391        if (data.hasArray() && checksums.hasArray()) {
392          calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
393              checksums.array(), checksums.arrayOffset() + checksums.position());
394          return;
395        }
396    
397        if (NativeCrc32.isAvailable()) {
398          NativeCrc32.calculateChunkedSums(bytesPerChecksum, type.id,
399              checksums, data);
400          return;
401        }
402        
403        data.mark();
404        checksums.mark();
405        try {
406          byte[] buf = new byte[bytesPerChecksum];
407          while (data.remaining() > 0) {
408            int n = Math.min(data.remaining(), bytesPerChecksum);
409            data.get(buf, 0, n);
410            summer.reset();
411            summer.update(buf, 0, n);
412            checksums.putInt((int)summer.getValue());
413          }
414        } finally {
415          data.reset();
416          checksums.reset();
417        }
418      }
419    
420      /**
421       * Implementation of chunked calculation specifically on byte arrays. This
422       * is to avoid the copy when dealing with ByteBuffers that have array backing.
423       */
424      public void calculateChunkedSums(
425          byte[] data, int dataOffset, int dataLength,
426          byte[] sums, int sumsOffset) {
427        if (type.size == 0) return;
428    
429        if (NativeCrc32.isAvailable()) {
430          NativeCrc32.calculateChunkedSumsByteArray(bytesPerChecksum, type.id,
431              sums, sumsOffset, data, dataOffset, dataLength);
432          return;
433        }
434    
435        int remaining = dataLength;
436        while (remaining > 0) {
437          int n = Math.min(remaining, bytesPerChecksum);
438          summer.reset();
439          summer.update(data, dataOffset, n);
440          dataOffset += n;
441          remaining -= n;
442          long calculated = summer.getValue();
443          sums[sumsOffset++] = (byte) (calculated >> 24);
444          sums[sumsOffset++] = (byte) (calculated >> 16);
445          sums[sumsOffset++] = (byte) (calculated >> 8);
446          sums[sumsOffset++] = (byte) (calculated);
447        }
448      }
449    
450      @Override
451      public boolean equals(Object other) {
452        if (!(other instanceof DataChecksum)) {
453          return false;
454        }
455        DataChecksum o = (DataChecksum)other;
456        return o.bytesPerChecksum == this.bytesPerChecksum &&
457          o.type == this.type;
458      }
459      
460      @Override
461      public int hashCode() {
462        return (this.type.id + 31) * this.bytesPerChecksum;
463      }
464      
465      @Override
466      public String toString() {
467        return "DataChecksum(type=" + type +
468          ", chunkSize=" + bytesPerChecksum + ")";
469      }
470      
471      /**
472       * This just provides a dummy implimentation for Checksum class
473       * This is used when there is no checksum available or required for 
474       * data
475       */
476      static class ChecksumNull implements Checksum {
477        
478        public ChecksumNull() {}
479        
480        //Dummy interface
481        @Override
482        public long getValue() { return 0; }
483        @Override
484        public void reset() {}
485        @Override
486        public void update(byte[] b, int off, int len) {}
487        @Override
488        public void update(int b) {}
489      };
490    }