001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.util;
020
021 import java.io.DataInputStream;
022 import java.io.DataOutputStream;
023 import java.io.IOException;
024 import java.nio.ByteBuffer;
025 import java.util.zip.CRC32;
026 import java.util.zip.Checksum;
027
028 import org.apache.hadoop.classification.InterfaceAudience;
029 import org.apache.hadoop.classification.InterfaceStability;
030 import org.apache.hadoop.fs.ChecksumException;
031
032 /**
033 * This class provides interface and utilities for processing checksums for
034 * DFS data transfers.
035 */
036 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
037 @InterfaceStability.Evolving
038 public class DataChecksum implements Checksum {
039
040 // checksum types
041 public static final int CHECKSUM_NULL = 0;
042 public static final int CHECKSUM_CRC32 = 1;
043 public static final int CHECKSUM_CRC32C = 2;
044 public static final int CHECKSUM_DEFAULT = 3;
045 public static final int CHECKSUM_MIXED = 4;
046
047 /** The checksum types */
048 public static enum Type {
049 NULL (CHECKSUM_NULL, 0),
050 CRC32 (CHECKSUM_CRC32, 4),
051 CRC32C(CHECKSUM_CRC32C, 4),
052 DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
053 MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
054
055 public final int id;
056 public final int size;
057
058 private Type(int id, int size) {
059 this.id = id;
060 this.size = size;
061 }
062
063 /** @return the type corresponding to the id. */
064 public static Type valueOf(int id) {
065 if (id < 0 || id >= values().length) {
066 throw new IllegalArgumentException("id=" + id
067 + " out of range [0, " + values().length + ")");
068 }
069 return values()[id];
070 }
071 }
072
073 /**
074 * Create a Crc32 Checksum object. The implementation of the Crc32 algorithm
075 * is chosen depending on the platform.
076 */
077 public static Checksum newCrc32() {
078 return Shell.isJava7OrAbove()? new CRC32(): new PureJavaCrc32();
079 }
080
081 public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
082 if ( bytesPerChecksum <= 0 ) {
083 return null;
084 }
085
086 switch ( type ) {
087 case NULL :
088 return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
089 case CRC32 :
090 return new DataChecksum(type, newCrc32(), bytesPerChecksum );
091 case CRC32C:
092 return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
093 default:
094 return null;
095 }
096 }
097
098 /**
099 * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
100 * @return DataChecksum of the type in the array or null in case of an error.
101 */
102 public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
103 if (offset < 0 || bytes.length < offset + getChecksumHeaderSize()) {
104 return null;
105 }
106
107 // like readInt():
108 int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) |
109 ( (bytes[offset+2] & 0xff) << 16 ) |
110 ( (bytes[offset+3] & 0xff) << 8 ) |
111 ( (bytes[offset+4] & 0xff) );
112 return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
113 }
114
115 /**
116 * This constructs a DataChecksum by reading HEADER_LEN bytes from input
117 * stream <i>in</i>
118 */
119 public static DataChecksum newDataChecksum( DataInputStream in )
120 throws IOException {
121 int type = in.readByte();
122 int bpc = in.readInt();
123 DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
124 if ( summer == null ) {
125 throw new IOException( "Could not create DataChecksum of type " +
126 type + " with bytesPerChecksum " + bpc );
127 }
128 return summer;
129 }
130
131 /**
132 * Writes the checksum header to the output stream <i>out</i>.
133 */
134 public void writeHeader( DataOutputStream out )
135 throws IOException {
136 out.writeByte( type.id );
137 out.writeInt( bytesPerChecksum );
138 }
139
140 public byte[] getHeader() {
141 byte[] header = new byte[getChecksumHeaderSize()];
142 header[0] = (byte) (type.id & 0xff);
143 // Writing in buffer just like DataOutput.WriteInt()
144 header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
145 header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
146 header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
147 header[1+3] = (byte) (bytesPerChecksum & 0xff);
148 return header;
149 }
150
151 /**
152 * Writes the current checksum to the stream.
153 * If <i>reset</i> is true, then resets the checksum.
154 * @return number of bytes written. Will be equal to getChecksumSize();
155 */
156 public int writeValue( DataOutputStream out, boolean reset )
157 throws IOException {
158 if ( type.size <= 0 ) {
159 return 0;
160 }
161
162 if ( type.size == 4 ) {
163 out.writeInt( (int) summer.getValue() );
164 } else {
165 throw new IOException( "Unknown Checksum " + type );
166 }
167
168 if ( reset ) {
169 reset();
170 }
171
172 return type.size;
173 }
174
175 /**
176 * Writes the current checksum to a buffer.
177 * If <i>reset</i> is true, then resets the checksum.
178 * @return number of bytes written. Will be equal to getChecksumSize();
179 */
180 public int writeValue( byte[] buf, int offset, boolean reset )
181 throws IOException {
182 if ( type.size <= 0 ) {
183 return 0;
184 }
185
186 if ( type.size == 4 ) {
187 int checksum = (int) summer.getValue();
188 buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
189 buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
190 buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
191 buf[offset+3] = (byte) (checksum & 0xff);
192 } else {
193 throw new IOException( "Unknown Checksum " + type );
194 }
195
196 if ( reset ) {
197 reset();
198 }
199
200 return type.size;
201 }
202
203 /**
204 * Compares the checksum located at buf[offset] with the current checksum.
205 * @return true if the checksum matches and false otherwise.
206 */
207 public boolean compare( byte buf[], int offset ) {
208 if ( type.size == 4 ) {
209 int checksum = ( (buf[offset+0] & 0xff) << 24 ) |
210 ( (buf[offset+1] & 0xff) << 16 ) |
211 ( (buf[offset+2] & 0xff) << 8 ) |
212 ( (buf[offset+3] & 0xff) );
213 return checksum == (int) summer.getValue();
214 }
215 return type.size == 0;
216 }
217
218 private final Type type;
219 private final Checksum summer;
220 private final int bytesPerChecksum;
221 private int inSum = 0;
222
223 private DataChecksum( Type type, Checksum checksum, int chunkSize ) {
224 this.type = type;
225 summer = checksum;
226 bytesPerChecksum = chunkSize;
227 }
228
229 /** @return the checksum algorithm type. */
230 public Type getChecksumType() {
231 return type;
232 }
233 /** @return the size for a checksum. */
234 public int getChecksumSize() {
235 return type.size;
236 }
237 /** @return the required checksum size given the data length. */
238 public int getChecksumSize(int dataSize) {
239 return ((dataSize - 1)/getBytesPerChecksum() + 1) * getChecksumSize();
240 }
241 public int getBytesPerChecksum() {
242 return bytesPerChecksum;
243 }
244 public int getNumBytesInSum() {
245 return inSum;
246 }
247
248 public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
249 static public int getChecksumHeaderSize() {
250 return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
251 }
252 //Checksum Interface. Just a wrapper around member summer.
253 @Override
254 public long getValue() {
255 return summer.getValue();
256 }
257 @Override
258 public void reset() {
259 summer.reset();
260 inSum = 0;
261 }
262 @Override
263 public void update( byte[] b, int off, int len ) {
264 if ( len > 0 ) {
265 summer.update( b, off, len );
266 inSum += len;
267 }
268 }
269 @Override
270 public void update( int b ) {
271 summer.update( b );
272 inSum += 1;
273 }
274
275 /**
276 * Verify that the given checksums match the given data.
277 *
278 * The 'mark' of the ByteBuffer parameters may be modified by this function,.
279 * but the position is maintained.
280 *
281 * @param data the DirectByteBuffer pointing to the data to verify.
282 * @param checksums the DirectByteBuffer pointing to a series of stored
283 * checksums
284 * @param fileName the name of the file being read, for error-reporting
285 * @param basePos the file position to which the start of 'data' corresponds
286 * @throws ChecksumException if the checksums do not match
287 */
288 public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
289 String fileName, long basePos)
290 throws ChecksumException {
291 if (type.size == 0) return;
292
293 if (data.hasArray() && checksums.hasArray()) {
294 verifyChunkedSums(
295 data.array(), data.arrayOffset() + data.position(), data.remaining(),
296 checksums.array(), checksums.arrayOffset() + checksums.position(),
297 fileName, basePos);
298 return;
299 }
300 if (NativeCrc32.isAvailable()) {
301 NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
302 fileName, basePos);
303 return;
304 }
305
306 int startDataPos = data.position();
307 data.mark();
308 checksums.mark();
309 try {
310 byte[] buf = new byte[bytesPerChecksum];
311 byte[] sum = new byte[type.size];
312 while (data.remaining() > 0) {
313 int n = Math.min(data.remaining(), bytesPerChecksum);
314 checksums.get(sum);
315 data.get(buf, 0, n);
316 summer.reset();
317 summer.update(buf, 0, n);
318 int calculated = (int)summer.getValue();
319 int stored = (sum[0] << 24 & 0xff000000) |
320 (sum[1] << 16 & 0xff0000) |
321 (sum[2] << 8 & 0xff00) |
322 sum[3] & 0xff;
323 if (calculated != stored) {
324 long errPos = basePos + data.position() - startDataPos - n;
325 throw new ChecksumException(
326 "Checksum error: "+ fileName + " at "+ errPos +
327 " exp: " + stored + " got: " + calculated, errPos);
328 }
329 }
330 } finally {
331 data.reset();
332 checksums.reset();
333 }
334 }
335
336 /**
337 * Implementation of chunked verification specifically on byte arrays. This
338 * is to avoid the copy when dealing with ByteBuffers that have array backing.
339 */
340 private void verifyChunkedSums(
341 byte[] data, int dataOff, int dataLen,
342 byte[] checksums, int checksumsOff, String fileName,
343 long basePos) throws ChecksumException {
344 if (type.size == 0) return;
345
346 if (NativeCrc32.isAvailable()) {
347 NativeCrc32.verifyChunkedSumsByteArray(bytesPerChecksum, type.id,
348 checksums, checksumsOff, data, dataOff, dataLen, fileName, basePos);
349 return;
350 }
351
352 int remaining = dataLen;
353 int dataPos = 0;
354 while (remaining > 0) {
355 int n = Math.min(remaining, bytesPerChecksum);
356
357 summer.reset();
358 summer.update(data, dataOff + dataPos, n);
359 dataPos += n;
360 remaining -= n;
361
362 int calculated = (int)summer.getValue();
363 int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
364 (checksums[checksumsOff + 1] << 16 & 0xff0000) |
365 (checksums[checksumsOff + 2] << 8 & 0xff00) |
366 checksums[checksumsOff + 3] & 0xff;
367 checksumsOff += 4;
368 if (calculated != stored) {
369 long errPos = basePos + dataPos - n;
370 throw new ChecksumException(
371 "Checksum error: "+ fileName + " at "+ errPos +
372 " exp: " + stored + " got: " + calculated, errPos);
373 }
374 }
375 }
376
377 /**
378 * Calculate checksums for the given data.
379 *
380 * The 'mark' of the ByteBuffer parameters may be modified by this function,
381 * but the position is maintained.
382 *
383 * @param data the DirectByteBuffer pointing to the data to checksum.
384 * @param checksums the DirectByteBuffer into which checksums will be
385 * stored. Enough space must be available in this
386 * buffer to put the checksums.
387 */
388 public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
389 if (type.size == 0) return;
390
391 if (data.hasArray() && checksums.hasArray()) {
392 calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
393 checksums.array(), checksums.arrayOffset() + checksums.position());
394 return;
395 }
396
397 if (NativeCrc32.isAvailable()) {
398 NativeCrc32.calculateChunkedSums(bytesPerChecksum, type.id,
399 checksums, data);
400 return;
401 }
402
403 data.mark();
404 checksums.mark();
405 try {
406 byte[] buf = new byte[bytesPerChecksum];
407 while (data.remaining() > 0) {
408 int n = Math.min(data.remaining(), bytesPerChecksum);
409 data.get(buf, 0, n);
410 summer.reset();
411 summer.update(buf, 0, n);
412 checksums.putInt((int)summer.getValue());
413 }
414 } finally {
415 data.reset();
416 checksums.reset();
417 }
418 }
419
420 /**
421 * Implementation of chunked calculation specifically on byte arrays. This
422 * is to avoid the copy when dealing with ByteBuffers that have array backing.
423 */
424 public void calculateChunkedSums(
425 byte[] data, int dataOffset, int dataLength,
426 byte[] sums, int sumsOffset) {
427 if (type.size == 0) return;
428
429 if (NativeCrc32.isAvailable()) {
430 NativeCrc32.calculateChunkedSumsByteArray(bytesPerChecksum, type.id,
431 sums, sumsOffset, data, dataOffset, dataLength);
432 return;
433 }
434
435 int remaining = dataLength;
436 while (remaining > 0) {
437 int n = Math.min(remaining, bytesPerChecksum);
438 summer.reset();
439 summer.update(data, dataOffset, n);
440 dataOffset += n;
441 remaining -= n;
442 long calculated = summer.getValue();
443 sums[sumsOffset++] = (byte) (calculated >> 24);
444 sums[sumsOffset++] = (byte) (calculated >> 16);
445 sums[sumsOffset++] = (byte) (calculated >> 8);
446 sums[sumsOffset++] = (byte) (calculated);
447 }
448 }
449
450 @Override
451 public boolean equals(Object other) {
452 if (!(other instanceof DataChecksum)) {
453 return false;
454 }
455 DataChecksum o = (DataChecksum)other;
456 return o.bytesPerChecksum == this.bytesPerChecksum &&
457 o.type == this.type;
458 }
459
460 @Override
461 public int hashCode() {
462 return (this.type.id + 31) * this.bytesPerChecksum;
463 }
464
465 @Override
466 public String toString() {
467 return "DataChecksum(type=" + type +
468 ", chunkSize=" + bytesPerChecksum + ")";
469 }
470
471 /**
472 * This just provides a dummy implimentation for Checksum class
473 * This is used when there is no checksum available or required for
474 * data
475 */
476 static class ChecksumNull implements Checksum {
477
478 public ChecksumNull() {}
479
480 //Dummy interface
481 @Override
482 public long getValue() { return 0; }
483 @Override
484 public void reset() {}
485 @Override
486 public void update(byte[] b, int off, int len) {}
487 @Override
488 public void update(int b) {}
489 };
490 }