001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.util;
020
021 import java.io.DataInputStream;
022 import java.io.DataOutputStream;
023 import java.io.IOException;
024 import java.nio.ByteBuffer;
025 import java.util.zip.Checksum;
026
027 import org.apache.hadoop.classification.InterfaceAudience;
028 import org.apache.hadoop.classification.InterfaceStability;
029 import org.apache.hadoop.fs.ChecksumException;
030
/**
 * This class provides an interface and utilities for processing checksums for
 * DFS data transfers.
 */
035 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
036 @InterfaceStability.Evolving
037 public class DataChecksum implements Checksum {
038
039 // Misc constants
040 public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len
041
042 // checksum types
043 public static final int CHECKSUM_NULL = 0;
044 public static final int CHECKSUM_CRC32 = 1;
045 public static final int CHECKSUM_CRC32C = 2;
046 public static final int CHECKSUM_DEFAULT = 3;
047 public static final int CHECKSUM_MIXED = 4;
048
049 /** The checksum types */
050 public static enum Type {
051 NULL (CHECKSUM_NULL, 0),
052 CRC32 (CHECKSUM_CRC32, 4),
053 CRC32C(CHECKSUM_CRC32C, 4),
054 DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
055 MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
056
057 public final int id;
058 public final int size;
059
060 private Type(int id, int size) {
061 this.id = id;
062 this.size = size;
063 }
064
065 /** @return the type corresponding to the id. */
066 public static Type valueOf(int id) {
067 if (id < 0 || id >= values().length) {
068 throw new IllegalArgumentException("id=" + id
069 + " out of range [0, " + values().length + ")");
070 }
071 return values()[id];
072 }
073 }
074
075
076 public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
077 if ( bytesPerChecksum <= 0 ) {
078 return null;
079 }
080
081 switch ( type ) {
082 case NULL :
083 return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
084 case CRC32 :
085 return new DataChecksum(type, new PureJavaCrc32(), bytesPerChecksum );
086 case CRC32C:
087 return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
088 default:
089 return null;
090 }
091 }
092
093 /**
094 * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
095 * @return DataChecksum of the type in the array or null in case of an error.
096 */
097 public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
098 if ( offset < 0 || bytes.length < offset + HEADER_LEN ) {
099 return null;
100 }
101
102 // like readInt():
103 int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) |
104 ( (bytes[offset+2] & 0xff) << 16 ) |
105 ( (bytes[offset+3] & 0xff) << 8 ) |
106 ( (bytes[offset+4] & 0xff) );
107 return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
108 }
109
110 /**
111 * This constructucts a DataChecksum by reading HEADER_LEN bytes from
112 * input stream <i>in</i>
113 */
114 public static DataChecksum newDataChecksum( DataInputStream in )
115 throws IOException {
116 int type = in.readByte();
117 int bpc = in.readInt();
118 DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
119 if ( summer == null ) {
120 throw new IOException( "Could not create DataChecksum of type " +
121 type + " with bytesPerChecksum " + bpc );
122 }
123 return summer;
124 }
125
126 /**
127 * Writes the checksum header to the output stream <i>out</i>.
128 */
129 public void writeHeader( DataOutputStream out )
130 throws IOException {
131 out.writeByte( type.id );
132 out.writeInt( bytesPerChecksum );
133 }
134
135 public byte[] getHeader() {
136 byte[] header = new byte[DataChecksum.HEADER_LEN];
137 header[0] = (byte) (type.id & 0xff);
138 // Writing in buffer just like DataOutput.WriteInt()
139 header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
140 header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
141 header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
142 header[1+3] = (byte) (bytesPerChecksum & 0xff);
143 return header;
144 }
145
146 /**
147 * Writes the current checksum to the stream.
148 * If <i>reset</i> is true, then resets the checksum.
149 * @return number of bytes written. Will be equal to getChecksumSize();
150 */
151 public int writeValue( DataOutputStream out, boolean reset )
152 throws IOException {
153 if ( type.size <= 0 ) {
154 return 0;
155 }
156
157 if ( type.size == 4 ) {
158 out.writeInt( (int) summer.getValue() );
159 } else {
160 throw new IOException( "Unknown Checksum " + type );
161 }
162
163 if ( reset ) {
164 reset();
165 }
166
167 return type.size;
168 }
169
170 /**
171 * Writes the current checksum to a buffer.
172 * If <i>reset</i> is true, then resets the checksum.
173 * @return number of bytes written. Will be equal to getChecksumSize();
174 */
175 public int writeValue( byte[] buf, int offset, boolean reset )
176 throws IOException {
177 if ( type.size <= 0 ) {
178 return 0;
179 }
180
181 if ( type.size == 4 ) {
182 int checksum = (int) summer.getValue();
183 buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
184 buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
185 buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
186 buf[offset+3] = (byte) (checksum & 0xff);
187 } else {
188 throw new IOException( "Unknown Checksum " + type );
189 }
190
191 if ( reset ) {
192 reset();
193 }
194
195 return type.size;
196 }
197
198 /**
199 * Compares the checksum located at buf[offset] with the current checksum.
200 * @return true if the checksum matches and false otherwise.
201 */
202 public boolean compare( byte buf[], int offset ) {
203 if ( type.size == 4 ) {
204 int checksum = ( (buf[offset+0] & 0xff) << 24 ) |
205 ( (buf[offset+1] & 0xff) << 16 ) |
206 ( (buf[offset+2] & 0xff) << 8 ) |
207 ( (buf[offset+3] & 0xff) );
208 return checksum == (int) summer.getValue();
209 }
210 return type.size == 0;
211 }
212
213 private final Type type;
214 private final Checksum summer;
215 private final int bytesPerChecksum;
216 private int inSum = 0;
217
/**
 * Wraps a checksum implementation. Instances are obtained through the
 * newDataChecksum factory methods.
 *
 * @param type the checksum type
 * @param checksum the implementation to delegate to
 * @param chunkSize the value to store as bytesPerChecksum
 */
private DataChecksum(Type type, Checksum checksum, int chunkSize) {
  this.type = type;
  this.summer = checksum;
  this.bytesPerChecksum = chunkSize;
}
223
224 // Accessors
225 public Type getChecksumType() {
226 return type;
227 }
228 public int getChecksumSize() {
229 return type.size;
230 }
231 public int getBytesPerChecksum() {
232 return bytesPerChecksum;
233 }
234 public int getNumBytesInSum() {
235 return inSum;
236 }
237
238 public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
239 static public int getChecksumHeaderSize() {
240 return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
241 }
242 //Checksum Interface. Just a wrapper around member summer.
243 @Override
244 public long getValue() {
245 return summer.getValue();
246 }
247 @Override
248 public void reset() {
249 summer.reset();
250 inSum = 0;
251 }
252 @Override
253 public void update( byte[] b, int off, int len ) {
254 if ( len > 0 ) {
255 summer.update( b, off, len );
256 inSum += len;
257 }
258 }
259 @Override
260 public void update( int b ) {
261 summer.update( b );
262 inSum += 1;
263 }
264
265 /**
266 * Verify that the given checksums match the given data.
267 *
268 * The 'mark' of the ByteBuffer parameters may be modified by this function,.
269 * but the position is maintained.
270 *
271 * @param data the DirectByteBuffer pointing to the data to verify.
272 * @param checksums the DirectByteBuffer pointing to a series of stored
273 * checksums
274 * @param fileName the name of the file being read, for error-reporting
275 * @param basePos the file position to which the start of 'data' corresponds
276 * @throws ChecksumException if the checksums do not match
277 */
278 public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
279 String fileName, long basePos)
280 throws ChecksumException {
281 if (type.size == 0) return;
282
283 if (data.hasArray() && checksums.hasArray()) {
284 verifyChunkedSums(
285 data.array(), data.arrayOffset() + data.position(), data.remaining(),
286 checksums.array(), checksums.arrayOffset() + checksums.position(),
287 fileName, basePos);
288 return;
289 }
290 if (NativeCrc32.isAvailable()) {
291 NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
292 fileName, basePos);
293 return;
294 }
295
296 int startDataPos = data.position();
297 data.mark();
298 checksums.mark();
299 try {
300 byte[] buf = new byte[bytesPerChecksum];
301 byte[] sum = new byte[type.size];
302 while (data.remaining() > 0) {
303 int n = Math.min(data.remaining(), bytesPerChecksum);
304 checksums.get(sum);
305 data.get(buf, 0, n);
306 summer.reset();
307 summer.update(buf, 0, n);
308 int calculated = (int)summer.getValue();
309 int stored = (sum[0] << 24 & 0xff000000) |
310 (sum[1] << 16 & 0xff0000) |
311 (sum[2] << 8 & 0xff00) |
312 sum[3] & 0xff;
313 if (calculated != stored) {
314 long errPos = basePos + data.position() - startDataPos - n;
315 throw new ChecksumException(
316 "Checksum error: "+ fileName + " at "+ errPos +
317 " exp: " + stored + " got: " + calculated, errPos);
318 }
319 }
320 } finally {
321 data.reset();
322 checksums.reset();
323 }
324 }
325
326 /**
327 * Implementation of chunked verification specifically on byte arrays. This
328 * is to avoid the copy when dealing with ByteBuffers that have array backing.
329 */
330 private void verifyChunkedSums(
331 byte[] data, int dataOff, int dataLen,
332 byte[] checksums, int checksumsOff, String fileName,
333 long basePos) throws ChecksumException {
334
335 int remaining = dataLen;
336 int dataPos = 0;
337 while (remaining > 0) {
338 int n = Math.min(remaining, bytesPerChecksum);
339
340 summer.reset();
341 summer.update(data, dataOff + dataPos, n);
342 dataPos += n;
343 remaining -= n;
344
345 int calculated = (int)summer.getValue();
346 int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
347 (checksums[checksumsOff + 1] << 16 & 0xff0000) |
348 (checksums[checksumsOff + 2] << 8 & 0xff00) |
349 checksums[checksumsOff + 3] & 0xff;
350 checksumsOff += 4;
351 if (calculated != stored) {
352 long errPos = basePos + dataPos - n;
353 throw new ChecksumException(
354 "Checksum error: "+ fileName + " at "+ errPos +
355 " exp: " + stored + " got: " + calculated, errPos);
356 }
357 }
358 }
359
360 /**
361 * Calculate checksums for the given data.
362 *
363 * The 'mark' of the ByteBuffer parameters may be modified by this function,
364 * but the position is maintained.
365 *
366 * @param data the DirectByteBuffer pointing to the data to checksum.
367 * @param checksums the DirectByteBuffer into which checksums will be
368 * stored. Enough space must be available in this
369 * buffer to put the checksums.
370 */
371 public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
372 if (type.size == 0) return;
373
374 if (data.hasArray() && checksums.hasArray()) {
375 calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
376 checksums.array(), checksums.arrayOffset() + checksums.position());
377 return;
378 }
379
380 data.mark();
381 checksums.mark();
382 try {
383 byte[] buf = new byte[bytesPerChecksum];
384 while (data.remaining() > 0) {
385 int n = Math.min(data.remaining(), bytesPerChecksum);
386 data.get(buf, 0, n);
387 summer.reset();
388 summer.update(buf, 0, n);
389 checksums.putInt((int)summer.getValue());
390 }
391 } finally {
392 data.reset();
393 checksums.reset();
394 }
395 }
396
397 /**
398 * Implementation of chunked calculation specifically on byte arrays. This
399 * is to avoid the copy when dealing with ByteBuffers that have array backing.
400 */
401 private void calculateChunkedSums(
402 byte[] data, int dataOffset, int dataLength,
403 byte[] sums, int sumsOffset) {
404
405 int remaining = dataLength;
406 while (remaining > 0) {
407 int n = Math.min(remaining, bytesPerChecksum);
408 summer.reset();
409 summer.update(data, dataOffset, n);
410 dataOffset += n;
411 remaining -= n;
412 long calculated = summer.getValue();
413 sums[sumsOffset++] = (byte) (calculated >> 24);
414 sums[sumsOffset++] = (byte) (calculated >> 16);
415 sums[sumsOffset++] = (byte) (calculated >> 8);
416 sums[sumsOffset++] = (byte) (calculated);
417 }
418 }
419
420 @Override
421 public boolean equals(Object other) {
422 if (!(other instanceof DataChecksum)) {
423 return false;
424 }
425 DataChecksum o = (DataChecksum)other;
426 return o.bytesPerChecksum == this.bytesPerChecksum &&
427 o.type == this.type;
428 }
429
430 @Override
431 public int hashCode() {
432 return (this.type.id + 31) * this.bytesPerChecksum;
433 }
434
435 @Override
436 public String toString() {
437 return "DataChecksum(type=" + type +
438 ", chunkSize=" + bytesPerChecksum + ")";
439 }
440
441 /**
442 * This just provides a dummy implimentation for Checksum class
443 * This is used when there is no checksum available or required for
444 * data
445 */
446 static class ChecksumNull implements Checksum {
447
448 public ChecksumNull() {}
449
450 //Dummy interface
451 @Override
452 public long getValue() { return 0; }
453 @Override
454 public void reset() {}
455 @Override
456 public void update(byte[] b, int off, int len) {}
457 @Override
458 public void update(int b) {}
459 };
460 }