hadoop Utils 源码

  • 2022-10-20
  • 浏览 (348)

haddop Utils 代码

文件路径:/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Utils.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package org.apache.hadoop.io.file.tfile;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.io.Text;

/**
 * Supporting Utility classes used by TFile, and shared by users of TFile.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public final class Utils {

  /**
   * Prevent the instantiation of Utils.
   */
  private Utils() {
    // nothing
  }

  /**
   * Encoding an integer into a variable-length encoding format. Synonymous to
   * <code>Utils#writeVLong(out, n)</code>.
   * 
   * @param out
   *          output stream
   * @param n
   *          The integer to be encoded
   * @throws IOException raised on errors performing I/O.
   * @see Utils#writeVLong(DataOutput, long)
   */
  public static void writeVInt(DataOutput out, int n) throws IOException {
    writeVLong(out, n);
  }

  /**
   * Encoding a Long integer into a variable-length encoding format.
   * <ul>
   * <li>if n in [-32, 127): encode in one byte with the actual value.
   * Otherwise,
   * <li>if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
   * byte[1]=n&amp;0xff. Otherwise,
   * <li>if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
   * 88; byte[1]=(n&gt;&gt;8)&amp;0xff; byte[2]=n&amp;0xff. Otherwise,
   * <li>if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
   * byte[1] = (n&gt;&gt;16)&amp;0xff; byte[2] = (n&gt;&gt;8)&amp;0xff;
   * byte[3]=n&amp;0xff.
   * Otherwise:
   * <li>if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
   * (n&gt;&gt;24)&amp;0xff; byte[2]=(n&gt;&gt;16)&amp;0xff;
   * byte[3]=(n&gt;&gt;8)&amp;0xff; byte[4]=n&amp;0xff;
   * <li>if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
   * (n&gt;&gt;32)&amp;0xff; byte[2]=(n&gt;&gt;24)&amp;0xff;
   * byte[3]=(n&gt;&gt;16)&amp;0xff; byte[4]=(n&gt;&gt;8)&amp;0xff;
   * byte[5]=n&amp;0xff
   * <li>if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
   * (n&gt;&gt;40)&amp;0xff; byte[2]=(n&gt;&gt;32)&amp;0xff;
   * byte[3]=(n&gt;&gt;24)&amp;0xff; byte[4]=(n&gt;&gt;16)&amp;0xff;
   * byte[5]=(n&gt;&gt;8)&amp;0xff; byte[6]=n&amp;0xff;
   * <li>if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
   * (n&gt;&gt;48)&amp;0xff; byte[2] = (n&gt;&gt;40)&amp;0xff;
   * byte[3]=(n&gt;&gt;32)&amp;0xff; byte[4]=(n&gt;&gt;24)&amp;0xff; byte[5]=
   * (n&gt;&gt;16)&amp;0xff; byte[6]=(n&gt;&gt;8)&amp;0xff; byte[7]=n&amp;0xff;
   * <li>if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
   * (n&gt;&gt;54)&amp;0xff; byte[2] = (n&gt;&gt;48)&amp;0xff;
   * byte[3] = (n&gt;&gt;40)&amp;0xff; byte[4]=(n&gt;&gt;32)&amp;0xff;
   * byte[5]=(n&gt;&gt;24)&amp;0xff; byte[6]=(n&gt;&gt;16)&amp;0xff; byte[7]=
   * (n&gt;&gt;8)&amp;0xff; byte[8]=n&amp;0xff;
   * </ul>
   * 
   * @param out
   *          output stream
   * @param n
   *          the integer number
   * @throws IOException raised on errors performing I/O.
   */
  @SuppressWarnings("fallthrough")
  public static void writeVLong(DataOutput out, long n) throws IOException {
    if ((n < 128) && (n >= -32)) {
      out.writeByte((int) n);
      return;
    }

    long un = (n < 0) ? ~n : n;
    // how many bytes do we need to represent the number with sign bit?
    int len = (Long.SIZE - Long.numberOfLeadingZeros(un)) / 8 + 1;
    int firstByte = (int) (n >> ((len - 1) * 8));
    switch (len) {
      case 1:
        // fall it through to firstByte==-1, len=2.
        firstByte >>= 8;
      case 2:
        if ((firstByte < 20) && (firstByte >= -20)) {
          out.writeByte(firstByte - 52);
          out.writeByte((int) n);
          return;
        }
        // fall it through to firstByte==0/-1, len=3.
        firstByte >>= 8;
      case 3:
        if ((firstByte < 16) && (firstByte >= -16)) {
          out.writeByte(firstByte - 88);
          out.writeShort((int) n);
          return;
        }
        // fall it through to firstByte==0/-1, len=4.
        firstByte >>= 8;
      case 4:
        if ((firstByte < 8) && (firstByte >= -8)) {
          out.writeByte(firstByte - 112);
          out.writeShort(((int) n) >>> 8);
          out.writeByte((int) n);
          return;
        }
        out.writeByte(len - 129);
        out.writeInt((int) n);
        return;
      case 5:
        out.writeByte(len - 129);
        out.writeInt((int) (n >>> 8));
        out.writeByte((int) n);
        return;
      case 6:
        out.writeByte(len - 129);
        out.writeInt((int) (n >>> 16));
        out.writeShort((int) n);
        return;
      case 7:
        out.writeByte(len - 129);
        out.writeInt((int) (n >>> 24));
        out.writeShort((int) (n >>> 8));
        out.writeByte((int) n);
        return;
      case 8:
        out.writeByte(len - 129);
        out.writeLong(n);
        return;
      default:
        throw new RuntimeException("Internal error");
    }
  }

  /**
   * Decoding the variable-length integer. Synonymous to
   * <code>(int)Utils#readVLong(in)</code>.
   * 
   * @param in
   *          input stream
   * @return the decoded integer
   * @throws IOException raised on errors performing I/O.
   * 
   * @see Utils#readVLong(DataInput)
   */
  public static int readVInt(DataInput in) throws IOException {
    long ret = readVLong(in);
    if ((ret > Integer.MAX_VALUE) || (ret < Integer.MIN_VALUE)) {
      throw new RuntimeException(
          "Number too large to be represented as Integer");
    }
    return (int) ret;
  }

  /**
   * Decoding the variable-length integer. Suppose the value of the first byte
   * is FB, and the following bytes are NB[*].
   * <ul>
   * <li>if (FB &gt;= -32), return (long)FB;
   * <li>if (FB in [-72, -33]), return (FB+52)&lt;&lt;8 + NB[0]&amp;0xff;
   * <li>if (FB in [-104, -73]), return (FB+88)&lt;&lt;16 +
   * (NB[0]&amp;0xff)&lt;&lt;8 + NB[1]&amp;0xff;
   * <li>if (FB in [-120, -105]), return (FB+112)&lt;&lt;24 + (NB[0]&amp;0xff)
   * &lt;&lt;16 + (NB[1]&amp;0xff)&lt;&lt;8 + NB[2]&amp;0xff;
   * <li>if (FB in [-128, -121]), return interpret NB[FB+129] as a signed
   * big-endian integer.
   * </ul>
   * @param in
   *          input stream
   * @return the decoded long integer.
   * @throws IOException raised on errors performing I/O.
   */

  public static long readVLong(DataInput in) throws IOException {
    int firstByte = in.readByte();
    if (firstByte >= -32) {
      return firstByte;
    }

    switch ((firstByte + 128) / 8) {
      case 11:
      case 10:
      case 9:
      case 8:
      case 7:
        return ((firstByte + 52) << 8) | in.readUnsignedByte();
      case 6:
      case 5:
      case 4:
      case 3:
        return ((firstByte + 88) << 16) | in.readUnsignedShort();
      case 2:
      case 1:
        return ((firstByte + 112) << 24) | (in.readUnsignedShort() << 8)
            | in.readUnsignedByte();
      case 0:
        int len = firstByte + 129;
        switch (len) {
          case 4:
            return in.readInt();
          case 5:
            return ((long) in.readInt()) << 8 | in.readUnsignedByte();
          case 6:
            return ((long) in.readInt()) << 16 | in.readUnsignedShort();
          case 7:
            return ((long) in.readInt()) << 24 | (in.readUnsignedShort() << 8)
                | in.readUnsignedByte();
          case 8:
            return in.readLong();
          default:
            throw new IOException("Corrupted VLong encoding");
        }
      default:
        throw new RuntimeException("Internal error");
    }
  }

  /**
   * Write a String as a VInt n, followed by n Bytes as in Text format.
   * 
   * @param out out.
   * @param s s.
   * @throws IOException raised on errors performing I/O.
   */
  public static void writeString(DataOutput out, String s) throws IOException {
    if (s != null) {
      Text text = new Text(s);
      byte[] buffer = text.getBytes();
      int len = text.getLength();
      writeVInt(out, len);
      out.write(buffer, 0, len);
    } else {
      writeVInt(out, -1);
    }
  }

  /**
   * Read a String as a VInt n, followed by n Bytes in Text format.
   * 
   * @param in
   *          The input stream.
   * @return The string
   * @throws IOException raised on errors performing I/O.
   */
  public static String readString(DataInput in) throws IOException {
    int length = readVInt(in);
    if (length == -1) return null;
    byte[] buffer = new byte[length];
    in.readFully(buffer);
    return Text.decode(buffer);
  }

  /**
   * A generic Version class. We suggest applications built on top of TFile use
   * this class to maintain version information in their meta blocks.
   * 
   * A version number consists of a major version and a minor version. The
   * suggested usage of major and minor version number is to increment major
   * version number when the new storage format is not backward compatible, and
   * increment the minor version otherwise.
   */
  public static final class Version implements Comparable<Version> {
    private final short major;
    private final short minor;

    /**
     * Construct the Version object by reading from the input stream.
     * 
     * @param in
     *          input stream
     * @throws IOException raised on errors performing I/O.
     */
    public Version(DataInput in) throws IOException {
      major = in.readShort();
      minor = in.readShort();
    }

    /**
     * Constructor.
     * 
     * @param major
     *          major version.
     * @param minor
     *          minor version.
     */
    public Version(short major, short minor) {
      this.major = major;
      this.minor = minor;
    }

    /**
     * Write the objec to a DataOutput. The serialized format of the Version is
     * major version followed by minor version, both as big-endian short
     * integers.
     * 
     * @param out
     *          The DataOutput object.
     * @throws IOException raised on errors performing I/O.
     */
    public void write(DataOutput out) throws IOException {
      out.writeShort(major);
      out.writeShort(minor);
    }

    /**
     * Get the major version.
     * 
     * @return Major version.
     */
    public int getMajor() {
      return major;
    }

    /**
     * Get the minor version.
     * 
     * @return The minor version.
     */
    public int getMinor() {
      return minor;
    }

    /**
     * Get the size of the serialized Version object.
     * 
     * @return serialized size of the version object.
     */
    public static int size() {
      return (Short.SIZE + Short.SIZE) / Byte.SIZE;
    }

    /**
     * Return a string representation of the version.
     */
    @Override
    public String toString() {
      return new StringBuilder("v").append(major).append(".").append(minor)
          .toString();
    }

    /**
     * Test compatibility.
     * 
     * @param other
     *          The Version object to test compatibility with.
     * @return true if both versions have the same major version number; false
     *         otherwise.
     */
    public boolean compatibleWith(Version other) {
      return major == other.major;
    }

    /**
     * Compare this version with another version.
     */
    @Override
    public int compareTo(Version that) {
      if (major != that.major) {
        return major - that.major;
      }
      return minor - that.minor;
    }

    @Override
    public boolean equals(Object other) {
      if (this == other) return true;
      if (!(other instanceof Version)) return false;
      return compareTo((Version) other) == 0;
    }

    @Override
    public int hashCode() {
      return (major << 16) + minor;
    }
  }

  /**
   * Lower bound binary search. Find the index to the first element in the list
   * that compares greater than or equal to key.
   * 
   * @param <T>
   *          Type of the input key.
   * @param list
   *          The list
   * @param key
   *          The input key.
   * @param cmp
   *          Comparator for the key.
   * @return The index to the desired element if it exists; or list.size()
   *         otherwise.
   */
  public static <T> int lowerBound(List<? extends T> list, T key,
      Comparator<? super T> cmp) {
    int low = 0;
    int high = list.size();

    while (low < high) {
      int mid = (low + high) >>> 1;
      T midVal = list.get(mid);
      int ret = cmp.compare(midVal, key);
      if (ret < 0)
        low = mid + 1;
      else high = mid;
    }
    return low;
  }

  /**
   * Upper bound binary search. Find the index to the first element in the list
   * that compares greater than the input key.
   * 
   * @param <T>
   *          Type of the input key.
   * @param list
   *          The list
   * @param key
   *          The input key.
   * @param cmp
   *          Comparator for the key.
   * @return The index to the desired element if it exists; or list.size()
   *         otherwise.
   */
  public static <T> int upperBound(List<? extends T> list, T key,
      Comparator<? super T> cmp) {
    int low = 0;
    int high = list.size();

    while (low < high) {
      int mid = (low + high) >>> 1;
      T midVal = list.get(mid);
      int ret = cmp.compare(midVal, key);
      if (ret <= 0)
        low = mid + 1;
      else high = mid;
    }
    return low;
  }

  /**
   * Lower bound binary search. Find the index to the first element in the list
   * that compares greater than or equal to key.
   * 
   * @param <T>
   *          Type of the input key.
   * @param list
   *          The list
   * @param key
   *          The input key.
   * @return The index to the desired element if it exists; or list.size()
   *         otherwise.
   */
  public static <T> int lowerBound(List<? extends Comparable<? super T>> list,
      T key) {
    int low = 0;
    int high = list.size();

    while (low < high) {
      int mid = (low + high) >>> 1;
      Comparable<? super T> midVal = list.get(mid);
      int ret = midVal.compareTo(key);
      if (ret < 0)
        low = mid + 1;
      else high = mid;
    }
    return low;
  }

  /**
   * Upper bound binary search. Find the index to the first element in the list
   * that compares greater than the input key.
   * 
   * @param <T>
   *          Type of the input key.
   * @param list
   *          The list
   * @param key
   *          The input key.
   * @return The index to the desired element if it exists; or list.size()
   *         otherwise.
   */
  public static <T> int upperBound(List<? extends Comparable<? super T>> list,
      T key) {
    int low = 0;
    int high = list.size();

    while (low < high) {
      int mid = (low + high) >>> 1;
      Comparable<? super T> midVal = list.get(mid);
      int ret = midVal.compareTo(key);
      if (ret <= 0)
        low = mid + 1;
      else high = mid;
    }
    return low;
  }
}

相关信息

hadoop 源码目录

相关文章

hadoop BCFile 源码

hadoop BoundedRangeFileInputStream 源码

hadoop ByteArray 源码

hadoop Chunk 源码

hadoop CompareUtils 源码

hadoop Compression 源码

hadoop MetaBlockAlreadyExists 源码

hadoop MetaBlockDoesNotExist 源码

hadoop RawComparable 源码

hadoop SimpleBufferedOutputStream 源码

0  赞