hadoop TFileDumper source code

  • 2022-10-20

hadoop TFileDumper code: a package-private utility that prints a TFile's metadata, data-block index, and meta-block index.

File path: /hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/TFileDumper.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.io.file.tfile;

import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.file.tfile.BCFile.BlockRegion;
import org.apache.hadoop.io.file.tfile.BCFile.MetaIndexEntry;
import org.apache.hadoop.io.file.tfile.TFile.TFileIndexEntry;
import org.apache.hadoop.io.file.tfile.Utils.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Dumping the information of a TFile.
 */
class TFileDumper {
  static final Logger LOG = LoggerFactory.getLogger(TFileDumper.class);

  private TFileDumper() {
    // namespace object not constructable.
  }

  /** Text-alignment helper used to lay out the fixed-width tables below. */
  private enum Align {
    LEFT, CENTER, RIGHT, ZERO_PADDED;
    static String format(String s, int width, Align align) {
      if (s.length() >= width) return s;
      int room = width - s.length();
      Align alignAdjusted = align;
      if (room == 1) {
        alignAdjusted = LEFT;
      }
      if (alignAdjusted == LEFT) {
        return s + String.format("%" + room + "s", "");
      }
      if (alignAdjusted == RIGHT) {
        return String.format("%" + room + "s", "") + s;
      }
      if (alignAdjusted == CENTER) {
        int half = room / 2;
        return String.format("%" + half + "s", "") + s
            + String.format("%" + (room - half) + "s", "");
      }
      throw new IllegalArgumentException("Unsupported alignment");
    }

    static String format(long l, int width, Align align) {
      if (align == ZERO_PADDED) {
        return String.format("%0" + width + "d", l);
      }
      return format(Long.toString(l), width, align);
    }

    static int calculateWidth(String caption, long max) {
      return Math.max(caption.length(), Long.toString(max).length());
    }
  }

  /**
   * Dump information about TFile.
   * 
   * @param file
   *          Path string of the TFile
   * @param out
   *          PrintStream to output the information.
   * @param conf
   *          The configuration object.
   * @throws IOException
   */
  public static void dumpInfo(String file, PrintStream out, Configuration conf)
      throws IOException {
    final int maxKeySampleLen = 16;
    Path path = new Path(file);
    FileSystem fs = path.getFileSystem(conf);
    long length = fs.getFileStatus(path).getLen();
    FSDataInputStream fsdis = fs.open(path);
    TFile.Reader reader = new TFile.Reader(fsdis, length, conf);
    try {
      LinkedHashMap<String, String> properties =
          new LinkedHashMap<String, String>();
      int blockCnt = reader.readerBCF.getBlockCount();
      int metaBlkCnt = reader.readerBCF.metaIndex.index.size();
      properties.put("BCFile Version", reader.readerBCF.version.toString());
      properties.put("TFile Version", reader.tfileMeta.version.toString());
      properties.put("File Length", Long.toString(length));
      properties.put("Data Compression", reader.readerBCF
          .getDefaultCompressionName());
      properties.put("Record Count", Long.toString(reader.getEntryCount()));
      properties.put("Sorted", Boolean.toString(reader.isSorted()));
      if (reader.isSorted()) {
        properties.put("Comparator", reader.getComparatorName());
      }
      properties.put("Data Block Count", Integer.toString(blockCnt));
      long dataSize = 0, dataSizeUncompressed = 0;
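      // Sum compressed and uncompressed sizes across all data blocks.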
      if (blockCnt > 0) {
        for (int i = 0; i < blockCnt; ++i) {
          BlockRegion region =
              reader.readerBCF.dataIndex.getBlockRegionList().get(i);
          dataSize += region.getCompressedSize();
          dataSizeUncompressed += region.getRawSize();
        }
        properties.put("Data Block Bytes", Long.toString(dataSize));
        if (!reader.readerBCF.getDefaultCompressionName().equals("none")) {
          properties.put("Data Block Uncompressed Bytes", Long
              .toString(dataSizeUncompressed));
          properties.put("Data Block Compression Ratio", String.format(
              "1:%.1f", (double) dataSizeUncompressed / dataSize));
        }
      }

      properties.put("Meta Block Count", Integer.toString(metaBlkCnt));
      long metaSize = 0, metaSizeUncompressed = 0;
      if (metaBlkCnt > 0) {
        Collection<MetaIndexEntry> metaBlks =
            reader.readerBCF.metaIndex.index.values();
        boolean calculateCompression = false;
        for (Iterator<MetaIndexEntry> it = metaBlks.iterator(); it.hasNext();) {
          MetaIndexEntry e = it.next();
          metaSize += e.getRegion().getCompressedSize();
          metaSizeUncompressed += e.getRegion().getRawSize();
          if (e.getCompressionAlgorithm() != Compression.Algorithm.NONE) {
            calculateCompression = true;
          }
        }
        properties.put("Meta Block Bytes", Long.toString(metaSize));
        if (calculateCompression) {
          properties.put("Meta Block Uncompressed Bytes", Long
              .toString(metaSizeUncompressed));
          properties.put("Meta Block Compression Ratio", String.format(
              "1:%.1f", (double) metaSizeUncompressed / metaSize));
        }
      }
      properties.put("Meta-Data Size Ratio", String.format("1:%.1f",
          (double) dataSize / metaSize));
      long leftOverBytes = length - dataSize - metaSize;
      long miscSize =
          BCFile.Magic.size() * 2 + Long.SIZE / Byte.SIZE + Version.size();
      long metaIndexSize = leftOverBytes - miscSize;
      properties.put("Meta Block Index Bytes", Long.toString(metaIndexSize));
      properties.put("Headers Etc Bytes", Long.toString(miscSize));
      // Now output the properties table.
      int maxKeyLength = 0;
      Set<Map.Entry<String, String>> entrySet = properties.entrySet();
      for (Iterator<Map.Entry<String, String>> it = entrySet.iterator(); it
          .hasNext();) {
        Map.Entry<String, String> e = it.next();
        if (e.getKey().length() > maxKeyLength) {
          maxKeyLength = e.getKey().length();
        }
      }
      for (Iterator<Map.Entry<String, String>> it = entrySet.iterator(); it
          .hasNext();) {
        Map.Entry<String, String> e = it.next();
        out.printf("%s : %s%n", Align.format(e.getKey(), maxKeyLength,
            Align.LEFT), e.getValue());
      }
      out.println();
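      // Ensure the TFile data index is loaded before walking per-block entries.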
      reader.checkTFileDataIndex();
      if (blockCnt > 0) {
        String blkID = "Data-Block";
        int blkIDWidth = Align.calculateWidth(blkID, blockCnt);
        int blkIDWidth2 = Align.calculateWidth("", blockCnt);
        String offset = "Offset";
        int offsetWidth = Align.calculateWidth(offset, length);
        String blkLen = "Length";
        int blkLenWidth =
            Align.calculateWidth(blkLen, dataSize / blockCnt * 10);
        String rawSize = "Raw-Size";
        int rawSizeWidth =
            Align.calculateWidth(rawSize, dataSizeUncompressed / blockCnt * 10);
        String records = "Records";
        int recordsWidth =
            Align.calculateWidth(records, reader.getEntryCount() / blockCnt
                * 10);
        String endKey = "End-Key";
        int endKeyWidth = Math.max(endKey.length(), maxKeySampleLen * 2 + 5);

        out.printf("%s %s %s %s %s %s%n", Align.format(blkID, blkIDWidth,
            Align.CENTER), Align.format(offset, offsetWidth, Align.CENTER),
            Align.format(blkLen, blkLenWidth, Align.CENTER), Align.format(
                rawSize, rawSizeWidth, Align.CENTER), Align.format(records,
                recordsWidth, Align.CENTER), Align.format(endKey, endKeyWidth,
                Align.LEFT));

        for (int i = 0; i < blockCnt; ++i) {
          BlockRegion region =
              reader.readerBCF.dataIndex.getBlockRegionList().get(i);
          TFileIndexEntry indexEntry = reader.tfileIndex.getEntry(i);
          out.printf("%s %s %s %s %s ", Align.format(Align.format(i,
              blkIDWidth2, Align.ZERO_PADDED), blkIDWidth, Align.LEFT), Align
              .format(region.getOffset(), offsetWidth, Align.LEFT), Align
              .format(region.getCompressedSize(), blkLenWidth, Align.LEFT),
              Align.format(region.getRawSize(), rawSizeWidth, Align.LEFT),
              Align.format(indexEntry.kvEntries, recordsWidth, Align.LEFT));
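          // Sample the first bytes of the block's end key; print them as text
          // unless a sampled byte is an unprintable control character.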
          byte[] key = indexEntry.key;
          boolean asAscii = true;
          int sampleLen = Math.min(maxKeySampleLen, key.length);
          for (int j = 0; j < sampleLen; ++j) {
            byte b = key[j];
            if ((b < 32 && b != 9) || (b == 127)) {
              asAscii = false;
            }
          }
          if (!asAscii) {
            out.print("0X");
            for (int j = 0; j < sampleLen; ++j) {
              byte b = key[j];
              out.printf("%X", b);
            }
          } else {
            out.print(new String(key, 0, sampleLen, StandardCharsets.UTF_8));
          }
          if (sampleLen < key.length) {
            out.print("...");
          }
          out.println();
        }
      }

      out.println();
      if (metaBlkCnt > 0) {
        String name = "Meta-Block";
        int maxNameLen = 0;
        Set<Map.Entry<String, MetaIndexEntry>> metaBlkEntrySet =
            reader.readerBCF.metaIndex.index.entrySet();
        for (Iterator<Map.Entry<String, MetaIndexEntry>> it =
            metaBlkEntrySet.iterator(); it.hasNext();) {
          Map.Entry<String, MetaIndexEntry> e = it.next();
          if (e.getKey().length() > maxNameLen) {
            maxNameLen = e.getKey().length();
          }
        }
        int nameWidth = Math.max(name.length(), maxNameLen);
        String offset = "Offset";
        int offsetWidth = Align.calculateWidth(offset, length);
        String blkLen = "Length";
        int blkLenWidth =
            Align.calculateWidth(blkLen, metaSize / metaBlkCnt * 10);
        String rawSize = "Raw-Size";
        int rawSizeWidth =
            Align.calculateWidth(rawSize, metaSizeUncompressed / metaBlkCnt
                * 10);
        String compression = "Compression";
        int compressionWidth = compression.length();
        out.printf("%s %s %s %s %s%n", Align.format(name, nameWidth,
            Align.CENTER), Align.format(offset, offsetWidth, Align.CENTER),
            Align.format(blkLen, blkLenWidth, Align.CENTER), Align.format(
                rawSize, rawSizeWidth, Align.CENTER), Align.format(compression,
                compressionWidth, Align.LEFT));

        for (Iterator<Map.Entry<String, MetaIndexEntry>> it =
            metaBlkEntrySet.iterator(); it.hasNext();) {
          Map.Entry<String, MetaIndexEntry> e = it.next();
          String blkName = e.getValue().getMetaName();
          BlockRegion region = e.getValue().getRegion();
          String blkCompression =
              e.getValue().getCompressionAlgorithm().getName();
          out.printf("%s %s %s %s %s%n", Align.format(blkName, nameWidth,
              Align.LEFT), Align.format(region.getOffset(), offsetWidth,
              Align.LEFT), Align.format(region.getCompressedSize(),
              blkLenWidth, Align.LEFT), Align.format(region.getRawSize(),
              rawSizeWidth, Align.LEFT), Align.format(blkCompression,
              compressionWidth, Align.LEFT));
        }
      }
    } finally {
      IOUtils.cleanupWithLogger(LOG, reader, fsdis);
    }
  }
}
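
TFileDumper is package-private, so it is normally reached through TFile's main entry point rather than called directly. Below is a minimal sketch of a standalone driver, assuming it is compiled into the same org.apache.hadoop.io.file.tfile package; the class name DumpTFileExample is hypothetical.

package org.apache.hadoop.io.file.tfile;

import org.apache.hadoop.conf.Configuration;

// Hypothetical driver class; it must live in this package because
// TFileDumper has package-private visibility.
public class DumpTFileExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // For each TFile path given on the command line, print the property
    // table followed by the data-block and meta-block tables.
    for (String file : args) {
      TFileDumper.dumpInfo(file, System.out, conf);
    }
  }
}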

Related information

hadoop source directory

Related articles

hadoop BCFile source code

hadoop BoundedRangeFileInputStream source code

hadoop ByteArray source code

hadoop Chunk source code

hadoop CompareUtils source code

hadoop Compression source code

hadoop MetaBlockAlreadyExists source code

hadoop MetaBlockDoesNotExist source code

hadoop RawComparable source code

hadoop SimpleBufferedOutputStream source code
