hadoop HtmlQuoting 源码

  • 2022-10-20
  • 浏览 (422)

hadoop HtmlQuoting 代码

文件路径:/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HtmlQuoting.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.http;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

/**
 * This class is responsible for quoting HTML characters.
 *
 * <p>The five active characters {@code &}, {@code <}, {@code >}, {@code '}
 * and {@code "} are replaced by the named entities {@code &amp;},
 * {@code &lt;}, {@code &gt;}, {@code &apos;} and {@code &quot;}.
 * {@link #unquoteHtmlChars(String)} reverses exactly that mapping.
 */
public class HtmlQuoting {
  // Replacement entities, pre-encoded once. These MUST be the escaped
  // entity names (not the raw characters), otherwise quoting is a no-op.
  private static final byte[] AMP_BYTES =
      "&amp;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] APOS_BYTES =
      "&apos;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] GT_BYTES =
      "&gt;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] LT_BYTES =
      "&lt;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] QUOT_BYTES =
      "&quot;".getBytes(StandardCharsets.UTF_8);

  /**
   * Does the given string need to be quoted?
   * @param data the string to check
   * @param off the starting position
   * @param len the number of bytes to check
   * @return does the string contain any of the active html characters?
   */
  public static boolean needsQuoting(byte[] data, int off, int len) {
    for(int i=off; i< off+len; ++i) {
      switch(data[i]) {
      case '&':
      case '<':
      case '>':
      case '\'':
      case '"':
        return true;
      default:
        break;
      }
    }
    return false;
  }

  /**
   * Does the given string need to be quoted?
   * @param str the string to check; {@code null} is treated as not
   *            needing quoting
   * @return does the string contain any of the active html characters?
   */
  public static boolean needsQuoting(String str) {
    if (str == null) {
      return false;
    }
    byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
    return needsQuoting(bytes, 0 , bytes.length);
  }

  /**
   * Quote all of the active HTML characters in the given string as they
   * are added to the buffer.
   * @param output the stream to write the output to
   * @param buffer the byte array to take the characters from
   * @param off the index of the first byte to quote
   * @param len the number of bytes to quote
   * @throws IOException raised on errors performing I/O.
   */
  public static void quoteHtmlChars(OutputStream output, byte[] buffer,
                                    int off, int len) throws IOException {
    for(int i=off; i < off+len; i++) {
      switch (buffer[i]) {
      case '&':
        output.write(AMP_BYTES);
        break;
      case '<':
        output.write(LT_BYTES);
        break;
      case '>':
        output.write(GT_BYTES);
        break;
      case '\'':
        output.write(APOS_BYTES);
        break;
      case '"':
        output.write(QUOT_BYTES);
        break;
      default:
        // Any other byte (including UTF-8 continuation bytes) is copied
        // through unchanged.
        output.write(buffer, i, 1);
        break;
      }
    }
  }
  
  /**
   * Quote the given item to make it html-safe.
   * @param item the string to quote
   * @return the quoted string, the same instance if nothing needed
   *         quoting, or {@code null} if {@code item} is {@code null}
   */
  public static String quoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    byte[] bytes = item.getBytes(StandardCharsets.UTF_8);
    if (!needsQuoting(bytes, 0, bytes.length)) {
      return item;
    }
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try {
      quoteHtmlChars(buffer, bytes, 0, bytes.length);
    } catch (IOException ioe) {
      // Cannot happen: ByteArrayOutputStream never performs real I/O.
      // Surface it loudly rather than silently returning null.
      throw new IllegalStateException(
          "Unexpected I/O error writing to an in-memory buffer", ioe);
    }
    return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
  }

  /**
   * Return an output stream that quotes all of the output.
   * @param out the stream to write the quoted output to
   * @return a new stream that the application should write to
   * @throws IOException if the underlying output fails
   */
  public static OutputStream quoteOutputStream(final OutputStream out
                                               ) throws IOException {
    return new OutputStream() {
      // Scratch buffer reused by the single-byte write.
      private final byte[] singleByte = new byte[1];

      @Override
      public void write(byte[] data, int off, int len) throws IOException {
        quoteHtmlChars(out, data, off, len);
      }
      
      @Override
      public void write(int b) throws IOException {
        singleByte[0] = (byte) b;
        quoteHtmlChars(out, singleByte, 0, 1);
      }
      
      @Override
      public void flush() throws IOException {
        out.flush();
      }
      
      @Override
      public void close() throws IOException {
        out.close();
      }
    };
  }

  /**
   * Remove HTML quoting from a string.
   * @param item the string to unquote
   * @return the unquoted string, or {@code null} if {@code item} is
   *         {@code null}
   * @throws IllegalArgumentException if an {@code &} does not start one of
   *         the five entities produced by {@link #quoteHtmlChars(String)}
   */
  public static String unquoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    int next = item.indexOf('&');
    // nothing was quoted
    if (next == -1) {
      return item;
    }
    int len = item.length();
    int posn = 0;
    StringBuilder buffer = new StringBuilder(len);
    while (next != -1) {
      // Copy the literal text between the previous entity and this one.
      buffer.append(item, posn, next);
      if (item.startsWith("&amp;", next)) {
        buffer.append('&');
        next += 5;
      } else if (item.startsWith("&apos;", next)) {
        buffer.append('\'');
        next += 6;
      } else if (item.startsWith("&gt;", next)) {
        buffer.append('>');
        next += 4;
      } else if (item.startsWith("&lt;", next)) {
        buffer.append('<');
        next += 4;
      } else if (item.startsWith("&quot;", next)) {
        buffer.append('"');
        next += 6;
      } else {
        // Report the offending entity up to and including its ';',
        // or the rest of the string when no ';' follows.
        int end = item.indexOf(';', next)+1;
        if (end == 0) {
          end = len;
        }
        throw new IllegalArgumentException("Bad HTML quoting for " + 
                                           item.substring(next,end));
      }
      posn = next;
      next = item.indexOf('&', posn);
    }
    buffer.append(item, posn, len);
    return buffer.toString();
  }
  
  /**
   * Command-line demo: prints each argument, its quoted form, and the
   * result of unquoting that form.
   * @param args the strings to round-trip
   * @throws Exception if quoting or unquoting fails
   */
  public static void main(String[] args) throws Exception {
    for(String arg:args) {
      System.out.println("Original: " + arg);
      String quoted = quoteHtmlChars(arg);
      System.out.println("Quoted: "+ quoted);
      String unquoted = unquoteHtmlChars(quoted);
      System.out.println("Unquoted: " + unquoted);
      System.out.println();
    }
  }
}

相关信息

hadoop 源码目录

相关文章

hadoop AdminAuthorizedServlet 源码

hadoop FilterContainer 源码

hadoop FilterInitializer 源码

hadoop HttpConfig 源码

hadoop HttpRequestLog 源码

hadoop HttpServer2 源码

hadoop HttpServer2Metrics 源码

hadoop IsActiveServlet 源码

hadoop JettyUtils 源码

hadoop NoCacheFilter 源码

0  赞