hadoop GlobExpander 源码
haddop GlobExpander 代码
文件路径:/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class GlobExpander {
static class StringWithOffset {
String string;
int offset;
public StringWithOffset(String string, int offset) {
super();
this.string = string;
this.offset = offset;
}
}
/**
* Expand globs in the given <code>filePattern</code> into a collection of
* file patterns so that in the expanded set no file pattern has a slash
* character ("/") in a curly bracket pair.
* <p>
* Some examples of how the filePattern is expanded:<br>
* <pre>
* <b>
* filePattern - Expanded file pattern </b>
* {a/b} - a/b
* /}{a/b} - /}a/b
* p{a/b,c/d}s - pa/bs, pc/ds
* {a/b,c/d,{e,f}} - a/b, c/d, {e,f}
* {a/b,c/d}{e,f} - a/b{e,f}, c/d{e,f}
* {a,b}/{b,{c/d,e/f}} - {a,b}/b, {a,b}/c/d, {a,b}/e/f
* {a,b}/{c/\d} - {a,b}/c/d
* </pre>
*
* @param filePattern file pattern.
* @return expanded file patterns
* @throws IOException raised on errors performing I/O.
*/
public static List<String> expand(String filePattern) throws IOException {
List<String> fullyExpanded = new ArrayList<String>();
List<StringWithOffset> toExpand = new ArrayList<StringWithOffset>();
toExpand.add(new StringWithOffset(filePattern, 0));
while (!toExpand.isEmpty()) {
StringWithOffset path = toExpand.remove(0);
List<StringWithOffset> expanded = expandLeftmost(path);
if (expanded == null) {
fullyExpanded.add(path.string);
} else {
toExpand.addAll(0, expanded);
}
}
return fullyExpanded;
}
/**
* Expand the leftmost outer curly bracket pair containing a
* slash character ("/") in <code>filePattern</code>.
* @param filePatternWithOffset
* @return expanded file patterns
* @throws IOException
*/
private static List<StringWithOffset> expandLeftmost(StringWithOffset
filePatternWithOffset) throws IOException {
String filePattern = filePatternWithOffset.string;
int leftmost = leftmostOuterCurlyContainingSlash(filePattern,
filePatternWithOffset.offset);
if (leftmost == -1) {
return null;
}
int curlyOpen = 0;
StringBuilder prefix = new StringBuilder(filePattern.substring(0, leftmost));
StringBuilder suffix = new StringBuilder();
List<String> alts = new ArrayList<String>();
StringBuilder alt = new StringBuilder();
StringBuilder cur = prefix;
for (int i = leftmost; i < filePattern.length(); i++) {
char c = filePattern.charAt(i);
if (cur == suffix) {
cur.append(c);
} else if (c == '\\') {
i++;
if (i >= filePattern.length()) {
throw new IOException("Illegal file pattern: "
+ "An escaped character does not present for glob "
+ filePattern + " at " + i);
}
c = filePattern.charAt(i);
cur.append(c);
} else if (c == '{') {
if (curlyOpen++ == 0) {
alt.setLength(0);
cur = alt;
} else {
cur.append(c);
}
} else if (c == '}' && curlyOpen > 0) {
if (--curlyOpen == 0) {
alts.add(alt.toString());
alt.setLength(0);
cur = suffix;
} else {
cur.append(c);
}
} else if (c == ',') {
if (curlyOpen == 1) {
alts.add(alt.toString());
alt.setLength(0);
} else {
cur.append(c);
}
} else {
cur.append(c);
}
}
List<StringWithOffset> exp = new ArrayList<StringWithOffset>();
for (String string : alts) {
exp.add(new StringWithOffset(prefix + string + suffix, prefix.length()));
}
return exp;
}
/**
* Finds the index of the leftmost opening curly bracket containing a
* slash character ("/") in <code>filePattern</code>.
* @param filePattern
* @return the index of the leftmost opening curly bracket containing a
* slash character ("/"), or -1 if there is no such bracket
* @throws IOException
*/
private static int leftmostOuterCurlyContainingSlash(String filePattern,
int offset) throws IOException {
int curlyOpen = 0;
int leftmost = -1;
boolean seenSlash = false;
for (int i = offset; i < filePattern.length(); i++) {
char c = filePattern.charAt(i);
if (c == '\\') {
i++;
if (i >= filePattern.length()) {
throw new IOException("Illegal file pattern: "
+ "An escaped character does not present for glob "
+ filePattern + " at " + i);
}
} else if (c == '{') {
if (curlyOpen++ == 0) {
leftmost = i;
}
} else if (c == '}' && curlyOpen > 0) {
if (--curlyOpen == 0 && leftmost != -1 && seenSlash) {
return leftmost;
}
} else if (c == '/' && curlyOpen > 0) {
seenSlash = true;
}
}
return -1;
}
}
相关信息
相关文章
hadoop BatchListingOperations 源码
hadoop BatchedRemoteIterator 源码
0
赞
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
8、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦