hadoop DirMarkerTracker 源码
hadoop DirMarkerTracker 代码
文件路径:/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirMarkerTracker.java
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.impl;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
/**
* Tracks directory markers which have been reported in object listings.
* This is needed for auditing and cleanup, including during rename
* operations.
* <p>
* Designed to be used while scanning through the results of listObject
* calls, where we assume the results come in alphanumeric sort order
* and parent entries before children.
* <p>
* This lets us assume that we can identify all leaf markers as those
* markers which were added to the set of leaf markers and not subsequently
* removed as child entries were discovered.
* <p>
* To avoid scanning datastructures excessively, the path of the parent
* directory of the last file added is cached. This allows for a
* quick bailout when many children of the same directory are
* returned in a listing.
* <p>
* Consult the directory_markers document for details on this feature,
* including terminology.
*/
public class DirMarkerTracker {

  private static final Logger LOG =
      LoggerFactory.getLogger(DirMarkerTracker.class);

  /**
   * all leaf markers.
   */
  private final Map<Path, Marker> leafMarkers
      = new TreeMap<>();

  /**
   * all surplus markers.
   */
  private final Map<Path, Marker> surplusMarkers
      = new TreeMap<>();

  /**
   * Base path of the tracking operation.
   */
  private final Path basePath;

  /**
   * Should surplus markers be recorded in
   * the {@link #surplusMarkers} map?
   */
  private final boolean recordSurplusMarkers;

  /**
   * last parent directory checked.
   */
  private Path lastDirChecked;

  /**
   * Count of scans; used for test assertions.
   * Incremented once for every non-root path visited by
   * {@link #removeParentMarkers(Path, List)}.
   */
  private int scanCount;

  /**
   * How many files were found.
   */
  private int filesFound;

  /**
   * How many markers were found.
   */
  private int markersFound;

  /**
   * How many objects of any kind were found?
   */
  private int objectsFound;

  /**
   * Construct.
   * <p>
   * The base path is currently only used for information rather than
   * validating paths supplied in other methods.
   * @param basePath base path of track
   * @param recordSurplusMarkers save surplus markers to a map?
   */
  public DirMarkerTracker(final Path basePath,
      boolean recordSurplusMarkers) {
    this.basePath = basePath;
    this.recordSurplusMarkers = recordSurplusMarkers;
  }

  /**
   * Get the base path of the tracker.
   * @return the path
   */
  public Path getBasePath() {
    return basePath;
  }

  /**
   * A marker has been found; this may or may not be a leaf.
   * <p>
   * The marker is first added to the leaf marker map, then
   * a move of all markers above it into the surplus map is triggered.
   * @param path marker path
   * @param key object key
   * @param source listing source
   * @return the surplus markers found.
   */
  public List<Marker> markerFound(Path path,
      final String key,
      final S3ALocatedFileStatus source) {
    markersFound++;
    leafMarkers.put(path, new Marker(path, key, source));
    return pathFound(path, key, source);
  }

  /**
   * A file has been found. Trigger a move of all
   * markers above it into the surplus map.
   * @param path file path
   * @param key object key
   * @param source listing source
   * @return the surplus markers found.
   */
  public List<Marker> fileFound(Path path,
      final String key,
      final S3ALocatedFileStatus source) {
    filesFound++;
    return pathFound(path, key, source);
  }

  /**
   * A path has been found.
   * <p>
   * Declare all markers above it as surplus.
   * <p>
   * If the parent of the path is the same directory as was checked on
   * the previous call, the scan is skipped and an empty list is returned.
   * @param path path of the marker or file
   * @param key object key; not currently used by this method
   * @param source listing source; not currently used by this method
   * @return the surplus markers found.
   */
  private List<Marker> pathFound(Path path,
      final String key,
      final S3ALocatedFileStatus source) {
    objectsFound++;
    List<Marker> removed = new ArrayList<>();

    // all parent entries are superfluous
    final Path parent = path.getParent();
    if (parent == null || parent.equals(lastDirChecked)) {
      // short cut exit
      return removed;
    }
    removeParentMarkers(parent, removed);
    lastDirChecked = parent;
    return removed;
  }

  /**
   * Remove all markers from the path and its parents from the
   * {@link #leafMarkers} map.
   * <p>
   * if {@link #recordSurplusMarkers} is true, the marker is
   * moved to the surplus map. Not doing this is simply an
   * optimisation designed to reduce risk of excess memory consumption
   * when renaming (hypothetically) large directory trees.
   * <p>
   * The recursion visits the parent before the path itself, so
   * markers are appended to {@code removed} from the highest ancestor
   * downwards. The root path is never examined.
   * @param path path to start at
   * @param removed list of markers removed; is built up during the
   * recursive operation.
   */
  private void removeParentMarkers(final Path path,
      List<Marker> removed) {
    if (path == null || path.isRoot()) {
      return;
    }
    scanCount++;
    removeParentMarkers(path.getParent(), removed);
    final Marker value = leafMarkers.remove(path);
    if (value != null) {
      // marker is surplus
      removed.add(value);
      if (recordSurplusMarkers) {
        surplusMarkers.put(path, value);
      }
    }
  }

  /**
   * Get the map of leaf markers.
   * <p>
   * This is the tracker's own map, not a copy.
   * @return all leaf markers.
   */
  public Map<Path, Marker> getLeafMarkers() {
    return leafMarkers;
  }

  /**
   * Get the map of surplus markers.
   * <p>
   * Empty if they were not being recorded.
   * This is the tracker's own map, not a copy.
   * @return all surplus markers.
   */
  public Map<Path, Marker> getSurplusMarkers() {
    return surplusMarkers;
  }

  /**
   * Get the parent directory recorded by the most recent scan.
   * @return the last directory checked; null if no scan has taken place.
   */
  public Path getLastDirChecked() {
    return lastDirChecked;
  }

  /**
   * How many objects were found.
   * @return count
   */
  public int getObjectsFound() {
    return objectsFound;
  }

  /**
   * Get the count of scans.
   * @return count
   */
  public int getScanCount() {
    return scanCount;
  }

  /**
   * How many files were found.
   * @return count
   */
  public int getFilesFound() {
    return filesFound;
  }

  /**
   * How many markers were found.
   * @return count
   */
  public int getMarkersFound() {
    return markersFound;
  }

  @Override
  public String toString() {
    return "DirMarkerTracker{" +
        "leafMarkers=" + leafMarkers.size() +
        ", surplusMarkers=" + surplusMarkers.size() +
        ", lastDirChecked=" + lastDirChecked +
        ", filesFound=" + filesFound +
        ", scanCount=" + scanCount +
        '}';
  }

  /**
   * Scan the surplus marker list and remove from it all where the directory
   * policy says "keep". This is useful when auditing.
   * @param policy policy to use when auditing markers for
   * inclusion/exclusion.
   * @return list of the paths of the markers stripped
   */
  public List<Path> removeAllowedMarkers(DirectoryPolicy policy) {
    List<Path> removed = new ArrayList<>();
    Iterator<Map.Entry<Path, Marker>> entries =
        surplusMarkers.entrySet().iterator();
    while (entries.hasNext()) {
      Map.Entry<Path, Marker> entry = entries.next();
      Path path = entry.getKey();
      if (policy.keepDirectoryMarkers(path)) {
        // there's a match.
        // Read the marker before removing the entry: the behavior of a
        // Map.Entry is undefined once the backing map has been modified,
        // and a TreeMap may reuse the removed node for its successor,
        // so reading the value afterwards could log a different marker.
        final Marker marker = entry.getValue();
        // remove it from the map.
        entries.remove();
        LOG.debug("Removing {}", marker);
        removed.add(path);
      }
    }
    return removed;
  }

  /**
   * This is a marker entry stored in the map and
   * returned as markers are deleted.
   */
  public static final class Marker {

    /** Path of the marker. */
    private final Path path;

    /**
     * Key in the store.
     */
    private final String key;

    /**
     * The file status of the marker.
     */
    private final S3ALocatedFileStatus status;

    /**
     * Construct; only the enclosing tracker creates markers.
     * @param path path of the marker
     * @param key key in the store
     * @param status file status of the marker
     */
    private Marker(final Path path,
        final String key,
        final S3ALocatedFileStatus status) {
      this.path = path;
      this.key = key;
      this.status = status;
    }

    /**
     * Get the path of the marker.
     * @return the path
     */
    public Path getPath() {
      return path;
    }

    /**
     * Get the key of the marker in the store.
     * @return the key
     */
    public String getKey() {
      return key;
    }

    /**
     * Get the file status of the marker.
     * @return the status
     */
    public S3ALocatedFileStatus getStatus() {
      return status;
    }

    /**
     * Get the version ID of the status object; may be null.
     * <p>
     * NOTE(review): the status is dereferenced without a null check,
     * so this fails if the marker was created with a null status.
     * @return a version ID, if known.
     */
    public String getVersionId() {
      return status.getVersionId();
    }

    @Override
    public String toString() {
      return "Marker{" +
          "path=" + path +
          ", key='" + key + '\'' +
          ", status=" + status +
          '}';
    }
  }
}
相关信息
相关文章
hadoop AbstractStoreOperation 源码
hadoop ActiveOperationContext 源码
hadoop BulkDeleteRetryHandler 源码
hadoop ChangeDetectionPolicy 源码
hadoop ConfigureShadedAWSSocketFactory 源码
0
赞
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
8、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦