Java Code Examples for org.apache.hadoop.fs.Path

The following code examples are extracted from open source projects.
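
Path is Hadoop's filesystem-independent file name: it wraps a URI whose scheme selects the concrete FileSystem implementation (HDFS, the local filesystem, and so on). Before the project examples, here is a minimal sketch of the core operations most of them rely on; the /tmp/example paths are illustrative assumptions, not taken from any project below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathBasics {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Construct from a string, or resolve a child against a parent.
    Path root = new Path("/tmp/example");
    Path child = new Path(root, "data.txt");   // /tmp/example/data.txt
    System.out.println(child.getName());       // data.txt
    System.out.println(child.getParent());     // /tmp/example
    // The owning FileSystem is derived from the path's URI scheme.
    FileSystem fs = child.getFileSystem(conf);
    if (fs.exists(child)) {
      FileStatus status = fs.getFileStatus(child);
      System.out.println(status.getLen() + " bytes");
    }
  }
}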

Example 1

From project aim3-tu-berlin, under directory /seminar/exercises/datamining/core/src/main/java/de/tuberlin/dima/aim/exercises/one/.

Source file: AverageTemperaturePerMonth.java


@Override
public int run(String[] args) throws Exception {
  Map<String, String> parsedArgs = parseArgs(args);
  Path inputPath = new Path(parsedArgs.get("--input"));
  Path outputPath = new Path(parsedArgs.get("--output"));
  double minimumQuality = Double.parseDouble(parsedArgs.get("--minimumQuality"));
  return 0;
}
 

Example 2

From project aim3-tu-berlin, under directory /seminar/exercises/datamining/core/src/main/java/de/tuberlin/dima/aim/exercises/one/.

Source file: FilteringWordCount.java


@Override
public int run(String[] args) throws Exception {
  Map<String, String> parsedArgs = parseArgs(args);
  Path inputPath = new Path(parsedArgs.get("--input"));
  Path outputPath = new Path(parsedArgs.get("--output"));
  Job wordCount = prepareJob(inputPath, outputPath, TextInputFormat.class,
      FilteringWordCountMapper.class, Text.class, IntWritable.class,
      WordCountReducer.class, Text.class, IntWritable.class,
      TextOutputFormat.class);
  wordCount.waitForCompletion(true);
  return 0;
}
 

Example 3

From project akela, under directory /src/main/java/com/mozilla/hadoop/fs/.

Source file: SequenceFileDirectoryReader.java


public SequenceFileDirectoryReader(Path inputPath) throws IOException {
  fs = FileSystem.get(inputPath.toUri(), conf);
  paths = new ArrayList<Path>();
  for (FileStatus status : fs.listStatus(inputPath)) {
    Path p = status.getPath();
    // Skip subdirectories and metadata files such as _SUCCESS or _logs.
    if (!status.isDir() && !p.getName().startsWith("_")) {
      paths.add(p);
    }
  }
  pathIter = paths.iterator();
}
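
The constructor above only collects file paths; a sketch of how the list might then be consumed with the classic SequenceFile.Reader API follows. The Text key/value types are an assumption (real files may store other Writable classes), and the snippet presumes the fs, conf, and paths fields from the class above plus imports from org.apache.hadoop.io.

// Hedged sketch, not part of the akela project.
Text key = new Text();
Text value = new Text();
for (Path p : paths) {
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
  try {
    while (reader.next(key, value)) {
      System.out.println(key + "\t" + value);
    }
  } finally {
    reader.close();
  }
}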
 

Example 4

From project akela, under directory /src/main/java/com/mozilla/hadoop/.

Source file: UnknownPathFinder.java


public static void main(String[] args) throws IOException {
  int retCode = 0;
  Configuration hbaseConf = HBaseConfiguration.create(new Configuration());
  Path hbaseRootDir = new Path(hbaseConf.get("hbase.rootdir"));
  Set<String> knownRegionPaths = getRegionPaths(hbaseRootDir, HConstants.META_TABLE_NAME);
  Set<String> fsPaths = getFilesystemPaths(hbaseConf, hbaseRootDir);
  fsPaths.removeAll(knownRegionPaths);
  for (String p : fsPaths) {
    System.out.println(p);
  }
  System.exit(retCode);
}
 

Example 5

From project archive-commons, under directory /ia-tools/src/main/java/org/archive/hadoop/cdx/.

Source file: CDXCluster.java


public CDXCluster(Configuration conf, Path clusterPath) throws IOException {
  this.clusterPath = clusterPath;
  fs = clusterPath.getFileSystem(conf);
  loader = new HDFSBlockLoader(fs);
  Path summaryPath = new Path(clusterPath, "ALL.summary");
  HDFSSeekableLineReaderFactory factory = new HDFSSeekableLineReaderFactory(fs, summaryPath);
  summary = new SortedTextFile(factory);
}
 

Example 6

From project archive-commons, under directory /ia-tools/src/main/java/org/archive/hadoop/cdx/.

Source file: HDFSLSR.java


public void listPath(FileStatus status, FileSystem fs, PrintWriter target) throws IOException {
  if (status.isDir()) {
    FileStatus[] entries = fs.listStatus(status.getPath());
    for (FileStatus entry : entries) {
      listPath(entry, fs, target);
    }
  } else {
    Path path = status.getPath();
    target.format("%s\t%s\n", path.getName(), path.toUri().toASCIIString());
  }
}
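
A hypothetical driver for the recursive listing above; the /user/data path and the use of the default filesystem are assumptions for illustration.

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
PrintWriter target = new PrintWriter(System.out);
// Recursion starts from the FileStatus of the root directory.
listPath(fs.getFileStatus(new Path("/user/data")), fs, target);
target.flush();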
 

Example 7

From project avro, under directory /lang/java/mapred/src/main/java/org/apache/avro/hadoop/file/.

Source file: SortedKeyValueFile.java


/**
 * Constructs a reader.
 * @param options The options.
 * @throws IOException If there is an error.
 */
public Reader(Options options) throws IOException {
  mKeySchema = options.getKeySchema();
  Path indexFilePath = new Path(options.getPath(), INDEX_FILENAME);
  LOG.debug("Loading the index from " + indexFilePath);
  mIndex = loadIndexFile(options.getConfiguration(), indexFilePath, mKeySchema);
  Path dataFilePath = new Path(options.getPath(), DATA_FILENAME);
  LOG.debug("Loading the data file " + dataFilePath);
  Schema recordSchema = AvroKeyValue.getSchema(mKeySchema, options.getValueSchema());
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(recordSchema);
  mDataFileReader = new DataFileReader<GenericRecord>(
      new FsInput(dataFilePath, options.getConfiguration()), datumReader);
}
 

Example 8

From project avro, under directory /lang/java/mapred/src/main/java/org/apache/avro/mapred/.

Source file: AvroTextOutputFormat.java


@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignore, JobConf job, String name,
    Progressable prog) throws IOException {
  Schema schema = Schema.create(Schema.Type.BYTES);
  final byte[] keyValueSeparator =
      job.get("mapreduce.output.textoutputformat.separator", "\t").getBytes(UTF8);
  final DataFileWriter<ByteBuffer> writer =
      new DataFileWriter<ByteBuffer>(new ReflectDatumWriter<ByteBuffer>());
  AvroOutputFormat.configureDataFileWriter(writer, job);
  Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
  writer.create(schema, path.getFileSystem(job).create(path));
  return new AvroTextRecordWriter(writer, keyValueSeparator);
}
 

Example 9

From project avro-utils, under directory /src/main/java/com/tomslabs/grid/avro/.

Source file: AvroFileOutputFormat.java


@Override
public RecordWriter<T, Object> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration config = context.getConfiguration();
  Schema schema = getWriteSchema(config);
  DatumWriter<T> datumWriter = getDatumWriter(config);
  final DataFileWriter<T> writer = new DataFileWriter<T>(datumWriter);
  if (getCompressOutput(context)) {
    int level = config.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    writer.setCodec(CodecFactory.deflateCodec(level));
  }
  Path file = getDefaultWorkFile(context, EXT);
  FileSystem fs = file.getFileSystem(config);
  writer.create(schema, fs.create(file));
  return new AvroRecordWriter<T>(writer);
}
 

Example 10

From project avro-utils, under directory /src/main/java/com/tomslabs/grid/avro/.

Source file: AvroRecordReader.java


@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) split;
  Configuration config = context.getConfiguration();
  Path path = fileSplit.getPath();
  this.in = new FsInput(path, config);
  DatumReader<T> datumReader = getDatumReader(config);
  this.reader = new DataFileReader<T>(in, datumReader);
  // Advance to the first Avro sync marker at or after the split start,
  // so every split begins reading on a record-block boundary.
  reader.sync(fileSplit.getStart());
  this.start = in.tell();
  this.end = fileSplit.getStart() + split.getLength();
}
 

Example 11

From project azkaban, under directory /azkaban/src/java/azkaban/web/pages/.

Source file: HdfsBrowserServlet.java


@Override
protected void doGet(HttpServletRequest req, HttpServletResponse resp)
    throws ServletException, IOException {
  String prefix = req.getContextPath() + req.getServletPath();
  String fsPath = req.getRequestURI().substring(prefix.length());
  if (fsPath.length() == 0) fsPath = "/";
  if (logger.isDebugEnabled()) logger.debug("path=" + fsPath);
  Path path = new Path(fsPath);
  if (!_fs.exists(path)) {
    throw new IllegalArgumentException(path.toUri().getPath() + " does not exist.");
  } else if (_fs.isFile(path)) {
    displayFile(req, resp, path);
  } else if (_fs.getFileStatus(path).isDir()) {
    displayDir(req, resp, path);
  } else {
    throw new IllegalStateException("It exists, it is not a file, and it is not a directory, what is it precious?");
  }
}
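
Note that the chain above makes up to three round-trips to the NameNode (exists, isFile, getFileStatus). A sketch of an equivalent single-call dispatch, relying on getFileStatus throwing FileNotFoundException for missing paths:

// Hedged alternative, not the azkaban code.
try {
  FileStatus status = _fs.getFileStatus(path);
  if (status.isDir()) displayDir(req, resp, path);
  else displayFile(req, resp, path);
} catch (FileNotFoundException e) {
  throw new IllegalArgumentException(path.toUri().getPath() + " does not exist.");
}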
 

Example 12

From project azkaban, under directory /azkaban/src/java/azkaban/web/pages/.

Source file: HdfsBrowserServlet.java


@Override
protected void doPost(HttpServletRequest req, HttpServletResponse resp)
    throws ServletException, IOException {
  String action = req.getParameter("action");
  if ("delete".equals(action)) {
    Path theFile = new Path(req.getParameter("file"));
    _fs.delete(theFile, true); // true: delete directories recursively
  } else {
    throw new ServletException("Unknown action '" + action + "'!");
  }
}
 

Example 13

From project behemoth, under directory /io/src/main/java/com/digitalpebble/behemoth/io/nutch/.

Source file: NutchSegmentConverterJob.java


public int run(String[] args) throws Exception {
  String usage = "Usage: SegmentConverter segment output";
  if (args.length < 2) { // need both a segment and an output path
    System.err.println(usage);
    System.exit(-1);
  }
  Path segment = new Path(args[0]);
  Path output = new Path(args[1]);
  convert(segment, output);
  return 0;
}
 

Example 14

From project behemoth, under directory /io/src/main/java/com/digitalpebble/behemoth/io/warc/.

Source file: WARCConverterJob.java


public int run(String[] args) throws Exception {
  String usage = "Usage: WARCConverterJob archive output";
  if (args.length < 2) { // need both an archive and an output path
    System.err.println(usage);
    System.exit(-1);
  }
  Path segment = new Path(args[0]);
  Path output = new Path(args[1]);
  convert(segment, output);
  return 0;
}
 

Example 15

From project big-data-plugin, under directory /shims/common/src/org/pentaho/hadoop/shim/common/.

Source file: DistributedCacheUtilImpl.java


/**
 * This validates that the Kettle Environment is installed. "Installed" means the kettle engine and
 * supporting jars/plugins exist in the provided file system at the path provided.
 * @param fs   File System to check for the Kettle Environment in
 * @param root Root path the Kettle Environment should reside within
 * @return True if the Kettle Environment is installed at {@code root}.
 * @throws IOException Error investigating installation
 */
public boolean isKettleEnvironmentInstalledAt(FileSystem fs, Path root) throws IOException {
  Path[] directories = new Path[]{
      new Path(root, PATH_LIB),
      new Path(root, PATH_PLUGINS),
      new Path(new Path(root, PATH_PLUGINS), PENTAHO_BIG_DATA_PLUGIN_FOLDER_NAME)};
  Path lock = getLockFileAt(root);
  for (Path dir : directories) {
    if (!(fs.exists(dir) && fs.getFileStatus(dir).isDir())) {
      return false;
    }
  }
  // Installed only if every directory exists and no install lock file remains.
  return !fs.exists(lock);
}
 

Example 16

From project big-data-plugin, under directory /shims/common/src/org/pentaho/hadoop/shim/common/.

Source file: DistributedCacheUtilImpl.java


/**
 * Configure the provided configuration to use the Distributed Cache and include all files in
 * {@code kettleInstallDir}. All jar files in lib/ will be added to the classpath.
 * @param conf             Configuration to update
 * @param fs               File system to load Kettle Environment installation from
 * @param kettleInstallDir Directory that contains the Kettle installation to use in the file system provided
 * @throws KettleFileException
 * @throws IOException
 */
public void configureWithKettleEnvironment(Configuration conf, FileSystem fs, Path kettleInstallDir)
    throws KettleFileException, IOException {
  Path libDir = new Path(kettleInstallDir, PATH_LIB);
  List<Path> libraryJars = findFiles(fs, libDir, null);
  addCachedFilesToClasspath(libraryJars, conf);
  List<Path> nonLibFiles = findFiles(fs, kettleInstallDir, NOT_LIB_FILES);
  addCachedFiles(nonLibFiles, conf);
}
 

Example 17

From project action-core, under directory /src/main/java/com/ning/metrics/action/hdfs/reader/.

Source file: HdfsEntry.java


@JsonCreator
@SuppressWarnings("unused")
public HdfsEntry(@JsonProperty(JSON_ENTRY_PATH) String path,
    @JsonProperty(JSON_ENTRY_MTIME) long mtime,
    @JsonProperty(JSON_ENTRY_SIZE) long sizeInBytes,
    @JsonProperty(JSON_ENTRY_REPLICATION) short replication,
    @JsonProperty(JSON_ENTRY_IS_DIR) boolean isDirectory) {
  this.fs = null;
  this.path = new Path(path);
  this.modificationDate = new DateTime(mtime);
  this.blockSize = -1;
  this.size = sizeInBytes;
  this.replication = replication;
  this.replicatedSize = sizeInBytes * replication;
  this.directory = isDirectory;
  this.raw = true;
  this.rowFileContentsIteratorFactory = null;
}
 

Example 18

From project action-core, under directory /src/main/java/com/ning/metrics/action/hdfs/reader/.

Source file: HdfsListing.java


@JsonCreator
@SuppressWarnings("unused")
public HdfsListing(@JsonProperty(JSON_LISTING_PATH) String path,
    @JsonProperty(JSON_LISTING_PARENT_PATH) String parentPath,
    @JsonProperty(JSON_LISTING_ENTRIES) List<HdfsEntry> entries) {
  this.path = new Path(path);
  this.parentPath = parentPath;
  this.entries = ImmutableList.copyOf(entries);
  raw = true;
  recursive = false;
  rowFileContentsIteratorFactory = null;
}
 

Example 19

From project ARCInputFormat, under directory /src/org/commoncrawl/hadoop/io/.

Source file: HdfsARCSource.java


@Override
public InputStream getStream(String resource, long streamPosition, Throwable lastError,
    int previousFailures) throws Throwable {
  if (lastError != null || previousFailures > 0) {
    return null;
  }
  if (streamPosition != 0) {
    throw new RuntimeException("Non-zero position requested");
  }
  if (jc == null) throw new NullPointerException("Jc is null");
  FileSystem fs = FileSystem.get(jc);
  logger.debug("getStream:: Opening: " + resource);
  FSDataInputStream is = fs.open(new Path(resource));
  is.seek(streamPosition);
  return is;
}
 

Example 20

From project ARCInputFormat, under directory /src/org/commoncrawl/hadoop/io/.

Source file: HdfsARCSource.java


@Override
protected Collection<ARCResource> getARCResources(JobConf job) throws IOException {
  String[] resources = getInputs(job);
  LinkedList<ARCResource> arc_resources = new LinkedList<ARCResource>();
  LinkedList<FileStatus> directories = new LinkedList<FileStatus>();
  FileSystem fs = FileSystem.get(job);
  for (String current_resource : resources) {
    Path arc_file = new Path(sanitizeURI(current_resource, job));
    FileStatus filest = null;
    try {
      filest = fs.getFileStatus(arc_file);
    } catch (FileNotFoundException fnf) {
      logger.error("Unable to open " + arc_file);
      continue;
    }
    if (filest.isDir()) {
      // Defer directories; their contents are expanded in the loop below.
      FileStatus[] dirs = fs.listStatus(arc_file);
      for (FileStatus fst : dirs) {
        directories.add(fst);
      }
    } else {
      arc_resources.add(new ARCResource(arc_file.toUri().toASCIIString(), filest.getLen()));
    }
  }
  // Breadth-first expansion: pop() takes from the head while add() appends
  // at the tail, so the LinkedList behaves as a FIFO queue.
  while (!directories.isEmpty()) {
    FileStatus current_fs = directories.pop();
    if (current_fs.isDir()) {
      FileStatus[] dirs = fs.listStatus(current_fs.getPath());
      for (FileStatus fst : dirs) {
        directories.add(fst);
      }
    } else {
      arc_resources.add(new ARCResource(current_fs.getPath().toUri().toASCIIString(),
          current_fs.getLen()));
    }
  }
  return arc_resources;
}
 

Example 21

From project babel, under directory /src/babel/prep/corpus/.

Source file: CorpusGenerator.java


/**
 * Configures a map-only dataset generation job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir, boolean xmlOut) throws IOException {
  JobConf job = new JobConf(getConf());
  job.setJobName("create " + (xmlOut ? "xml formatted" : "") + " dataset from " + pagesSubDir);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(CorpusGenMapper.class);
  job.setOutputFormat(xmlOut ? MultipleXMLLangFileOutputFormat.class : MultipleLangFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Page.class);
  FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));
  Path outDir = new Path(new Path(crawlDir, CORPUS_SUBDIR),
      "corpus." + (xmlOut ? PARAM_XML + "." : "") + getCurTimeStamp());
  m_fs.delete(outDir, true);
  FileOutputFormat.setOutputPath(job, outDir);
  setUniqueTempDir(job);
  return job;
}
 

Example 22

From project babel, under directory /src/babel/prep/corpus/.

Source file: MultipleXMLLangFileOutputFormat.java


public RecordWriter<Text, Page> getBaseRecordWriter(final FileSystem fs, JobConf job, String name,
    final Progressable progress) throws IOException {
  final Path dumpFile = new Path(FileOutputFormat.getOutputPath(job), name);
  if (fs.exists(dumpFile)) fs.delete(dumpFile, true);
  final XMLObjectWriter xmlWriter;
  try {
    xmlWriter = new XMLObjectWriter(fs.create(dumpFile), false);
  } catch (Exception e) {
    throw new RuntimeException("Failed to instantiate XMLObjectWriter.");
  }
  return new RecordWriter<Text, Page>() {
    public synchronized void write(Text key, Page page) throws IOException {
      try {
        xmlWriter.write(page);
      } catch (XMLStreamException e) {
        throw new RuntimeException("Error writing page XML.");
      }
    }
    public synchronized void close(Reporter reporter) throws IOException {
      try {
        xmlWriter.close();
      } catch (XMLStreamException e) {
        throw new RuntimeException("Error closing XMLObjectWriter.");
      }
    }
  };
}
 

Example 23

From project bagheera, under directory /src/main/java/com/mozilla/bagheera/sink/.

Source file: SequenceFileSink.java


public SequenceFileSink(String namespace, String baseDirPath, String dateFormat, long maxFileSize,
    boolean useBytesValue) throws IOException {
  LOG.info("Initializing writer for namespace: " + namespace);
  conf = new Configuration();
  conf.setBoolean("fs.automatic.close", false);
  hdfs = FileSystem.newInstance(conf);
  this.useBytesValue = useBytesValue;
  this.maxFileSize = maxFileSize;
  sdf = new SimpleDateFormat(dateFormat);
  // Build <baseDirPath>/<namespace>/<date>, inserting a separator only when needed.
  if (!baseDirPath.endsWith(Path.SEPARATOR)) {
    baseDir = new Path(baseDirPath + Path.SEPARATOR + namespace + Path.SEPARATOR
        + sdf.format(new Date(System.currentTimeMillis())));
  } else {
    baseDir = new Path(baseDirPath + namespace + Path.SEPARATOR
        + sdf.format(new Date(System.currentTimeMillis())));
  }
  initWriter();
  stored = Metrics.newMeter(new MetricName("bagheera", "sink.hdfs.", namespace + ".stored"),
      "messages", TimeUnit.SECONDS);
}
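
The manual Path.SEPARATOR concatenation above can usually be avoided: the two-argument Path constructors resolve a child against a parent whether or not the parent string ends in a separator. A hedged equivalent of the directory construction, reusing the constructor's variables:

// Sketch only; baseDirPath, namespace, and sdf are as in the constructor above.
Path base = new Path(baseDirPath, namespace);
baseDir = new Path(base, sdf.format(new Date(System.currentTimeMillis())));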
 

Example 24

From project bagheera, under directory /src/main/java/com/mozilla/bagheera/sink/.

Source file: SequenceFileSink.java


private void initWriter() throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Thread " + Thread.currentThread().getId() + " - initWriter() called");
  }
  if (!hdfs.exists(baseDir)) {
    hdfs.mkdirs(baseDir);
  }
  outputPath = new Path(baseDir, new Path(UUID.randomUUID().toString()));
  LOG.info("Opening file handle to: " + outputPath.toString());
  if (useBytesValue) {
    writer = SequenceFile.createWriter(hdfs, conf, outputPath, Text.class, BytesWritable.class,
        CompressionType.BLOCK);
  } else {
    writer = SequenceFile.createWriter(hdfs, conf, outputPath, Text.class, Text.class,
        CompressionType.BLOCK);
  }
  // Schedule the next rollover for the upcoming midnight: truncate the
  // current time to today's midnight, then add one day.
  Calendar prev = Calendar.getInstance();
  prev.set(Calendar.HOUR_OF_DAY, 0);
  prev.set(Calendar.MINUTE, 0);
  prev.set(Calendar.SECOND, 0);
  prev.set(Calendar.MILLISECOND, 0);
  nextRolloverMillis = prev.getTimeInMillis() + DAY_IN_MILLIS;
}
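
For context, a hypothetical write-path check that would put maxFileSize and nextRolloverMillis to use. This method is not part of the snippet above; it is only a sketch of how such a sink commonly rolls its files.

// Hypothetical sketch: roll to a fresh file once the day ends or the
// current file grows past maxFileSize.
private void maybeRoll(long bytesWritten) throws IOException {
  if (System.currentTimeMillis() >= nextRolloverMillis || bytesWritten >= maxFileSize) {
    writer.close();
    initWriter();
  }
}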