Java Code Examples for org.apache.hadoop.conf.Configuration

The following code examples are extracted from open source projects. Each example notes the project and source file it comes from, along with the number of votes it has received from readers.
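
For context, here is a minimal sketch of the core Configuration API that these examples build on. The property names used here are illustrative and not taken from any of the projects below.

Configuration conf=new Configuration(); // loads core-default.xml and core-site.xml from the classpath
conf.set("example.owner","alice"); // illustrative property names
conf.setInt("example.buffer.size",4096);
String owner=conf.get("example.owner","unknown"); // the second argument is returned if the property is unset
int bufferSize=conf.getInt("example.buffer.size",1024);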

Example 1

From project akela, under directory /src/main/java/com/mozilla/hadoop/.

Source file: UnknownPathFinder.java

34 votes

public static void main(String[] args) throws IOException {
  int retCode=0;
  Configuration hbaseConf=HBaseConfiguration.create(new Configuration());
  Path hbaseRootDir=new Path(hbaseConf.get("hbase.rootdir"));
  Set<String> knownRegionPaths=getRegionPaths(hbaseRootDir,HConstants.META_TABLE_NAME);
  Set<String> fsPaths=getFilesystemPaths(hbaseConf,hbaseRootDir);
  fsPaths.removeAll(knownRegionPaths);
  for (  String p : fsPaths) {
    System.out.println(p);
  }
  System.exit(retCode);
}
 

Example 2

From project behemoth, under directory /core/src/test/java/com/digitalpebble/behemoth/.

Source file: DocumentFilterTest.java

33 votes

public void testURLFilter(){
  Configuration config=BehemothConfiguration.create();
  config.set(DocumentFilter.DocumentFilterParamNameURLFilterKeep,".+");
  DocumentFilter filter=DocumentFilter.getFilters(config);
  BehemothDocument doc=new BehemothDocument();
  doc.getMetadata(true).put(new Text("lang"),new Text("en"));
  boolean kept=filter.keep(doc);
  assertEquals(false,kept);
  doc=new BehemothDocument();
  doc.setUrl("any random rubbish will do");
  kept=filter.keep(doc);
  assertEquals(true,kept);
}
 

Example 3

From project behemoth, under directory /core/src/test/java/com/digitalpebble/behemoth/.

Source file: DocumentFilterTest.java

33 votes

public void testURLFilterRequired(){
  Configuration config=BehemothConfiguration.create();
  assertEquals(false,DocumentFilter.isRequired(config));
  config.set(DocumentFilter.DocumentFilterParamNameURLFilterKeep,".+");
  assertEquals(true,DocumentFilter.isRequired(config));
}
 

Example 4

From project ambrose, under directory /pig/src/main/java/com/twitter/ambrose/pig/.

Source file: AmbrosePigProgressNotificationListener.java

32 votes

/** 
 * Collects statistics from JobStats and builds a nested Map of values. Subclass and override if you'd like to generate different stats.
 * @param scriptId
 * @param stats
 * @return
 */
protected JobInfo collectStats(String scriptId,JobStats stats){
  Properties jobConfProperties=new Properties();
  if (stats.getInputs() != null && stats.getInputs().size() > 0 && stats.getInputs().get(0).getConf() != null) {
    Configuration conf=stats.getInputs().get(0).getConf();
    for (    Map.Entry<String,String> entry : conf) {
      jobConfProperties.setProperty(entry.getKey(),entry.getValue());
    }
    if (workflowVersion == null) {
      workflowVersion=conf.get("pig.logical.plan.signature");
    }
  }
  return new PigJobInfo(stats,jobConfProperties);
}
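
As the Javadoc above suggests, this method can be subclassed and overridden to report different statistics. A hedged sketch of such a subclass follows; the class name and added behavior are hypothetical, not from the ambrose project.

public class CustomPigListener extends AmbrosePigProgressNotificationListener {
  // a constructor delegating to the superclass may be required; omitted here for brevity
  @Override protected JobInfo collectStats(String scriptId,JobStats stats){
    JobInfo info=super.collectStats(scriptId,stats);
    // inspect or augment the collected job info here before it is reported
    return info;
  }
}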
 

Example 5

From project archive-commons, under directory /ia-tools/src/main/java/org/archive/hadoop/io/.

Source file: MergeClusterRangesInputFormat.java

32 votes

@Override public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  Configuration conf=context.getConfiguration();
  Reader r=getSplitReader(conf);
  SplitFile splitFile=new SplitFile();
  splitFile.read(r);
  r.close();
  ArrayList<InputSplit> splits=new ArrayList<InputSplit>();
  for (int i=0; i < splitFile.size(); i++) {
    MergeClusterRangesInputSplit split=new MergeClusterRangesInputSplit(splitFile.size() - i,splitFile.getStart(i),splitFile.getEnd(i),getClusterPaths(conf));
    splits.add(split);
    LOG.warning(String.format("Added split(%d) (%s)-(%s)",splitFile.size() - i,splitFile.getStart(i),splitFile.getEnd(i)));
  }
  return splits;
}
 

Example 6

From project archive-commons, under directory /ia-tools/src/main/java/org/archive/hadoop/mapreduce/.

Source file: GZIPRangeLineDereferencingRecordReader.java

32 votes

@Override public void initialize(InputSplit split,TaskAttemptContext context) throws IOException, InterruptedException {
  Configuration conf=context.getConfiguration();
  FileSplit fileSplit=(FileSplit)split;
  loader=new HDFSBlockLoader(fileSplit.getPath().getFileSystem(conf));
  skipBad=getSkipBadGZIPRanges(conf);
  super.initialize(split,context);
}
 

Example 7

From project avro, under directory /lang/java/mapred/src/main/java/org/apache/avro/hadoop/io/.

Source file: AvroSerialization.java

32 votes

/** 
 * Gets an object capable of deserializing the output from a Mapper.
 * @param c The class to get a deserializer for.
 * @return A deserializer for objects of class <code>c</code>.
 */
@Override public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c){
  Configuration conf=getConf();
  if (AvroKey.class.isAssignableFrom(c)) {
    return new AvroKeyDeserializer<T>(getKeyWriterSchema(conf),getKeyReaderSchema(conf),conf.getClassLoader());
  }
 else   if (AvroValue.class.isAssignableFrom(c)) {
    return new AvroValueDeserializer<T>(getValueWriterSchema(conf),getValueReaderSchema(conf),conf.getClassLoader());
  }
 else {
    throw new IllegalStateException("Only AvroKey and AvroValue are supported.");
  }
}
 

Example 8

From project avro, under directory /lang/java/mapred/src/main/java/org/apache/avro/mapreduce/.

Source file: AvroMultipleOutputs.java

32 votes

/** 
 * Adds a named output for the job. <p/>
 * @param job               job to add the named output
 * @param namedOutput       named output name; it has to be a word, letters and numbers only, and cannot be the word 'part', as that is reserved for the default output.
 * @param outputFormatClass OutputFormat class.
 * @param keySchema          Schema for the Key
 * @param valueSchema        Schema for the Value (used in case of AvroKeyValueOutputFormat or null)
 */
@SuppressWarnings("unchecked") public static void addNamedOutput(Job job,String namedOutput,Class<? extends OutputFormat> outputFormatClass,Schema keySchema,Schema valueSchema){
  checkNamedOutputName(job,namedOutput,true);
  Configuration conf=job.getConfiguration();
  conf.set(MULTIPLE_OUTPUTS,conf.get(MULTIPLE_OUTPUTS,"") + " " + namedOutput);
  conf.setClass(MO_PREFIX + namedOutput + FORMAT,outputFormatClass,OutputFormat.class);
  keySchemas.put(namedOutput + "_KEYSCHEMA",keySchema);
  valSchemas.put(namedOutput + "_VALSCHEMA",valueSchema);
}
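
A hedged usage sketch of the method above, showing how a named Avro output might be registered while setting up a job; the job name, output name, and schemas are illustrative.

Job job=new Job(new Configuration(),"avro-named-outputs"); // throws IOException in the enclosing method
Schema keySchema=Schema.create(Schema.Type.STRING);
Schema valueSchema=Schema.create(Schema.Type.LONG);
AvroMultipleOutputs.addNamedOutput(job,"stats",AvroKeyValueOutputFormat.class,keySchema,valueSchema);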
 

Example 9

From project avro-utils, under directory /src/main/java/com/tomslabs/grid/avro/.

Source file: AvroFileOutputFormat.java

32 votes

@Override public RecordWriter<T,Object> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
  Configuration config=context.getConfiguration();
  Schema schema=getWriteSchema(config);
  DatumWriter<T> datumWriter=getDatumWriter(config);
  final DataFileWriter<T> writer=new DataFileWriter<T>(datumWriter);
  if (getCompressOutput(context)) {
    int level=config.getInt(DEFLATE_LEVEL_KEY,DEFAULT_DEFLATE_LEVEL);
    writer.setCodec(CodecFactory.deflateCodec(level));
  }
  Path file=getDefaultWorkFile(context,EXT);
  FileSystem fs=file.getFileSystem(config);
  writer.create(schema,fs.create(file));
  return new AvroRecordWriter<T>(writer);
}
 

Example 10

From project avro-utils, under directory /src/main/java/com/tomslabs/grid/avro/.

Source file: AvroRecordReader.java

32 votes

@Override public void initialize(InputSplit split,TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fileSplit=(FileSplit)split;
  Configuration config=context.getConfiguration();
  Path path=fileSplit.getPath();
  this.in=new FsInput(path,config);
  DatumReader<T> datumReader=getDatumReader(config);
  this.reader=new DataFileReader<T>(in,datumReader);
  reader.sync(fileSplit.getStart());
  this.start=in.tell();
  this.end=fileSplit.getStart() + split.getLength();
}
 

Example 11

From project azkaban, under directory /azkaban/src/java/azkaban/web/pages/.

Source file: HdfsBrowserServlet.java

32 votes

@Override public void init(ServletConfig config) throws ServletException {
  super.init(config);
  try {
    Configuration conf=new Configuration();
    conf.setClassLoader(this.getApplication().getClassLoader());
    _fs=FileSystem.get(conf);
  }
 catch (  IOException e) {
    throw new ServletException(e);
  }
}
 

Example 12

From project bagheera, under directory /src/main/java/com/mozilla/bagheera/sink/.

Source file: HBaseSink.java

32 votes

public HBaseSink(String tableName,String family,String qualifier,boolean prefixDate){
  this.tableName=Bytes.toBytes(tableName);
  this.family=Bytes.toBytes(family);
  this.qualifier=Bytes.toBytes(qualifier);
  this.prefixDate=prefixDate;
  Configuration conf=HBaseConfiguration.create();
  hbasePool=new HTablePool(conf,hbasePoolSize);
  stored=Metrics.newMeter(new MetricName("bagheera","sink.hbase.",tableName + ".stored"),"messages",TimeUnit.SECONDS);
}
 

Example 13

From project big-data-plugin, under directory /shims/common/test-src/org/pentaho/hadoop/mapreduce/test/.

Source file: MRUtilTest.java

32 votes

@Test public void getPluginDirProperty() throws KettleException {
  final String USER_DIR=System.getProperty("user.dir");
  final Configuration c=new Configuration();
  assertNull(c.get(MRUtil.PROPERTY_PENTAHO_KETTLE_PLUGINS_DIR));
  String pluginDirProperty=MRUtil.getPluginDirProperty(c);
  assertTrue("Plugin Directory Property not configured as expected: " + pluginDirProperty,pluginDirProperty.endsWith(USER_DIR));
}
 

Example 14

From project big-data-plugin, under directory /shims/common/test-src/org/pentaho/hadoop/mapreduce/test/.

Source file: MRUtilTest.java

32 votes

@Test public void getPluginDirProperty_explicitly_set() throws KettleException {
  final String PLUGIN_DIR="/opt/pentaho";
  final Configuration c=new Configuration();
  c.set(MRUtil.PROPERTY_PENTAHO_KETTLE_PLUGINS_DIR,PLUGIN_DIR);
  String pluginDirProperty=MRUtil.getPluginDirProperty(c);
  assertTrue("Plugin Directory Property not configured as expected: " + pluginDirProperty,pluginDirProperty.endsWith(PLUGIN_DIR));
}
 

Example 15

From project C-Cat, under directory /core/src/main/java/gov/llnl/ontology/mapreduce/.

Source file: CorpusTableMR.java

32 votes

/** 
 * Initializes the  {@link CorpusTable} for this {@link CorpusTableMapper} and calls {@link #setup(Context,Configuration)}.
 */
public void setup(Context context) throws IOException, InterruptedException {
  context.setStatus("Setup");
  Configuration conf=context.getConfiguration();
  table=ReflectionUtil.getObjectInstance(conf.get(TABLE));
  table.table();
  context.setStatus("CorpusTable created");
  setup(context,conf);
}
 

Example 16

From project C-Cat, under directory /core/src/main/java/gov/llnl/ontology/mapreduce/ingest/.

Source file: ImportCorpusMR.java

32 votes

/** 
 * {@inheritDoc}
 */
public void setup(Context context){
  Configuration conf=context.getConfiguration();
  table=ReflectionUtil.getObjectInstance(conf.get(TABLE));
  table.table();
  reader=ReflectionUtil.getObjectInstance(conf.get(READER));
  corpusName=conf.get(CORP,"");
}
 

Example 17

From project cdh-mapreduce-ext, under directory /src/main/java/org/apache/hadoop/mapreduce/lib/input/.

Source file: SequenceFileAsBinaryInputFormat.java

32 votes

public void initialize(InputSplit split,TaskAttemptContext context) throws IOException, InterruptedException {
  Path path=((FileSplit)split).getPath();
  Configuration conf=context.getConfiguration();
  FileSystem fs=path.getFileSystem(conf);
  this.in=new SequenceFile.Reader(fs,path,conf);
  this.end=((FileSplit)split).getStart() + split.getLength();
  if (((FileSplit)split).getStart() > in.getPosition()) {
    in.sync(((FileSplit)split).getStart());
  }
  this.start=in.getPosition();
  vbytes=in.createValueBytes();
  done=start >= end;
}
 

Example 18

From project cdh-mapreduce-ext, under directory /src/main/java/org/apache/hadoop/mapreduce/lib/output/.

Source file: SequenceFileAsBinaryOutputFormat.java

32 votes

protected SequenceFile.Writer getSequenceWriter(TaskAttemptContext context,Class<?> keyClass,Class<?> valueClass) throws IOException {
  Configuration conf=context.getConfiguration();
  CompressionCodec codec=null;
  CompressionType compressionType=CompressionType.NONE;
  if (getCompressOutput(context)) {
    compressionType=getOutputCompressionType(context);
    Class<?> codecClass=getOutputCompressorClass(context,DefaultCodec.class);
    codec=(CompressionCodec)ReflectionUtils.newInstance(codecClass,conf);
  }
  Path file=getDefaultWorkFile(context,"");
  FileSystem fs=file.getFileSystem(conf);
  return SequenceFile.createWriter(fs,conf,file,keyClass,valueClass,compressionType,codec,context);
}
 

Example 19

From project action-core, under directory /src/main/java/com/ning/metrics/action/binder/modules/.

Source file: FileSystemAccessProvider.java

31 votes

@Inject public FileSystemAccessProvider(final ActionCoreConfig actionCoreConfig) throws IOException {
  final Configuration hadoopConfig=new Configuration();
  final String hfsHost=actionCoreConfig.getNamenodeUrl();
  if (hfsHost.isEmpty()) {
    hadoopConfig.set("fs.default.name","file:///");
  }
 else {
    hadoopConfig.set("fs.default.name",hfsHost);
  }
  hadoopConfig.setInt("dfs.socket.timeout",actionCoreConfig.getHadoopSocketTimeOut());
  hadoopConfig.setBoolean("fs.automatic.close",false);
  hadoopConfig.setLong("dfs.block.size",actionCoreConfig.getHadoopBlockSize());
  hadoopConfig.set("hadoop.job.ugi",actionCoreConfig.getHadoopUgi());
  hadoopConfig.setStrings("io.serializations",HadoopThriftWritableSerialization.class.getName(),HadoopThriftEnvelopeSerialization.class.getName(),HadoopSmileOutputStreamSerialization.class.getName(),"org.apache.hadoop.io.serializer.WritableSerialization",actionCoreConfig.getSerializations());
  fileSystemAccess=new FileSystemAccess(hadoopConfig);
}
 

Example 20

From project aim3-tu-berlin, under directory /seminar/exercises/datamining/core/src/main/java/de/tuberlin/dima/aim/exercises/hadoop/.

Source file: HadoopJob.java

31 votes

protected Job prepareJob(Path inputPath,Path outputPath,Class<? extends InputFormat> inputFormat,Class<? extends Mapper> mapper,Class<? extends Writable> mapperKey,Class<? extends Writable> mapperValue,Class<? extends Reducer> reducer,Class<? extends Writable> reducerKey,Class<? extends Writable> reducerValue,Class<? extends OutputFormat> outputFormat) throws IOException {
  Job job=new Job(new Configuration(getConf()));
  Configuration jobConf=job.getConfiguration();
  if (reducer.equals(Reducer.class)) {
    if (mapper.equals(Mapper.class)) {
      throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);
  }
 else {
    job.setJarByClass(reducer);
  }
  job.setInputFormatClass(inputFormat);
  jobConf.set("mapred.input.dir",inputPath.toString());
  job.setMapperClass(mapper);
  job.setMapOutputKeyClass(mapperKey);
  job.setMapOutputValueClass(mapperValue);
  jobConf.setBoolean("mapred.compress.map.output",true);
  job.setReducerClass(reducer);
  job.setOutputKeyClass(reducerKey);
  job.setOutputValueClass(reducerValue);
  job.setJobName(getCustomJobName(job,mapper,reducer));
  job.setOutputFormatClass(outputFormat);
  jobConf.set("mapred.output.dir",outputPath.toString());
  return job;
}
 

Example 21

From project aim3-tu-berlin, under directory /seminar/exercises/datamining/core/src/test/java/de/tuberlin/dima/aim/exercises/.

Source file: HadoopAndPactTestcase.java

31 votes

protected final Path getTestTempDirPath() throws IOException {
  if (testTempDirPath == null) {
    fs=FileSystem.get(new Configuration());
    long simpleRandomLong=(long)(Long.MAX_VALUE * Math.random());
    testTempDirPath=fs.makeQualified(new Path("/tmp/mahout-" + getClass().getSimpleName() + '-'+ simpleRandomLong));
    if (!fs.mkdirs(testTempDirPath)) {
      throw new IOException("Could not create " + testTempDirPath);
    }
    fs.deleteOnExit(testTempDirPath);
  }
  return testTempDirPath;
}
 

Example 22

From project akela, under directory /src/main/java/com/mozilla/hadoop/.

Source file: Backup.java

31 votes

public void setup(Context context){
  Configuration conf=context.getConfiguration();
  try {
    String backupInputPath=conf.get("backup.input.path");
    if (!backupInputPath.endsWith(Path.SEPARATOR)) {
      backupInputPath+=Path.SEPARATOR;
    }
    filePattern=Pattern.compile(backupInputPath + "(.+)");
    inputFs=FileSystem.get(new Path(backupInputPath).toUri(),context.getConfiguration());
    outputRootPath=conf.get("backup.output.path");
    if (!outputRootPath.endsWith(Path.SEPARATOR)) {
      outputRootPath+=Path.SEPARATOR;
    }
    outputFs=FileSystem.get(new Path(outputRootPath).toUri(),conf);
  }
 catch (  IOException e) {
    throw new RuntimeException("Could not get FileSystem",e);
  }
}
 

Example 23

From project azkaban, under directory /contrib/mr-kluj/src/main/java/com/linkedin/json/.

Source file: JsonSequenceFileInputFormat.java

31 votes

@Override public RecordReader<Object,Object> createRecordReader(final InputSplit split,final TaskAttemptContext context) throws IOException {
  Configuration conf=context.getConfiguration();
  String inputPathString=((FileSplit)split).getPath().toUri().getPath();
  log.info("Input file path:" + inputPathString);
  Path inputPath=new Path(inputPathString);
  SequenceFile.Reader reader=new SequenceFile.Reader(inputPath.getFileSystem(conf),inputPath,conf);
  SequenceFile.Metadata meta=reader.getMetadata();
  try {
    final Text keySchema=meta.get(new Text("key.schema"));
    final Text valueSchema=meta.get(new Text("value.schema"));
    if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
      throw new Exception(String.format("Cannot have a 0 length schema. keySchema[%s], valueSchema[%s]",keySchema,valueSchema));
    }
    return new JsonObjectRecordReader(new JsonTypeSerializer(keySchema.toString()),new JsonTypeSerializer(valueSchema.toString()),baseInputFormat.createRecordReader(split,context));
  }
 catch (  Exception e) {
    throw new IOException("Failed to Load Schema from file:" + inputPathString + "\n");
  }
}
 

Example 24

From project book, under directory /src/test/java/com/tamingtext/mahout/.

Source file: VectorExamplesTest.java

31 votes

@Test public void testProgrammatic() throws Exception {
  double[] vals=new double[]{0.3,1.8,200.228};
  Vector dense=new DenseVector(vals);
  assertTrue(dense.size() == 3);
  Vector sparseSame=new SequentialAccessSparseVector(3);
  Vector sparse=new SequentialAccessSparseVector(3000);
  for (int i=0; i < vals.length; i++) {
    sparseSame.set(i,vals[i]);
    sparse.set(i,vals[i]);
  }
  assertFalse(dense.equals(sparse));
  assertEquals(dense,sparseSame);
  assertFalse(sparse.equals(sparseSame));
  File tmpDir=new File(System.getProperty("java.io.tmpdir"));
  File tmpLoc=new File(tmpDir,"sfvwt");
  tmpLoc.mkdirs();
  File tmpFile=File.createTempFile("sfvwt",".dat",tmpLoc);
  Path path=new Path(tmpFile.getAbsolutePath());
  Configuration conf=new Configuration();
  FileSystem fs=FileSystem.get(conf);
  SequenceFile.Writer seqWriter=SequenceFile.createWriter(fs,conf,path,LongWritable.class,VectorWritable.class);
  VectorWriter vecWriter=new SequenceFileVectorWriter(seqWriter);
  List<Vector> vectors=new ArrayList<Vector>();
  vectors.add(sparse);
  vectors.add(sparseSame);
  vecWriter.write(vectors);
  vecWriter.close();
}
 

Example 25

From project cascading, under directory /src/hadoop/cascading/tuple/hadoop/.

Source file: TupleSerialization.java

31 votes

public TupleSerialization(final FlowProcess<JobConf> flowProcess){
  super(new Configuration(){
    @Override public String get(    String name){
      return get(name,null);
    }
    @Override public String get(    String name,    String defaultValue){
      Object value=flowProcess.getProperty(name);
      return value == null ? defaultValue : String.valueOf(value);
    }
  }
);
}
 

Example 26

From project bagheera, under directory /src/main/java/com/mozilla/bagheera/sink/.

Source file: SequenceFileSink.java

30 votes

public SequenceFileSink(String namespace,String baseDirPath,String dateFormat,long maxFileSize,boolean useBytesValue) throws IOException {
  LOG.info("Initializing writer for namespace: " + namespace);
  conf=new Configuration();
  conf.setBoolean("fs.automatic.close",false);
  hdfs=FileSystem.newInstance(conf);
  this.useBytesValue=useBytesValue;
  this.maxFileSize=maxFileSize;
  sdf=new SimpleDateFormat(dateFormat);
  if (!baseDirPath.endsWith(Path.SEPARATOR)) {
    baseDir=new Path(baseDirPath + Path.SEPARATOR + namespace+ Path.SEPARATOR+ sdf.format(new Date(System.currentTimeMillis())));
  }
 else {
    baseDir=new Path(baseDirPath + namespace + Path.SEPARATOR+ sdf.format(new Date(System.currentTimeMillis())));
  }
  initWriter();
  stored=Metrics.newMeter(new MetricName("bagheera","sink.hdfs.",namespace + ".stored"),"messages",TimeUnit.SECONDS);
}
 

Example 27

From project cascading, under directory /src/hadoop/cascading/flow/hadoop/util/.

Source file: HadoopUtil.java

29 votes

public static <T>ObjectSerializer instantiateSerializer(Configuration conf,Class<T> type) throws ClassNotFoundException {
  Class<ObjectSerializer> flowSerializerClass;
  String serializerClassName=conf.get(ObjectSerializer.OBJECT_SERIALIZER_PROPERTY);
  if (serializerClassName == null || serializerClassName.length() == 0)   flowSerializerClass=(Class<ObjectSerializer>)DEFAULT_OBJECT_SERIALIZER;
 else   flowSerializerClass=(Class<ObjectSerializer>)Class.forName(serializerClassName);
  ObjectSerializer objectSerializer;
  try {
    objectSerializer=flowSerializerClass.newInstance();
    if (objectSerializer instanceof Configurable)     ((Configurable)objectSerializer).setConf(conf);
  }
 catch (  Exception exception) {
    exception.printStackTrace();
    throw new IllegalArgumentException("Unable to instantiate serializer \"" + flowSerializerClass.getName() + "\" for class: "+ type.getName());
  }
  if (!objectSerializer.accepts(type))   throw new IllegalArgumentException(serializerClassName + " won't accept objects of class " + type.toString());
  return objectSerializer;
}