Java Code Examples for org.apache.hadoop.conf.Configuration
The following code examples are extracted from open source projects.
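Before the project-specific examples, here is a minimal sketch of the basic Configuration API for orientation. The property names used here ("my.custom.property", "my.custom.timeout", "my.custom.verbose") are hypothetical placeholders, not keys from any of the projects below; the accessors themselves (set, get, getInt, getBoolean) are the standard ones used throughout the examples.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationBasics {
  public static void main(String[] args) {
    // By default loads core-default.xml and core-site.xml from the classpath.
    Configuration conf = new Configuration();
    // Set and read back a custom property (key name chosen for illustration only).
    conf.set("my.custom.property", "value");
    String value = conf.get("my.custom.property");
    // Typed getters fall back to the supplied default when the key is unset.
    int timeout = conf.getInt("my.custom.timeout", 30);
    boolean verbose = conf.getBoolean("my.custom.verbose", false);
    System.out.println(value + " " + timeout + " " + verbose);
  }
}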
Example 1
From project akela, under directory /src/main/java/com/mozilla/hadoop/.
Source file: UnknownPathFinder.java

public static void main(String[] args) throws IOException {
  int retCode = 0;
  Configuration hbaseConf = HBaseConfiguration.create(new Configuration());
  Path hbaseRootDir = new Path(hbaseConf.get("hbase.rootdir"));
  Set<String> knownRegionPaths = getRegionPaths(hbaseRootDir, HConstants.META_TABLE_NAME);
  Set<String> fsPaths = getFilesystemPaths(hbaseConf, hbaseRootDir);
  fsPaths.removeAll(knownRegionPaths);
  for (String p : fsPaths) {
    System.out.println(p);
  }
  System.exit(retCode);
}
Example 2
From project behemoth, under directory /core/src/test/java/com/digitalpebble/behemoth/.
Source file: DocumentFilterTest.java

public void testURLFilter() {
  Configuration config = BehemothConfiguration.create();
  config.set(DocumentFilter.DocumentFilterParamNameURLFilterKeep, ".+");
  DocumentFilter filter = DocumentFilter.getFilters(config);
  BehemothDocument doc = new BehemothDocument();
  doc.getMetadata(true).put(new Text("lang"), new Text("en"));
  boolean kept = filter.keep(doc);
  assertEquals(false, kept);
  doc = new BehemothDocument();
  doc.setUrl("any random rubbish will do");
  kept = filter.keep(doc);
  assertEquals(true, kept);
}
Example 3
From project behemoth, under directory /core/src/test/java/com/digitalpebble/behemoth/.
Source file: DocumentFilterTest.java

public void testURLFilterRequired() {
  Configuration config = BehemothConfiguration.create();
  assertEquals(false, DocumentFilter.isRequired(config));
  config.set(DocumentFilter.DocumentFilterParamNameURLFilterKeep, ".+");
  assertEquals(true, DocumentFilter.isRequired(config));
}
Example 4
From project ambrose, under directory /pig/src/main/java/com/twitter/ambrose/pig/.
Source file: AmbrosePigProgressNotificationListener.java

/**
 * Collects statistics from JobStats and builds a nested Map of values. Subclass and override if you'd like to generate different stats.
 * @param scriptId
 * @param stats
 * @return
 */
protected JobInfo collectStats(String scriptId, JobStats stats) {
  Properties jobConfProperties = new Properties();
  if (stats.getInputs() != null && stats.getInputs().size() > 0 && stats.getInputs().get(0).getConf() != null) {
    Configuration conf = stats.getInputs().get(0).getConf();
    for (Map.Entry<String,String> entry : conf) {
      jobConfProperties.setProperty(entry.getKey(), entry.getValue());
    }
    if (workflowVersion == null) {
      workflowVersion = conf.get("pig.logical.plan.signature");
    }
  }
  return new PigJobInfo(stats, jobConfProperties);
}
Example 5
From project archive-commons, under directory /ia-tools/src/main/java/org/archive/hadoop/io/.
Source file: MergeClusterRangesInputFormat.java

@Override public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Reader r = getSplitReader(conf);
  SplitFile splitFile = new SplitFile();
  splitFile.read(r);
  r.close();
  ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
  for (int i = 0; i < splitFile.size(); i++) {
    MergeClusterRangesInputSplit split = new MergeClusterRangesInputSplit(splitFile.size() - i, splitFile.getStart(i), splitFile.getEnd(i), getClusterPaths(conf));
    splits.add(split);
    LOG.warning(String.format("Added split(%d) (%s)-(%s)", splitFile.size() - i, splitFile.getStart(i), splitFile.getEnd(i)));
  }
  return splits;
}
Example 6
From project archive-commons, under directory /ia-tools/src/main/java/org/archive/hadoop/mapreduce/.
Source file: GZIPRangeLineDereferencingRecordReader.java

@Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  FileSplit fileSplit = (FileSplit) split;
  loader = new HDFSBlockLoader(fileSplit.getPath().getFileSystem(conf));
  skipBad = getSkipBadGZIPRanges(conf);
  super.initialize(split, context);
}
Example 7
From project avro, under directory /lang/java/mapred/src/main/java/org/apache/avro/hadoop/io/.
Source file: AvroSerialization.java

/**
 * Gets an object capable of deserializing the output from a Mapper.
 * @param c The class to get a deserializer for.
 * @return A deserializer for objects of class <code>c</code>.
 */
@Override public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
  Configuration conf = getConf();
  if (AvroKey.class.isAssignableFrom(c)) {
    return new AvroKeyDeserializer<T>(getKeyWriterSchema(conf), getKeyReaderSchema(conf), conf.getClassLoader());
  } else if (AvroValue.class.isAssignableFrom(c)) {
    return new AvroValueDeserializer<T>(getValueWriterSchema(conf), getValueReaderSchema(conf), conf.getClassLoader());
  } else {
    throw new IllegalStateException("Only AvroKey and AvroValue are supported.");
  }
}
Example 8
From project avro, under directory /lang/java/mapred/src/main/java/org/apache/avro/mapreduce/.
Source file: AvroMultipleOutputs.java

/**
 * Adds a named output for the job.
 * <p/>
 * @param job job to add the named output
 * @param namedOutput named output name; it has to be a word, letters and numbers only, and cannot be the word 'part' as that is reserved for the default output.
 * @param outputFormatClass OutputFormat class.
 * @param keySchema Schema for the Key
 * @param valueSchema Schema for the Value (used in case of AvroKeyValueOutputFormat or null)
 */
@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput, Class<? extends OutputFormat> outputFormatClass, Schema keySchema, Schema valueSchema) {
  checkNamedOutputName(job, namedOutput, true);
  Configuration conf = job.getConfiguration();
  conf.set(MULTIPLE_OUTPUTS, conf.get(MULTIPLE_OUTPUTS, "") + " " + namedOutput);
  conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
  keySchemas.put(namedOutput + "_KEYSCHEMA", keySchema);
  valSchemas.put(namedOutput + "_VALSCHEMA", valueSchema);
}
Example 9
From project avro-utils, under directory /src/main/java/com/tomslabs/grid/avro/.
Source file: AvroFileOutputFormat.java

@Override public RecordWriter<T,Object> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
  Configuration config = context.getConfiguration();
  Schema schema = getWriteSchema(config);
  DatumWriter<T> datumWriter = getDatumWriter(config);
  final DataFileWriter<T> writer = new DataFileWriter<T>(datumWriter);
  if (getCompressOutput(context)) {
    int level = config.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    writer.setCodec(CodecFactory.deflateCodec(level));
  }
  Path file = getDefaultWorkFile(context, EXT);
  FileSystem fs = file.getFileSystem(config);
  writer.create(schema, fs.create(file));
  return new AvroRecordWriter<T>(writer);
}
Example 10
From project avro-utils, under directory /src/main/java/com/tomslabs/grid/avro/.
Source file: AvroRecordReader.java

@Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) split;
  Configuration config = context.getConfiguration();
  Path path = fileSplit.getPath();
  this.in = new FsInput(path, config);
  DatumReader<T> datumReader = getDatumReader(config);
  this.reader = new DataFileReader<T>(in, datumReader);
  reader.sync(fileSplit.getStart());
  this.start = in.tell();
  this.end = fileSplit.getStart() + split.getLength();
}
Example 11
From project azkaban, under directory /azkaban/src/java/azkaban/web/pages/.
Source file: HdfsBrowserServlet.java

@Override public void init(ServletConfig config) throws ServletException {
  super.init(config);
  try {
    Configuration conf = new Configuration();
    conf.setClassLoader(this.getApplication().getClassLoader());
    _fs = FileSystem.get(conf);
  } catch (IOException e) {
    throw new ServletException(e);
  }
}
Example 12
From project bagheera, under directory /src/main/java/com/mozilla/bagheera/sink/.
Source file: HBaseSink.java

public HBaseSink(String tableName, String family, String qualifier, boolean prefixDate) {
  this.tableName = Bytes.toBytes(tableName);
  this.family = Bytes.toBytes(family);
  this.qualifier = Bytes.toBytes(qualifier);
  this.prefixDate = prefixDate;
  Configuration conf = HBaseConfiguration.create();
  hbasePool = new HTablePool(conf, hbasePoolSize);
  stored = Metrics.newMeter(new MetricName("bagheera", "sink.hbase.", tableName + ".stored"), "messages", TimeUnit.SECONDS);
}
Example 13
From project big-data-plugin, under directory /shims/common/test-src/org/pentaho/hadoop/mapreduce/test/.
Source file: MRUtilTest.java

@Test public void getPluginDirProperty() throws KettleException {
  final String USER_DIR = System.getProperty("user.dir");
  final Configuration c = new Configuration();
  assertNull(c.get(MRUtil.PROPERTY_PENTAHO_KETTLE_PLUGINS_DIR));
  String pluginDirProperty = MRUtil.getPluginDirProperty(c);
  assertTrue("Plugin Directory Property not configured as expected: " + pluginDirProperty, pluginDirProperty.endsWith(USER_DIR));
}
Example 14
From project big-data-plugin, under directory /shims/common/test-src/org/pentaho/hadoop/mapreduce/test/.
Source file: MRUtilTest.java

@Test public void getPluginDirProperty_explicitly_set() throws KettleException {
  final String PLUGIN_DIR = "/opt/pentaho";
  final Configuration c = new Configuration();
  c.set(MRUtil.PROPERTY_PENTAHO_KETTLE_PLUGINS_DIR, PLUGIN_DIR);
  String pluginDirProperty = MRUtil.getPluginDirProperty(c);
  assertTrue("Plugin Directory Property not configured as expected: " + pluginDirProperty, pluginDirProperty.endsWith(PLUGIN_DIR));
}
Example 15
From project C-Cat, under directory /core/src/main/java/gov/llnl/ontology/mapreduce/.
Source file: CorpusTableMR.java

/**
 * Initializes the {@link CorpusTable} for this {@link CorpusTableMapper} and calls {@link #setup(Context,Configuration)}.
 */
public void setup(Context context) throws IOException, InterruptedException {
  context.setStatus("Setup");
  Configuration conf = context.getConfiguration();
  table = ReflectionUtil.getObjectInstance(conf.get(TABLE));
  table.table();
  context.setStatus("CorpusTable created");
  setup(context, conf);
}
Example 16
From project C-Cat, under directory /core/src/main/java/gov/llnl/ontology/mapreduce/ingest/.
Source file: ImportCorpusMR.java

/**
 * {@inheritDoc}
 */
public void setup(Context context) {
  Configuration conf = context.getConfiguration();
  table = ReflectionUtil.getObjectInstance(conf.get(TABLE));
  table.table();
  reader = ReflectionUtil.getObjectInstance(conf.get(READER));
  corpusName = conf.get(CORP, "");
}
Example 17
From project cdh-mapreduce-ext, under directory /src/main/java/org/apache/hadoop/mapreduce/lib/input/.
Source file: SequenceFileAsBinaryInputFormat.java

public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  Path path = ((FileSplit) split).getPath();
  Configuration conf = context.getConfiguration();
  FileSystem fs = path.getFileSystem(conf);
  this.in = new SequenceFile.Reader(fs, path, conf);
  this.end = ((FileSplit) split).getStart() + split.getLength();
  if (((FileSplit) split).getStart() > in.getPosition()) {
    in.sync(((FileSplit) split).getStart());
  }
  this.start = in.getPosition();
  vbytes = in.createValueBytes();
  done = start >= end;
}
Example 18
From project cdh-mapreduce-ext, under directory /src/main/java/org/apache/hadoop/mapreduce/lib/output/.
Source file: SequenceFileAsBinaryOutputFormat.java

protected SequenceFile.Writer getSequenceWriter(TaskAttemptContext context, Class<?> keyClass, Class<?> valueClass) throws IOException {
  Configuration conf = context.getConfiguration();
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(context)) {
    compressionType = getOutputCompressionType(context);
    Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
    codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
  }
  Path file = getDefaultWorkFile(context, "");
  FileSystem fs = file.getFileSystem(conf);
  return SequenceFile.createWriter(fs, conf, file, keyClass, valueClass, compressionType, codec, context);
}
Example 19
From project action-core, under directory /src/main/java/com/ning/metrics/action/binder/modules/.
Source file: FileSystemAccessProvider.java

@Inject public FileSystemAccessProvider(final ActionCoreConfig actionCoreConfig) throws IOException {
  final Configuration hadoopConfig = new Configuration();
  final String hfsHost = actionCoreConfig.getNamenodeUrl();
  if (hfsHost.isEmpty()) {
    hadoopConfig.set("fs.default.name", "file:///");
  } else {
    hadoopConfig.set("fs.default.name", hfsHost);
  }
  hadoopConfig.setInt("dfs.socket.timeout", actionCoreConfig.getHadoopSocketTimeOut());
  hadoopConfig.setBoolean("fs.automatic.close", false);
  hadoopConfig.setLong("dfs.block.size", actionCoreConfig.getHadoopBlockSize());
  hadoopConfig.set("hadoop.job.ugi", actionCoreConfig.getHadoopUgi());
  hadoopConfig.setStrings("io.serializations",
      HadoopThriftWritableSerialization.class.getName(),
      HadoopThriftEnvelopeSerialization.class.getName(),
      HadoopSmileOutputStreamSerialization.class.getName(),
      "org.apache.hadoop.io.serializer.WritableSerialization",
      actionCoreConfig.getSerializations());
  fileSystemAccess = new FileSystemAccess(hadoopConfig);
}
Example 20
From project aim3-tu-berlin, under directory /seminar/exercises/datamining/core/src/main/java/de/tuberlin/dima/aim/exercises/hadoop/.
Source file: HadoopJob.java

protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? extends OutputFormat> outputFormat) throws IOException {
  Job job = new Job(new Configuration(getConf()));
  Configuration jobConf = job.getConfiguration();
  if (reducer.equals(Reducer.class)) {
    if (mapper.equals(Mapper.class)) {
      throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);
  } else {
    job.setJarByClass(reducer);
  }
  job.setInputFormatClass(inputFormat);
  jobConf.set("mapred.input.dir", inputPath.toString());
  job.setMapperClass(mapper);
  job.setMapOutputKeyClass(mapperKey);
  job.setMapOutputValueClass(mapperValue);
  jobConf.setBoolean("mapred.compress.map.output", true);
  job.setReducerClass(reducer);
  job.setOutputKeyClass(reducerKey);
  job.setOutputValueClass(reducerValue);
  job.setJobName(getCustomJobName(job, mapper, reducer));
  job.setOutputFormatClass(outputFormat);
  jobConf.set("mapred.output.dir", outputPath.toString());
  return job;
}
Example 21
From project aim3-tu-berlin, under directory /seminar/exercises/datamining/core/src/test/java/de/tuberlin/dima/aim/exercises/.
Source file: HadoopAndPactTestcase.java

protected final Path getTestTempDirPath() throws IOException {
  if (testTempDirPath == null) {
    fs = FileSystem.get(new Configuration());
    long simpleRandomLong = (long) (Long.MAX_VALUE * Math.random());
    testTempDirPath = fs.makeQualified(new Path("/tmp/mahout-" + getClass().getSimpleName() + '-' + simpleRandomLong));
    if (!fs.mkdirs(testTempDirPath)) {
      throw new IOException("Could not create " + testTempDirPath);
    }
    fs.deleteOnExit(testTempDirPath);
  }
  return testTempDirPath;
}
Example 22
public void setup(Context context) {
  Configuration conf = context.getConfiguration();
  try {
    String backupInputPath = conf.get("backup.input.path");
    if (!backupInputPath.endsWith(Path.SEPARATOR)) {
      backupInputPath += Path.SEPARATOR;
    }
    filePattern = Pattern.compile(backupInputPath + "(.+)");
    inputFs = FileSystem.get(new Path(backupInputPath).toUri(), context.getConfiguration());
    outputRootPath = conf.get("backup.output.path");
    if (!outputRootPath.endsWith(Path.SEPARATOR)) {
      outputRootPath += Path.SEPARATOR;
    }
    outputFs = FileSystem.get(new Path(outputRootPath).toUri(), conf);
  } catch (IOException e) {
    throw new RuntimeException("Could not get FileSystem", e);
  }
}
Example 23
From project azkaban, under directory /contrib/mr-kluj/src/main/java/com/linkedin/json/.
Source file: JsonSequenceFileInputFormat.java

@Override public RecordReader<Object,Object> createRecordReader(final InputSplit split, final TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
  log.info("Input file path:" + inputPathString);
  Path inputPath = new Path(inputPathString);
  SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
  SequenceFile.Metadata meta = reader.getMetadata();
  try {
    final Text keySchema = meta.get(new Text("key.schema"));
    final Text valueSchema = meta.get(new Text("value.schema"));
    if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
      throw new Exception(String.format("Cannot have a 0 length schema. keySchema[%s], valueSchema[%s]", keySchema, valueSchema));
    }
    return new JsonObjectRecordReader(new JsonTypeSerializer(keySchema.toString()), new JsonTypeSerializer(valueSchema.toString()), baseInputFormat.createRecordReader(split, context));
  } catch (Exception e) {
    throw new IOException("Failed to Load Schema from file:" + inputPathString + "\n");
  }
}
Example 24
From project book, under directory /src/test/java/com/tamingtext/mahout/.
Source file: VectorExamplesTest.java

@Test public void testProgrammatic() throws Exception {
  double[] vals = new double[]{0.3, 1.8, 200.228};
  Vector dense = new DenseVector(vals);
  assertTrue(dense.size() == 3);
  Vector sparseSame = new SequentialAccessSparseVector(3);
  Vector sparse = new SequentialAccessSparseVector(3000);
  for (int i = 0; i < vals.length; i++) {
    sparseSame.set(i, vals[i]);
    sparse.set(i, vals[i]);
  }
  assertFalse(dense.equals(sparse));
  assertEquals(dense, sparseSame);
  assertFalse(sparse.equals(sparseSame));
  File tmpDir = new File(System.getProperty("java.io.tmpdir"));
  File tmpLoc = new File(tmpDir, "sfvwt");
  tmpLoc.mkdirs();
  File tmpFile = File.createTempFile("sfvwt", ".dat", tmpLoc);
  Path path = new Path(tmpFile.getAbsolutePath());
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class, VectorWritable.class);
  VectorWriter vecWriter = new SequenceFileVectorWriter(seqWriter);
  List<Vector> vectors = new ArrayList<Vector>();
  vectors.add(sparse);
  vectors.add(sparseSame);
  vecWriter.write(vectors);
  vecWriter.close();
}
Example 25
From project cascading, under directory /src/hadoop/cascading/tuple/hadoop/.
Source file: TupleSerialization.java

public TupleSerialization(final FlowProcess<JobConf> flowProcess) {
  super(new Configuration() {
    @Override public String get(String name) {
      return get(name, null);
    }
    @Override public String get(String name, String defaultValue) {
      Object value = flowProcess.getProperty(name);
      return value == null ? defaultValue : String.valueOf(value);
    }
  });
}
Example 26
From project bagheera, under directory /src/main/java/com/mozilla/bagheera/sink/.
Source file: SequenceFileSink.java

public SequenceFileSink(String namespace, String baseDirPath, String dateFormat, long maxFileSize, boolean useBytesValue) throws IOException {
  LOG.info("Initializing writer for namespace: " + namespace);
  conf = new Configuration();
  conf.setBoolean("fs.automatic.close", false);
  hdfs = FileSystem.newInstance(conf);
  this.useBytesValue = useBytesValue;
  this.maxFileSize = maxFileSize;
  sdf = new SimpleDateFormat(dateFormat);
  if (!baseDirPath.endsWith(Path.SEPARATOR)) {
    baseDir = new Path(baseDirPath + Path.SEPARATOR + namespace + Path.SEPARATOR + sdf.format(new Date(System.currentTimeMillis())));
  } else {
    baseDir = new Path(baseDirPath + namespace + Path.SEPARATOR + sdf.format(new Date(System.currentTimeMillis())));
  }
  initWriter();
  stored = Metrics.newMeter(new MetricName("bagheera", "sink.hdfs.", namespace + ".stored"), "messages", TimeUnit.SECONDS);
}
Example 27
From project cascading, under directory /src/hadoop/cascading/flow/hadoop/util/.
Source file: HadoopUtil.java

public static <T> ObjectSerializer instantiateSerializer(Configuration conf, Class<T> type) throws ClassNotFoundException {
  Class<ObjectSerializer> flowSerializerClass;
  String serializerClassName = conf.get(ObjectSerializer.OBJECT_SERIALIZER_PROPERTY);
  if (serializerClassName == null || serializerClassName.length() == 0)
    flowSerializerClass = (Class<ObjectSerializer>) DEFAULT_OBJECT_SERIALIZER;
  else
    flowSerializerClass = (Class<ObjectSerializer>) Class.forName(serializerClassName);
  ObjectSerializer objectSerializer;
  try {
    objectSerializer = flowSerializerClass.newInstance();
    if (objectSerializer instanceof Configurable)
      ((Configurable) objectSerializer).setConf(conf);
  } catch (Exception exception) {
    exception.printStackTrace();
    throw new IllegalArgumentException("Unable to instantiate serializer \"" + flowSerializerClass.getName() + "\" for class: " + type.getName());
  }
  if (!objectSerializer.accepts(type))
    throw new IllegalArgumentException(serializerClassName + " won't accept objects of class " + type.toString());
  return objectSerializer;
}