Java Code Examples for org.apache.hadoop.mapreduce.Job

The following code examples are extracted from open source projects.

Example 1

From project aim3-tu-berlin, under directory /seminar/exercises/datamining/core/src/main/java/de/tuberlin/dima/aim/exercises/one/.

Source file: FilteringWordCount.java


@Override public int run(String[] args) throws Exception {
  Map<String,String> parsedArgs=parseArgs(args);
  Path inputPath=new Path(parsedArgs.get("--input"));
  Path outputPath=new Path(parsedArgs.get("--output"));
  Job wordCount=prepareJob(inputPath,outputPath,TextInputFormat.class,
      FilteringWordCountMapper.class,Text.class,IntWritable.class,
      WordCountReducer.class,Text.class,IntWritable.class,
      TextOutputFormat.class);
  wordCount.waitForCompletion(true);
  return 0;
}
 

Example 2

From project avro, under directory /lang/java/mapred/src/test/java/org/apache/avro/hadoop/io/.

Source file: TestAvroSerialization.java


@Test public void testGetSerializerForKey() throws IOException {
  Schema writerSchema=Schema.create(Schema.Type.STRING);
  Job job=new Job();
  AvroJob.setMapOutputKeySchema(job,writerSchema);
  AvroSerialization serialization=ReflectionUtils.newInstance(AvroSerialization.class,job.getConfiguration());
  @SuppressWarnings("unchecked") Serializer<AvroWrapper> serializer=serialization.getSerializer(AvroKey.class);
  assertTrue(serializer instanceof AvroSerializer);
  AvroSerializer avroSerializer=(AvroSerializer)serializer;
  assertEquals(writerSchema,avroSerializer.getWriterSchema());
}
 

Example 3

From project cdh-mapreduce-ext, under directory /src/test/java/org/apache/hadoop/mapreduce/lib/jobcontrol/.

Source file: TestMapReduceJobControlWithMocks.java


private Job createJob(boolean complete,boolean successful) throws IOException, InterruptedException {
  Job mockJob=mock(Job.class);
  when(mockJob.getConfiguration()).thenReturn(new Configuration());
  when(mockJob.isComplete()).thenReturn(complete);
  when(mockJob.isSuccessful()).thenReturn(successful);
  return mockJob;
}
 

Example 4

From project chombo, under directory /src/main/java/org/chombo/mr/.

Source file: Combinator.java


@Override public int run(String[] args) throws Exception {
  Job job=new Job(getConf());
  String jobName="Combinator  MR";
  job.setJobName(jobName);
  job.setJarByClass(Combinator.class);
  FileInputFormat.addInputPath(job,new Path(args[0]));
  FileOutputFormat.setOutputPath(job,new Path(args[1]));
  Utility.setConfiguration(job.getConfiguration());
  job.setMapperClass(Combinator.CombinatorMapper.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  int status=job.waitForCompletion(true) ? 0 : 1;
  return status;
}
 

Example 5

From project Cloud9, under directory /src/dist/edu/umd/cloud9/example/simple/.

Source file: DemoMapreduceNullInput.java


/** 
 * Runs the demo.
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  Job job=new Job(new Configuration(),"DemoMapreduceNullInput");
  job.setJarByClass(DemoMapreduceNullInput.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(NullInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(MyMapper.class);
  job.waitForCompletion(true);
}
 

Example 6

From project crunch, under directory /crunch/src/main/java/org/apache/hadoop/mapreduce/lib/output/.

Source file: CrunchMultipleOutputs.java


private TaskAttemptContext getContext(String nameOutput) throws IOException {
  TaskAttemptContext taskContext=taskContexts.get(nameOutput);
  if (taskContext != null) {
    return taskContext;
  }
  Job job=new Job(context.getConfiguration());
  job.getConfiguration().set("crunch.namedoutput",nameOutput);
  job.setOutputFormatClass(getNamedOutputFormatClass(context,nameOutput));
  job.setOutputKeyClass(getNamedOutputKeyClass(context,nameOutput));
  job.setOutputValueClass(getNamedOutputValueClass(context,nameOutput));
  taskContext=TaskAttemptContextFactory.create(job.getConfiguration(),context.getTaskAttemptID());
  taskContexts.put(nameOutput,taskContext);
  return taskContext;
}
 

Example 7

From project datasalt-utils, under directory /src/test/java/com/datasalt/utils/mapred/joiner/.

Source file: TestJoinOneToMany.java


protected Job getMultiJoiner(Configuration conf) throws IOException, InterruptedException, ClassNotFoundException {
  JoinOneToMany multiJoiner=new JoinOneToMany("MultiJoiner Test",conf);
  multiJoiner.setReducer(Reducer.class);
  multiJoiner.setOutputKeyClass(Text.class);
  multiJoiner.setOutputValueClass(NullWritable.class);
  multiJoiner.setOutputFormat(TextOutputFormat.class);
  multiJoiner.setOutputPath(new Path(OUTPUT));
  Job job=multiJoiner.setManySideClass(Text.class)
      .setOneSideClass(IntWritable.class)
      .addManySideInput(new Path(INPUT1),TextInputFormat.class,ManyMap.class)
      .addOneSideInput(new Path(INPUT2),TextInputFormat.class,OneMap.class)
      .getJob();
  return job;
}
 

Example 8

From project gora, under directory /gora-core/src/examples/java/org/apache/gora/examples/mapreduce/.

Source file: QueryCounter.java


/** 
 * Creates and returns the  {@link Job} for submitting to Hadoop mapreduce.
 * @param dataStore
 * @param query
 * @return
 * @throws IOException
 */
public Job createJob(DataStore<K,T> dataStore,Query<K,T> query) throws IOException {
  Job job=new Job(getConf());
  job.setJobName("QueryCounter");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());
  GoraMapper.initMapperJob(job,query,dataStore,NullWritable.class,NullWritable.class,QueryCounterMapper.class,true);
  job.setOutputFormatClass(NullOutputFormat.class);
  return job;
}
 

Example 9

From project hadoop-book_1, under directory /src/main/java/com/manning/hip/ch1/.

Source file: InvertedIndexMapReduce.java


public static void runJob(String[] input,String output) throws Exception {
  Configuration conf=new Configuration();
  Job job=new Job(conf);
  job.setJarByClass(InvertedIndexMapReduce.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  Path outputPath=new Path(output);
  FileInputFormat.setInputPaths(job,StringUtils.join(input,","));
  FileOutputFormat.setOutputPath(job,outputPath);
  outputPath.getFileSystem(conf).delete(outputPath,true); // clear any previous output so the job does not fail on an existing directory
  job.waitForCompletion(true);
}
 

Example 10

From project akela, under directory /src/main/java/com/mozilla/hadoop/.

Source file: Backup.java


public int run(String[] args) throws Exception {
  if (args.length < 2) {
    return printUsage();
  }
  int rc=-1;
  Job job=initJob(args);
  job.waitForCompletion(true);
  if (job.isSuccessful()) {
    rc=0;
    FileSystem hdfs=null;
    try {
      hdfs=FileSystem.get(job.getConfiguration());
      // FileSystem.delete() does not expand globs, so match the input-source files explicitly
      for (FileStatus status : hdfs.globStatus(new Path(NAME + "-inputsource*.txt"))) {
        hdfs.delete(status.getPath(),false);
      }
    }
    finally {
      checkAndClose(hdfs);
    }
  }
  return rc;
}
 

Example 11

From project avro-utils, under directory /src/test/java/com/tomslabs/grid/avro/.

Source file: AvroWordCount.java


public static Job createSubmitableJob(final Configuration conf,final Path inputPath,final Path outputPath) throws IOException {
  conf.set(AvroFileOutputFormat.OUTPUT_SCHEMA,WordCountSchema.getSchema().toString());
  conf.setInt("mapred.max.split.size",1024000);
  conf.setInt("mapred.reduce.tasks",10);
  conf.setBoolean("mapred.reduce.tasks.speculative.execution",true);
  final Job job=new Job(conf,"Word Count");
  job.setJarByClass(AvroWordCount.class);
  job.setInputFormatClass(AvroFileInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setMapperClass(WordCountMapper.class);
  job.setReducerClass(WordCountReducer.class);
  job.setOutputKeyClass(GenericRecord.class);
  job.setOutputValueClass(NullWritable.class);
  job.setOutputFormatClass(AvroFileOutputFormat.class);
  AvroFileOutputFormat.setDeflateLevel(job,3);
  FileInputFormat.addInputPath(job,inputPath);
  FileOutputFormat.setOutputPath(job,outputPath);
  return job;
}
 

Example 12

From project behemoth, under directory /io/src/main/java/com/digitalpebble/behemoth/io/sequencefile/.

Source file: SequenceFileConverterJob.java


public int run(String[] args) throws Exception {
  int result=0;
  addInputOption();
  addOutputOption();
  if (parseArguments(args) == null) {
    return -1;
  }
  Path input=getInputPath();
  Path output=getOutputPath();
  Job job=prepareJob(input,output,SequenceFileInputFormat.class,
      SequenceFileConverterMapper.class,Text.class,BehemothDocument.class,
      SequenceFileOutputFormat.class);
  job.setJobName("Convert Sequence File: " + input);
  job.waitForCompletion(true);
  if (log.isInfoEnabled()) {
    log.info("Conversion: done");
  }
  return result;
}
 

Example 13

From project C-Cat, under directory /core/src/main/java/gov/llnl/ontology/mapreduce/.

Source file: CorpusTableMR.java


/** 
 * {@inheritDoc}
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  MRArgOptions options=new MRArgOptions();
  addOptions(options);
  LOG.info("Parse Options");
  options.parseOptions(args);
  validateOptions(options);
  LOG.info("Setup Configuration");
  Configuration conf=getConf();
  conf.set(TABLE,options.corpusTableType());
  setupConfiguration(options,conf);
  LOG.info("Setup Table Scan");
  CorpusTable table=options.corpusTable();
  Scan scan=new Scan();
  table.setupScan(scan,options.sourceCorpus());
  LOG.info("Setup Job");
  Job job=new Job(conf,jobName());
  job.setJarByClass(CorpusTableMR.class);
  TableMapReduceUtil.initTableMapperJob(table.tableName(),scan,mapperClass(),mapperKeyClass(),mapperValueClass(),job);
  setupReducer(table.tableName(),job,options);
  LOG.info("Start Mapper job");
  job.waitForCompletion(true);
  LOG.info("Job Completed");
  return 0;
}
 

Example 14

From project cdh-maven-archetype, under directory /hbase-driver/src/main/java/com/cloudera/hbase/.

Source file: WordCount.java


public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordcount <in> <out>");
    return 2;
  }
  Configuration conf=getConf();
  Job job=new Job(conf,"word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(Map.class);
  job.setCombinerClass(Reduce.class);
  job.setReducerClass(Reduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job,new Path(args[0]));
  FileOutputFormat.setOutputPath(job,new Path(args[1]));
  return job.waitForCompletion(true) ? 0 : 1;
}
 

Example 15

From project data-bus, under directory /databus-worker/src/main/java/com/inmobi/databus/local/.

Source file: LocalStreamService.java


@Override protected void execute() throws Exception {
  try {
    FileSystem fs=FileSystem.get(cluster.getHadoopConf());
    cleanUpTmp(fs);
    LOG.info("TmpPath is [" + tmpPath + "]");
    publishMissingPaths(fs,cluster.getLocalFinalDestDirRoot());
    Map<FileStatus,String> fileListing=new TreeMap<FileStatus,String>();
    Set<FileStatus> trashSet=new HashSet<FileStatus>();
    Map<String,FileStatus> checkpointPaths=new TreeMap<String,FileStatus>();
    createMRInput(tmpJobInputPath,fileListing,trashSet,checkpointPaths);
    if (fileListing.size() == 0) {
      LOG.info("Nothing to do!");
      return;
    }
    Job job=createJob(tmpJobInputPath);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
      long commitTime=cluster.getCommitTime();
      LOG.info("Commiting mvPaths and ConsumerPaths");
      commit(prepareForCommit(commitTime,fileListing));
      checkPoint(checkpointPaths);
      LOG.info("Commiting trashPaths");
      commit(populateTrashCommitPaths(trashSet));
      LOG.info("Committed successfully at " + getLogDateString(commitTime));
    }
  }
  catch (Exception e) {
    LOG.warn("Error in running LocalStreamService " + e);
    throw e;
  }
}
 

Example 16

From project DistCpV2-0.20.203, under directory /src/main/java/org/apache/hadoop/tools/.

Source file: DistCp.java


/** 
 * Create Job object for submitting it, with all the configuration
 * @return Reference to job object.
 * @throws IOException - Exception if any
 */
protected Job createJob() throws IOException {
  String jobName="distcp";
  String userChosenName=getConf().get("mapred.job.name");
  if (userChosenName != null)   jobName+=": " + userChosenName;
  Job job=new Job(getConf(),jobName);
  job.setInputFormatClass(DistCpUtils.getStrategy(getConf(),inputOptions));
  job.setJarByClass(CopyMapper.class);
  configureOutputFormat(job);
  job.setMapperClass(CopyMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputFormatClass(CopyOutputFormat.class);
  job.getConfiguration().set("mapred.map.tasks.speculative.execution","false");
  job.getConfiguration().set(DistCpConstants.CONF_LABEL_NUM_MAPS,String.valueOf(inputOptions.getMaxMaps()));
  if (inputOptions.getSslConfigurationFile() != null) {
    setupSSLConfig(job.getConfiguration());
  }
  inputOptions.appendToConf(job.getConfiguration());
  return job;
}
 

Example 17

From project elephant-twin, under directory /com.twitter.elephanttwin/src/main/java/com/twitter/elephanttwin/indexing/.

Source file: AbstractBlockIndexingJob.java


/** 
 * Sets up various job properties required for the indexing job. If your implementation needs to mess with the conf, you can do so by overriding this method (remember to call super.setupJob()!) or in setMapper().
 * @param conf
 * @return
 * @throws IOException
 */
protected Job setupJob(Configuration conf) throws IOException {
  Job job=new Job(new Configuration(conf));
  job.setJarByClass(getClass());
  job.setInputFormatClass(BlockIndexedFileInputFormat.class);
  job.setReducerClass(MapFileIndexingReducer.class);
  job.setMapOutputKeyClass(TextLongPairWritable.class);
  job.setMapOutputValueClass(LongPairWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(ListLongPair.class);
  job.setPartitionerClass(TextLongPairWritable.Parititioner.class);
  job.setSortComparatorClass(TextLongPairWritable.PairComparator.class);
  job.setGroupingComparatorClass(TextLongPairWritable.KeyOnlyComparator.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  job.setNumReduceTasks(getNumPartitions());
  BlockIndexedFileInputFormat.setIndexOptions(job,getInputFormat(),getValueClass(),getIndex(),getColumnName());
  return job;
}
 

Example 18

From project exo-training, under directory /hadoop-sample/simple/src/main/java/org/sample/hadoop/.

Source file: WordCount.java


public static void main(String[] args) throws Exception {
  Configuration configuration=new Configuration();
  String[] otherArgs=new GenericOptionsParser(configuration,args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: wordcount <in> <out>");
    System.exit(2);
  }
  Job job=new Job(configuration); // use the parsed configuration so generic options (-D etc.) take effect
  job.setJarByClass(WordCount.class);
  job.setJobName(WordCount.class.getSimpleName());
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class); // combiners are optional, so an explicit reducer is needed for correct counts
  job.setMapperClass(TokenizerMapper.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job,new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 

Example 19

From project extramuros, under directory /java/src/extramuros/java/jobs/clustering/proclus/algorithm/.

Source file: ProClusClusterEvaluationJob.java


public void run() throws Exception {
  log.info("SETTING INPUT CLUSTER:" + clusterSetInputFile.toUri().toString());
  getConf().set(ProClusConfigKeys.SET_PATH,clusterSetInputFile.toUri().toString());
  log.info("*** RUNNING CLUSTERS EVALUATION - ITERATION " + iteration);
  Job job=new Job(getConf(),"Clusters evaluation algorithm execution, output: "
      + output.toUri().getPath() + " input:" + input.toUri().getPath());
  job.setMapperClass(ProClusPointsAssignmentMapper.class);
  job.setReducerClass(ProClusClusterEvaluationReducer.class);
  job.setMapOutputKeyClass(Cluster.class);
  job.setMapOutputValueClass(VectorWritable.class);
  job.setOutputKeyClass(Cluster.class);
  job.setOutputValueClass(DoubleWritable.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  String inputFiles=ProClusUtils.composeInputPathString(fs,input);
  FileInputFormat.addInputPaths(job,inputFiles);
  FileOutputFormat.setOutputPath(job,output);
  job.setJarByClass(ProClusClusterEvaluationJob.class);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException("Clusters evaluation algorithm failed processing "
        + input.toUri().getPath() + " output: " + output.toUri().getPath());
  }
  ProClusUtils.cleanOutput(fs,output);
  setClusterSet(new ClusterSet());
  setEvaluationMetric(computeFinalMetric(getClusterSet()));
  log.info(" EVALUATION METRIC: " + getEvaluationMetric() + " - "+ getClusterSet().bestClustersCount()+ " best clusters found");
  writeOutputSequence();
}
 

Example 20

From project giraph, under directory /src/main/java/org/apache/giraph/graph/.

Source file: GiraphJob.java


/** 
 * Runs the actual graph application through Hadoop Map-Reduce.
 * @param verbose If true, provide verbose output, false otherwise
 * @return True if success, false otherwise
 * @throws ClassNotFoundException
 * @throws InterruptedException
 * @throws IOException
 */
public final boolean run(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException {
  setIntConfIfDefault("mapreduce.job.counters.limit",512);
  setIntConfIfDefault("mapred.job.map.memory.mb",1024);
  setIntConfIfDefault("mapred.job.reduce.memory.mb",1024);
  giraphConfiguration.setBoolean("mapred.map.tasks.speculative.execution",false);
  Client.setPingInterval(giraphConfiguration,60000 * 5);
  giraphConfiguration.setBoolean("mapreduce.user.classpath.first",true);
  ImmutableClassesGiraphConfiguration immutableClassesGiraphConfiguration=new ImmutableClassesGiraphConfiguration(giraphConfiguration);
  checkConfiguration(immutableClassesGiraphConfiguration);
  checkLocalJobRunnerConfiguration(immutableClassesGiraphConfiguration);
  Job submittedJob=new Job(immutableClassesGiraphConfiguration,jobName);
  if (submittedJob.getJar() == null) {
    submittedJob.setJarByClass(GiraphJob.class);
  }
  submittedJob.setNumReduceTasks(0);
  submittedJob.setMapperClass(GraphMapper.class);
  submittedJob.setInputFormatClass(BspInputFormat.class);
  submittedJob.setOutputFormatClass(BspOutputFormat.class);
  return submittedJob.waitForCompletion(verbose);
}
 

Example 21

From project HadoopIlluminated, under directory /src/main/java/com/hadoopilluminated/ch01/.

Source file: MRWordCount21.java


@Override public int run(String[] args) throws Exception {
  System.out.println("Running MR: MRWordCount21");
  Job job=new Job(getConf());
  job.setJarByClass(MRWordCount21.class);
  job.setJobName("MRWordCount21");
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(Map21.class);
  job.setCombinerClass(Reduce21.class);
  job.setReducerClass(Reduce21.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  System.out.println("Input path: " + args[0]);
  System.out.println("Output path: " + args[1]);
  FileInputFormat.setInputPaths(job,new Path(args[0]));
  FileOutputFormat.setOutputPath(job,new Path(args[1]));
  boolean success=job.waitForCompletion(true);
  return success ? 0 : 1;
}
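
Examples 1, 4, 10, 12, 13, 14, and this one implement run(String[]) in the style of org.apache.hadoop.util.Tool, which is normally driven through ToolRunner so that generic Hadoop options are parsed before run() is called. Here is a minimal sketch of such a driver, assuming MRWordCount21 above extends Configured and implements Tool (the driver class name is made up):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class MRWordCount21Driver {
  public static void main(String[] args) throws Exception {
    // ToolRunner extracts generic options (-D, -fs, -files, ...) into the
    // Configuration before invoking MRWordCount21.run() with the remaining args
    int exitCode=ToolRunner.run(new Configuration(),new MRWordCount21(),args);
    System.exit(exitCode);
  }
}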
 

Example 22

From project archive-commons, under directory /ia-tools/src/main/java/org/archive/hadoop/storage/.

Source file: ZipNumStorage.java


@Override public void setStoreLocation(String path,Job job) throws IOException {
  System.err.format("Set partitioner class\n");
  job.getConfiguration().set("mapreduce.partitioner.class",AlphaPartitioner.class.getCanonicalName());
  job.getConfiguration().set("mapred.textoutputformat.separator","");
  FileOutputFormat.setOutputPath(job,new Path(path));
}
 

Example 23

From project chukwa, under directory /contrib/chukwa-pig/src/java/org/apache/hadoop/chukwa/pig/.

Source file: ChukwaLoader.java


@Override public ResourceSchema getSchema(String s,Job job) throws IOException {
  Schema newSchema=new Schema();
  newSchema.add(new Schema.FieldSchema("timestamp",DataType.LONG));
  newSchema.add(new Schema.FieldSchema("map",DataType.MAP));
  return new ResourceSchema(newSchema);
}
 

Example 24

From project ecoadapters, under directory /ecoadapters/src/main/java/com/inadco/ecoadapters/pig/.

Source file: HBaseProtobufLoader.java


@Override public ResourceSchema getSchema(String location,Job job) throws IOException {
  Schema ps=new Schema();
  ps.add(new FieldSchema(HBaseProtobufStorage.HKEY_ALIAS,DataType.BYTEARRAY));
  for (int i=0; i < m_colSpec.m_pigSchema.length; i++) {
    String colName=Bytes.toString(m_colSpec.m_fams[i]) + "::" + Bytes.toString(m_colSpec.m_cols[i]);
    if (m_colSpec.m_pigSchema[i] != null)
      ps.add(new FieldSchema(colName,m_colSpec.m_pigSchema[i],DataType.TUPLE));
    else
      ps.add(new FieldSchema(colName,DataType.BYTEARRAY));
    String timestampName=colName + "::timestamp";
    ps.add(new FieldSchema(timestampName,DataType.LONG));
  }
  return new ResourceSchema(ps);
}
 

Example 25

From project grouperfish, under directory /transforms/coclustering/src/main/java/com/mozilla/grouperfish/transforms/coclustering/pig/storage/.

Source file: MahoutVectorStorage.java


public static void cleanupOnFailureImpl(String location,Job job) throws IOException {
  Path path=new Path(location);
  FileSystem fs=path.getFileSystem(job.getConfiguration());
  if (fs.exists(path)) {
    fs.delete(path,true);
  }
}
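
Examples 22 through 25 are not job drivers: they are fragments of Pig load and store functions, where Pig passes the Job in so the function can adjust its output format, schema, or configuration. For orientation, here is a minimal sketch of a complete StoreFunc built around the same setStoreLocation(String, Job) hook seen in Example 22 (SketchStorage is a made-up name, and the tab-delimited text output is only illustrative):

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.pig.StoreFunc;
import org.apache.pig.data.Tuple;

public class SketchStorage extends StoreFunc {
  private RecordWriter writer;

  @Override public OutputFormat getOutputFormat() throws IOException {
    return new TextOutputFormat(); // plain text output, one tuple per line
  }

  @Override public void setStoreLocation(String location,Job job) throws IOException {
    // Pig hands us the Job here so we can point its output at the store
    // location, as ZipNumStorage does in Example 22
    FileOutputFormat.setOutputPath(job,new Path(location));
  }

  @Override public void prepareToWrite(RecordWriter writer) throws IOException {
    this.writer=writer;
  }

  @Override public void putNext(Tuple tuple) throws IOException {
    try {
      writer.write(null,tuple.toDelimitedString("\t"));
    }
    catch (InterruptedException e) {
      throw new IOException(e);
    }
  }
}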