Java Code Examples for org.apache.hadoop.io.Text

The following code examples are extracted from open source projects.
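
Before the project examples, here is a minimal self-contained sketch of the Text API that most of them rely on. The Text methods shown are real; the surrounding demo class is illustrative only.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextBasics {
  public static void main(String[] args) {
    Text t = new Text("hello");       // stores the string as UTF-8 bytes
    t.set("hello world");             // mutable: the instance can be reused

    // getBytes() exposes the internal buffer, which may be longer than the
    // encoded content, so reads must be bounded by getLength().
    byte[] raw = t.getBytes();
    int len = t.getLength();
    System.out.println(new String(raw, 0, len, StandardCharsets.UTF_8));

    System.out.println(t.toString()); // decodes only the valid region
  }
}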

Example 1

From project akela, under directory /src/main/java/com/mozilla/pig/load/.

Source file: DateRangeLoader.java

@Override public Tuple getNext() throws IOException {
  // Return one tuple per input line; null tells Pig the input is exhausted.
  while (reader.nextKeyValue()) {
    Text val = reader.getCurrentValue();
    if (val != null) {
      Tuple t = TupleFactory.getInstance().newTuple();
      t.append(val.toString());
      return t;
    }
  }
  return null;
}
 

Example 2

From project asakusafw-examples, under directory /example-wordcount/src/main/java/com/example/wordcount/operator/.

Source file: WordCountOperator.java

@CoGroup public void split(@Key(group={}) List<LogLine> lines, Result<WordCount> result) {
  for (LogLine line : lines) {
    Text query = line.getQuery();
    String q = query.toString(); // decode the Text once
    if (q.isEmpty()) {
      continue; // skip log lines without a query
    }
    for (String word : q.split(SPACE)) {
      WordCount count = new WordCount();
      count.setWordAsString(word);
      count.setCount(1);
      result.add(count);
    }
  }
}
 

Example 3

From project avro, under directory /lang/java/mapred/src/main/java/org/apache/avro/mapred/.

Source file: AvroTextOutputFormat.java

private ByteBuffer toByteBuffer(Object o) throws IOException {
  if (o instanceof Text) {
    Text to = (Text) o;
    // Wrap only the valid bytes: Text's backing array may be longer than its content.
    return ByteBuffer.wrap(to.getBytes(), 0, to.getLength());
  } else {
    return ByteBuffer.wrap(o.toString().getBytes(UTF8));
  }
}
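
The (0, to.getLength()) bounds above are not optional: Text keeps its backing buffer when a value is replaced via set(byte[], int, int) or by deserialization, so the array can hold stale trailing bytes. A small illustrative sketch of the pitfall:

import java.util.Arrays;
import org.apache.hadoop.io.Text;

public class TextBytesPitfall {
  public static void main(String[] args) throws Exception {
    Text t = new Text("a longer value");
    byte[] hi = "hi".getBytes("UTF-8");
    t.set(hi, 0, hi.length); // keeps the larger backing array
    // Only getLength() bytes of the buffer are valid now.
    System.out.println(t.getBytes().length + " buffer bytes, " + t.getLength() + " valid");
    byte[] exact = Arrays.copyOf(t.getBytes(), t.getLength());
    System.out.println(new String(exact, "UTF-8")); // prints "hi"
  }
}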
 

Example 4

From project big-data-plugin, under directory /shims/common/src-mapred/org/pentaho/hadoop/mapreduce/converter/converters/.

Source file: KettleTypeToTextConverter.java

@Override public Text convert(ValueMetaInterface meta, Object obj) throws TypeConversionException {
  try {
    Text text = new Text();
    text.set(meta.getString(obj));
    return text;
  } catch (KettleValueException ex) {
    throw new TypeConversionException(BaseMessages.getString(TypeConverterFactory.class,
        "ErrorConverting", Text.class.getSimpleName(), obj), ex);
  }
}
 

Example 5

From project aim3-tu-berlin, under directory /seminar/exercises/datamining/core/src/main/java/de/tuberlin/dima/aim/exercises/two/.

Source file: SecondarySortBookSort.java

@Override protected void reduce(BookSortKey bookSortKey, Iterable<Text> values, Context ctx) throws IOException, InterruptedException {
  for (Text value : values) {
    String out = Joiner.on('\t').skipNulls().join(bookSortKey.toString(), value.toString());
    ctx.write(new Text(out), NullWritable.get());
  }
}
 

Example 6

From project archive-commons, under directory /ia-tools/src/main/java/org/archive/hadoop/io/.

Source file: MergeClusterRangesInputFormat.java

@Override public boolean nextKeyValue() throws IOException, InterruptedException {
  if (key == null) {
    key = Long.valueOf(0);
  }
  if (value == null) {
    value = new Text();
  }
  if (itr.hasNext()) {
    key = Long.valueOf(key.longValue() + 1); // 1-based record counter
    value.set(itr.next());                   // reuse the same Text instance
    return true;
  }
  return false;
}
 

Example 7

From project avro-utils, under directory /src/test/java/com/tomslabs/grid/avro/.

Source file: AvroRecordToJSONStringTest.java

@Test public void test() throws Throwable {
  String[] words = new String[]{"foo", "foo", "bar", "baz", "foo", "baz"};
  String avroFile = AvroIOUtils.createAvroInputFile(inputDir, words);
  JobConf job = new JobConf();
  FileSplit split = new FileSplit(new Path(avroFile), 0, 4096, job);
  AvroTextRecordReader reader = new AvroTextRecordReader(job, split);
  Text key = reader.createKey();
  Text value = reader.createValue();
  for (String word : words) {
    Assert.assertTrue(reader.next(key, value));
    // Each record's key is the JSON rendering of one word; the value is empty.
    String jsonString = "{\"word\": \"" + word + "\"}";
    Assert.assertEquals(jsonString, key.toString());
    Assert.assertEquals("", value.toString());
  }
  Assert.assertFalse(reader.next(key, value));
}
 

Example 8

From project azkaban, under directory /azkaban-common/src/java/azkaban/common/web/.

Source file: JsonSequenceFileViewer.java

public void displaySequenceFile(SequenceFile.Reader reader, PrintWriter output, int startLine, int endLine) throws IOException {
  if (logger.isDebugEnabled()) {
    logger.debug("display json file");
  }
  try {
    BytesWritable keyWritable = new BytesWritable();
    BytesWritable valueWritable = new BytesWritable();
    // The JSON schemas are stored in the sequence file's metadata under Text keys.
    Text keySchema = reader.getMetadata().get(new Text("key.schema"));
    Text valueSchema = reader.getMetadata().get(new Text("value.schema"));
    JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema.toString());
    JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema.toString());
    // Skip ahead to the requested start line.
    for (int i = 1; i < startLine; i++) {
      reader.next(keyWritable, valueWritable);
    }
    for (int i = startLine; i <= endLine; i++) {
      if (!reader.next(keyWritable, valueWritable)) {
        break;
      }
      output.write(safeToString(keySerializer.toObject(keyWritable.getBytes())));
      output.write("\t=>\t");
      output.write(safeToString(valueSerializer.toObject(valueWritable.getBytes())));
      output.write("\n");
      output.flush();
    }
  } finally {
    reader.close();
  }
}
 

Example 9

From project babel, under directory /src/babel/prep/datedcorpus/.

Source file: DatedCorpusGenReducer.java

public void reduce(Text key, Iterator<PageVersion> values, OutputCollector<Text,Text> output, Reporter reporter) throws IOException {
  StringBuilder strBld = new StringBuilder();
  HashSet<String> seenVers = new HashSet<String>();
  while (values.hasNext()) {
    String content = values.next().getContent().trim();
    // Append each distinct page version only once.
    if (seenVers.add(content)) {
      strBld.append(content).append("\n\n");
    }
  }
  output.collect(key, new Text(strBld.toString()));
}
 

Example 10

From project behemoth, under directory /core/src/main/java/com/digitalpebble/behemoth/util/.

Source file: ContentExtractor.java

private void generateDocs(Path input, Path dir, int[] count) throws IOException, ArchiveException {
  DocumentFilter docFilter = DocumentFilter.getFilters(getConf());
  Reader[] cacheReaders = SequenceFileOutputFormat.getReaders(getConf(), input);
  for (Reader current : cacheReaders) {
    Text key = new Text();
    BehemothDocument inputDoc = new BehemothDocument();
    while (current.next(key, inputDoc)) {
      count[0]++;
      if (!docFilter.keep(inputDoc)) continue;
      if (dumpBinary && inputDoc.getContent() == null) continue;
      if (!dumpBinary && inputDoc.getText() == null) continue;
      // Name the output file after the URL, a UUID derived from it, or the record number.
      String fileName;
      String urldoc = inputDoc.getUrl();
      if (mode.equals(FileNamingMode.URL) && urldoc != null && urldoc.length() > 0) {
        fileName = URLEncoder.encode(urldoc, "UTF-8");
      } else if (mode.equals(FileNamingMode.UUID) && urldoc != null && urldoc.length() > 0) {
        fileName = UUID.nameUUIDFromBytes(urldoc.getBytes()).toString();
      } else {
        fileName = String.format("%09d", count[0]);
      }
      if (!dumpBinary) fileName += ".txt";
      byte[] contentBytes = dumpBinary ? inputDoc.getContent() : inputDoc.getText().getBytes("UTF-8");
      addToArchive(fileName, contentBytes, dir);
      index.writeBytes(urldoc + "\t" + fileName + "\t" + String.format("%06d", partNum) + "\n");
    }
    current.close();
  }
}
 

Example 11

From project C-Cat, under directory /core/src/main/java/gov/llnl/ontology/mapreduce/ingest/.

Source file: ExtractNounPairsMR.java

private void emitPath(String first, String second, String source, DependencyPath path, Context context) throws IOException, InterruptedException {
  StringBuilder builder = new StringBuilder();
  for (DependencyRelation relation : path) {
    builder.append(relation.relation()).append(":");
  }
  String key = first + ":" + second + ":" + source;
  context.write(new Text(key), new Text(builder.toString()));
}
 

Example 12

From project cdh-mapreduce-ext, under directory /src/main/java/org/apache/hadoop/mapreduce/lib/input/.

Source file: SequenceFileAsTextRecordReader.java

/**
 * Read the next key/value pair, converting both to their text form.
 */
public synchronized boolean nextKeyValue() throws IOException, InterruptedException {
  if (!sequenceFileRecordReader.nextKeyValue()) {
    return false;
  }
  // Allocate the Text objects lazily, then reuse them for every record.
  if (key == null) {
    key = new Text();
  }
  if (value == null) {
    value = new Text();
  }
  key.set(sequenceFileRecordReader.getCurrentKey().toString());
  value.set(sequenceFileRecordReader.getCurrentValue().toString());
  return true;
}
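
The lazy-allocate-then-set pattern above is the standard way to avoid creating a new Text per record. A minimal mapper sketch of the same idea; the WordMapper class and its tokenization are illustrative, not taken from the project above:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  // Reused across map() calls; the framework serializes the bytes on write().
  private final Text word = new Text();
  private static final IntWritable ONE = new IntWritable(1);

  @Override protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    for (String token : value.toString().split("\\s+")) {
      if (token.isEmpty()) continue;
      word.set(token); // overwrite in place instead of new Text(token)
      context.write(word, ONE);
    }
  }
}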
 

Example 13

From project cdh-maven-archetype, under directory /hive-udf/src/main/java/thiswillbereplaced/.

Source file: CustomGenericUDF.java

@Override public Object evaluate(DeferredObject[] arguments) throws HiveException {
  if (arguments[0].get() == null || arguments[1].get() == null) {
    return null;
  }
  Text subtext = (Text) converters[0].convert(arguments[0].get());
  Text text = (Text) converters[1].convert(arguments[1].get());
  int start = 1;
  if (arguments.length == 3) {
    IntWritable startWritable = (IntWritable) converters[2].convert(arguments[2].get());
    if (startWritable == null) {
      intWritable.set(0);
      return intWritable;
    }
    start = startWritable.get();
  }
  // findText is 0-based, while SQL-style positions are 1-based.
  intWritable.set(GenericUDFUtils.findText(text, subtext, start - 1) + 1);
  return intWritable;
}
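
The converters and intWritable fields used above are not shown in the snippet; in a Hive GenericUDF they are normally prepared once in initialize(). A hedged sketch of that setup, modeled on Hive's built-in string UDFs rather than on the original CustomGenericUDF:

private transient ObjectInspectorConverters.Converter[] converters;
private final IntWritable intWritable = new IntWritable(0);

@Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  converters = new ObjectInspectorConverters.Converter[arguments.length];
  // The two string arguments are converted to writable Text.
  for (int i = 0; i < 2; i++) {
    converters[i] = ObjectInspectorConverters.getConverter(arguments[i],
        PrimitiveObjectInspectorFactory.writableStringObjectInspector);
  }
  // The optional third argument is the 1-based start position.
  if (arguments.length == 3) {
    converters[2] = ObjectInspectorConverters.getConverter(arguments[2],
        PrimitiveObjectInspectorFactory.writableIntObjectInspector);
  }
  return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
}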
 

Example 14

From project bagheera, under directory /src/main/java/com/mozilla/bagheera/sink/.

Source file: SequenceFileSink.java

@Override public void store(String key, byte[] data) {
  try {
    lock.acquire();
    checkRollover();
    if (useBytesValue) {
      writer.append(new Text(key), new BytesWritable(data));
    } else {
      // new Text(byte[]) treats the payload as UTF-8 text.
      writer.append(new Text(key), new Text(data));
    }
    stored.mark();
    bytesWritten.getAndAdd(key.length() + data.length);
  } catch (IOException e) {
    LOG.error("IOException while writing key/value pair", e);
    throw new RuntimeException(e);
  } catch (InterruptedException e) {
    LOG.error("Interrupted while writing key/value pair", e);
  } finally {
    lock.release();
  }
}
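
The writer field above is presumably a SequenceFile.Writer keyed by Text. A minimal sketch of creating and using one; the path and payload are illustrative:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SeqFileWriteDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf,
        new Path("/tmp/demo.seq"), Text.class, BytesWritable.class);
    try {
      writer.append(new Text("key"), new BytesWritable("payload".getBytes("UTF-8")));
    } finally {
      writer.close();
    }
  }
}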
 

Example 15

From project action-core, under directory /src/main/java/com/ning/metrics/action/hdfs/data/parser/.

Source file: WritableRowSerializer.java

@Override public Rows toRows(Registrar r, Object value) throws RowAccessException {
  Row row;
  if (value instanceof Text) {
    String[] data = value.toString().split("\t");
    List<ColumnKey> columnKeyList = new ArrayList<ColumnKey>();
    for (int i = 0; i < data.length; i++) {
      columnKeyList.add(new DynamicColumnKey("col-" + i));
    }
    row = RowFactory.getRow(new RowSchema("Text", columnKeyList), Arrays.asList(data));
  } else if (value instanceof BytesWritable) {
    byte[] data = ((BytesWritable) value).getBytes();
    ArrayList<String> listData = new ArrayList<String>();
    listData.add(new String(data, Charset.forName("UTF-8")));
    row = RowFactory.getRow(new RowSchema("BytesWritable", new DynamicColumnKey("col-1")), listData);
  } else if (value instanceof RowThrift) {
    row = (RowThrift) value;
  } else {
    throw new RowAccessException(String.format("Writable [%s] is not a known row type",
        value == null ? null : value.getClass()));
  }
  Rows rows = new Rows();
  rows.add(row);
  return rows;
}
 

Example 16

From project ARCInputFormat, under directory /src/org/commoncrawl/hadoop/io/.

Source file: ARCSplit.java

/**
 * @inheritDoc
 */
public void readFields(DataInput in) throws IOException {
  int nResources = in.readInt();
  resources = new ARCResource[nResources];
  for (int i = 0; i < nResources; i++) {
    // Text.readString reads a UTF-8 string written with Text.writeString.
    resources[i] = new ARCResource(Text.readString(in), in.readLong());
  }
  size = in.readLong();
  hosts = null;
}
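
readFields only makes sense next to the matching serialization. A hedged sketch of the write(DataOutput) counterpart this deserializer implies; the ARCResource accessor names are assumptions, not from the original source:

public void write(DataOutput out) throws IOException {
  out.writeInt(resources.length);
  for (ARCResource resource : resources) {
    // Text.writeString is the symmetric counterpart of Text.readString.
    Text.writeString(out, resource.getName()); // accessor name assumed
    out.writeLong(resource.getSize());         // accessor name assumed
  }
  out.writeLong(size);
}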
 

Example 17

From project cascading, under directory /src/hadoop/cascading/scheme/hadoop/.

Source file: TextLine.java

@Override public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf,RecordReader,OutputCollector> tap, JobConf conf) {
  if (tap.getFullIdentifier(conf).endsWith(".zip")) {
    throw new IllegalStateException("cannot write zip files: " + FileOutputFormat.getOutputPath(conf));
  }
  if (getSinkCompression() == Compress.DISABLE) {
    conf.setBoolean("mapred.output.compress", false);
  } else if (getSinkCompression() == Compress.ENABLE) {
    conf.setBoolean("mapred.output.compress", true);
  }
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setOutputFormat(TextOutputFormat.class);
}