Java Code Examples for org.apache.lucene.analysis.Analyzer

The following code examples are extracted from open source projects. You can click to vote up the examples that are useful to you.

Example 1

From project book, under directory /src/main/java/com/tamingtext/classifier/mlt/.

Source file: MoreLikeThisCategorizer.java

  22 
vote

/**
 * Configures shingle (token n-gram) size on the MoreLikeThis analyzer.
 * Sizes of 1 or less are ignored. The current analyzer is wrapped in a
 * ShingleAnalyzerWrapper on first use; min and max shingle size are then
 * pinned to the same value so only n-grams of exactly {@code size} are built.
 */
public void setNgramSize(int size){
  if (size <= 1) {
    return;
  }
  Analyzer current=moreLikeThis.getAnalyzer();
  ShingleAnalyzerWrapper shingleWrapper;
  if (current instanceof ShingleAnalyzerWrapper) {
    shingleWrapper=(ShingleAnalyzerWrapper)current;
  } else {
    // First call: wrap the existing analyzer and install the wrapper.
    shingleWrapper=new ShingleAnalyzerWrapper(current);
    moreLikeThis.setAnalyzer(shingleWrapper);
  }
  shingleWrapper.setMaxShingleSize(size);
  shingleWrapper.setMinShingleSize(size);
}
 

Example 2

From project couchdb-lucene, under directory /src/test/java/com/github/rnewson/couchdb/lucene/util/.

Source file: AnalyzersTest.java

  22 
vote

/**
 * A "perfield:" spec must yield a PerFieldAnalyzerWrapper whose description
 * names StandardAnalyzer for the default and "name" fields and
 * KeywordAnalyzer for the "age" field.
 */
@Test public void testPerField() throws Exception {
  final Analyzer analyzer=Analyzers.getAnalyzer("perfield:{name:\"standard\",age:\"keyword\"}");
  assertThat(analyzer,is(PerFieldAnalyzerWrapper.class));
  // Inspect the wrapper's description once rather than re-rendering per assertion.
  final String description=analyzer.toString();
  assertThat(description,containsString("default=org.apache.lucene.analysis.standard.StandardAnalyzer"));
  assertThat(description,containsString("name=org.apache.lucene.analysis.standard.StandardAnalyzer"));
  assertThat(description,containsString("age=org.apache.lucene.analysis.KeywordAnalyzer"));
}
 

Example 3

From project en4j, under directory /NBPlatformApp/SearchLucene/src/main/java/com/rubenlaguna/en4j/searchlucene/.

Source file: IndexWriterWrapper.java

  22 
vote

/**
 * Lazily initializes the shared IndexWriter over an FSDirectory with a
 * Lucene 2.9 StandardAnalyzer and compound-file storage. No-op when the
 * writer already exists; IO failures are reported, not rethrown.
 */
void initWriter(){
  if (indexWriterInstance != null) {
    return;
  }
  try {
    final File indexDir=getDirectoryFile();
    final Analyzer analyzer=new StandardAnalyzer(Version.LUCENE_29);
    final FSDirectory directory=FSDirectory.open(indexDir);
    indexWriterInstance=new IndexWriter(directory,analyzer,IndexWriter.MaxFieldLength.UNLIMITED);
    indexWriterInstance.setUseCompoundFile(true);
  }
  catch (IOException ex) {
    Exceptions.printStackTrace(ex);
  }
}
 

Example 4

From project indextank-engine, under directory /src/main/java/com/flaptor/indextank/query/analyzers/.

Source file: CompositeAnalyzer.java

  22 
vote

/**
 * Returns a token stream for the field, delegating to the field-specific
 * analyzer when one is registered and to the default analyzer otherwise.
 */
@Override public final TokenStream tokenStream(String fieldName,Reader reader){
  final Analyzer chosen=perfieldsAnalyzers.get(fieldName);
  if (chosen != null) {
    return chosen.tokenStream(fieldName,reader);
  }
  return defaultAnalyzer.tokenStream(fieldName,reader);
}
 

Example 5

From project jspwiki, under directory /src/org/apache/wiki/search/.

Source file: LuceneSearchProvider.java

  22 
vote

/**
 * Reflectively instantiates the configured Lucene Analyzer by invoking its
 * single-argument (Version) constructor with LUCENE_36.
 * @return a new Analyzer of the class named by m_analyzerClass
 * @throws ProviderException if the class cannot be found or instantiated
 */
private Analyzer getLuceneAnalyzer() throws ProviderException {
  try {
    // Parameterized Class<?>/Constructor<?> instead of raw types; look the
    // constructor up via Version.class rather than calling getClass() on an
    // enum constant (equivalent for a plain enum, but explicit and clearer).
    Class<?> clazz=ClassUtil.findClass("",m_analyzerClass);
    Constructor<?> constructor=clazz.getConstructor(Version.class);
    return (Analyzer)constructor.newInstance(Version.LUCENE_36);
  }
 catch (  Exception e) {
    String msg="Could not get LuceneAnalyzer class " + m_analyzerClass + ", reason: ";
    log.error(msg,e);
    throw new ProviderException(msg + e);
  }
}
 

Example 6

From project jsword, under directory /src/main/java/org/crosswire/jsword/index/lucene/analysis/.

Source file: LuceneAnalyzer.java

  22 
vote

/**
 * Builds the per-field analyzer used to index the given book. Key, Strong's
 * number and cross-reference fields always get dedicated analyzers; the body
 * field gets a natural-language analyzer only when the installed index
 * version is newer than 1.1.
 */
public LuceneAnalyzer(Book book){
  // SimpleAnalyzer is the fallback for any field without an explicit mapping.
  analyzer=new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
  // Older (<= 1.1) indexes were built without per-language body analysis;
  // only newer index versions get the book-specific analyzer for the body.
  if (IndexMetadata.instance().getInstalledIndexVersion() > IndexMetadata.INDEX_VERSION_1_1) {
    Analyzer myNaturalLanguageAnalyzer=AnalyzerFactory.getInstance().createAnalyzer(book);
    analyzer.addAnalyzer(LuceneIndex.FIELD_BODY,myNaturalLanguageAnalyzer);
  }
  analyzer.addAnalyzer(LuceneIndex.FIELD_KEY,new KeyAnalyzer());
  analyzer.addAnalyzer(LuceneIndex.FIELD_STRONG,new StrongsNumberAnalyzer());
  analyzer.addAnalyzer(LuceneIndex.FIELD_XREF,new XRefAnalyzer());
}
 

Example 7

From project Kairos, under directory /src/java/org/apache/nutch/analysis/.

Source file: NutchDocumentAnalyzer.java

  22 
vote

/**
 * Returns a new token stream for text from the named field: the "anchor"
 * field uses the anchor analyzer, every other field the content analyzer.
 */
public TokenStream tokenStream(String fieldName,Reader reader){
  final Analyzer analyzer="anchor".equals(fieldName) ? ANCHOR_ANALYZER : CONTENT_ANALYZER;
  return analyzer.tokenStream(fieldName,reader);
}
 

Example 8

From project lenya, under directory /org.apache.lenya.module.lucene/src/main/java/org/apache/cocoon/components/search/components/impl/.

Source file: AnalyzerManagerImpl.java

  22 
vote

/**
 * Looks up a registered analyzer by id. Configurable analyzers with
 * check-file support enabled are reconfigured before being returned.
 * @param id registry key of the analyzer
 * @return the (possibly reconfigured) analyzer
 * @throws ConfigurationException when no analyzer is registered under the id
 */
public Analyzer getAnalyzer(String id) throws ConfigurationException {
  final Analyzer found=(Analyzer)this.analyzers.get(id);
  if (found == null) {
    throw new ConfigurationException("analyzer " + id + " doesn't exist");
  }
  if (found instanceof ConfigurableAnalyzer) {
    final ConfigurableAnalyzer configurable=(ConfigurableAnalyzer)found;
    if (configurable.enableCheckFile()) {
      // Pick up any on-disk configuration changes before handing it out.
      configurable.reconfigure();
    }
  }
  return found;
}
 

Example 9

From project montysolr, under directory /contrib/antlrqueryparser/src/test/org/apache/lucene/queryParser/aqp/.

Source file: TestAqpSLGStandardTest.java

  22 
vote

/**
 * Terms containing punctuation must pass through whitespace analysis intact.
 */
public void testPunct() throws Exception {
  final Analyzer analyzer=new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
  final String[] punctuatedTerms={"a&b","a&&b",".NET"};
  for (final String term : punctuatedTerms) {
    // Each input must parse to itself, punctuation preserved.
    assertQueryEquals(term,analyzer,term);
  }
}
 

Example 10

From project SIREn, under directory /siren-core/src/test/java/org/sindice/siren/analysis/bench/.

Source file: TupleAnalyzerBenchmark.java

  22 
vote

/**
 * Benchmark body: runs the analysis {@code reps} times, constructing a fresh
 * StandardAnalyzer per repetition (so construction cost is included), and
 * returns the accumulated result of performAnalysis.
 */
public long timeStandardAnalyzer(final int reps) throws IOException {
  long total=0;
  for (int rep=0; rep < reps; rep++) {
    total+=this.performAnalysis(new StandardAnalyzer(matchVersion));
  }
  return total;
}
 

Example 11

From project Solbase-Solr, under directory /src/java/org/apache/solr/schema/.

Source file: IndexSchema.java

  22 
vote

/**
 * Snapshots every schema field's analyzer into a field-name -&gt; analyzer map.
 */
protected HashMap<String,Analyzer> analyzerCache(){
  final HashMap<String,Analyzer> byFieldName=new HashMap<String,Analyzer>();
  for (final SchemaField field : getFields().values()) {
    byFieldName.put(field.getName(),field.getType().getAnalyzer());
  }
  return byFieldName;
}
 

Example 12

From project supose, under directory /supose-core/src/test/java/com/soebes/supose/core/lucene/.

Source file: LuceneTest.java

  22 
vote

/**
 * A query with a single asterisk wildcard ("+filename:/*.doc") must match
 * exactly three documents in the test index.
 * NOTE(review): assertEquals(actual, expected) matches TestNG's argument
 * order; if this ever moves to JUnit the arguments must be swapped.
 */
@Test public void testSingleAsterik() throws ParseException, IOException {
  Analyzer analyzer=AnalyzerFactory.createInstance();
  QueryParser parser=new CustomQueryParser(FieldNames.CONTENTS,analyzer);
  Query query=parser.parse("+filename:/*.doc");
  TopDocs result=isearcher.search(query,null,10);
  printOut(query,"testSingleAsterik",result);
  assertEquals(result.totalHits,3);
}
 

Example 13

From project thrudb, under directory /src/org/thrudb/thrudex/lucene/.

Source file: ThrudexLuceneHandler.java

  22 
vote

/**
 * Searches the named index. A per-field analyzer wrapper is seeded with the
 * request's default analyzer and overlaid with any field-specific analyzers
 * supplied in the query before delegating to the index's own search.
 * @throws ThrudexException when the requested index does not exist
 */
public SearchResponse search(SearchQuery s) throws ThrudexException, TException {
  if (!isValidIndex(s.index))   throw new ThrudexExceptionImpl("No Index Found: " + s.index);
  Analyzer defaultAnalyzer=getAnalyzer(s.getDefaultAnalyzer());
  PerFieldAnalyzerWrapper qAnalyzer=new PerFieldAnalyzerWrapper(defaultAnalyzer);
  if (s.isSetFieldAnalyzers()) {
    // NOTE(review): keySet()+get() performs a second lookup per field;
    // entrySet() iteration would avoid it — confirm fieldAnalyzers' declared
    // type before changing.
    for (    String field : s.fieldAnalyzers.keySet())     qAnalyzer.addAnalyzer(field,getAnalyzer(s.fieldAnalyzers.get(field)));
  }
  return indexMap.get(s.index).search(s,qAnalyzer);
}
 

Example 14

From project zoie, under directory /zoie-core/src/main/java/proj/zoie/impl/indexing/internal/.

Source file: BaseSearchIndex.java

  22 
vote

/**
 * Applies a batch update: deletes the given doc ids first, then inserts the
 * supplied documents through a writer opened with the default analyzer and
 * similarity. Each insert request may carry its own analyzer override.
 * The writer is always committed in the finally block so partial work is
 * persisted even when an insert throws, and is optionally closed afterwards
 * when _closeWriterAfterUpdate is set.
 * @throws IOException if deletion, insertion or commit fails
 */
public void updateIndex(LongSet delDocs,List<IndexingReq> insertDocs,Analyzer defaultAnalyzer,Similarity similarity) throws IOException {
  deleteDocs(delDocs);
  IndexWriter idxMod=null;
  try {
    idxMod=openIndexWriter(defaultAnalyzer,similarity);
    if (idxMod != null) {
      for (      IndexingReq idxPair : insertDocs) {
        // Per-request analyzer wins over the writer's default when present.
        Analyzer analyzer=idxPair.getAnalyzer();
        Document doc=idxPair.getDocument();
        if (analyzer == null) {
          idxMod.addDocument(doc);
        }
 else {
          idxMod.addDocument(doc,analyzer);
        }
      }
    }
  }
  finally {
    if (idxMod != null) {
      idxMod.commit();
      if (_closeWriterAfterUpdate) {
        closeIndexWriter();
      }
    }
  }
}
 

Example 15

From project chililog-server, under directory /src/test/java/org/chililog/server/common/.

Source file: TextTokenizerTest.java

  21 
vote

/**
 * Used for benchmarking: basic tokenizing without regular expressions.
 * Tokenizes with Lucene's StandardAnalyzer and returns each distinct term
 * once, in order of first appearance.
 * @param text the text to tokenize; null or empty yields an empty list
 * @return the distinct tokens in first-seen order
 * @throws IOException if the token stream fails
 */
public List<String> basicTokenize(String text) throws IOException {
  List<String> tokens=new ArrayList<String>();
  if (StringUtils.isEmpty(text)) {
    return tokens;
  }
  Analyzer analyzer=new StandardAnalyzer(Version.LUCENE_30);
  HashMap<String,String> lookup=new HashMap<String,String>();
  TokenStream stream=analyzer.tokenStream("field",new StringReader(text));
  try {
    TermAttribute termAttribute=stream.getAttribute(TermAttribute.class);
    while (stream.incrementToken()) {
      String term=termAttribute.term();
      // lookup doubles as a "seen" set so each term is emitted only once.
      if (!lookup.containsKey(term)) {
        tokens.add(term);
        lookup.put(term,null);
      }
    }
  }
  finally {
    // Previously leaked: release the token stream even when tokenizing throws.
    stream.close();
  }
  return tokens;
}
 

Example 16

From project elephant-twin, under directory /com.twitter.elephanttwin.lucene/src/main/java/com/twitter/elephanttwin/lucene/indexing/.

Source file: AbstractLuceneIndexingReducer.java

  21 
vote

/**
 * Reducer setup: starts a heartbeat thread to keep the task alive during long
 * index builds, resolves the analyzer and similarity classes from the job
 * configuration (with defaults), and initializes the index builder over a
 * task-attempt-specific temp directory.
 * @throws IOException if the index builder fails to initialize
 */
@Override public void setup(Reducer<KIN,VIN,NullWritable,NullWritable>.Context context) throws IOException {
  LOG.info("Starting up indexer...");
  // Long-running indexing can exceed Hadoop's task timeout; the heartbeat
  // thread reports progress so the attempt is not killed.
  heartbeatThread=new ReducerHeartBeatThread(context);
  heartbeatThread.start();
  String tmp=context.getConfiguration().get("hadoop.tmp.dir");
  // Unique per task attempt so retries do not collide on the same directory.
  String shardName="index-" + context.getTaskAttemptID().toString();
  tmpIndex=new File(tmp,shardName);
  LOG.info("tmp index location: " + tmpIndex);
  String analyzerClassName=context.getConfiguration().get(ANALYZER,"org.apache.lucene.analysis.SimpleAnalyzer");
  Analyzer analyzer=ReflectionHelper.<Analyzer>createClassFromName(analyzerClassName,Analyzer.class);
  if (analyzer == null) {
    throw new RuntimeException("Unable to create analyzer!");
  }
  LOG.info("analyzer created: " + analyzerClassName);
  String similarityClassName=context.getConfiguration().get(SIMILARITY,DefaultSimilarity.class.getName());
  LOG.info("Similarity: " + similarityClassName);
  Similarity similarity=ReflectionHelper.createClassFromName(similarityClassName,Similarity.class);
  if (similarity == null) {
    throw new RuntimeException("Unable to create similarity!");
  }
  LOG.info("similarity created: " + similarityClassName);
  indexer=new IndexBuilder(tmpIndex,analyzer,similarity);
  indexer.initialize();
}
 

Example 17

From project guj.com.br, under directory /src/net/jforum/search/.

Source file: LuceneManager.java

  21 
vote

/**
 * Initializes the Lucene search subsystem: reflectively creates the
 * configured analyzer, points the settings at the FS index directory, clears
 * any stale lock file, wires indexer and search together, and publishes the
 * settings globally. Any failure is wrapped in a ForumException.
 * @see net.jforum.search.SearchManager#init()
 */
public void init(){
  try {
    // Analyzer class name is configurable; instantiate via no-arg constructor.
    Analyzer analyzer=(Analyzer)Class.forName(SystemGlobals.getValue(ConfigKeys.LUCENE_ANALYZER)).newInstance();
    this.settings=new LuceneSettings(analyzer);
    this.settings.useFSDirectory(SystemGlobals.getValue(ConfigKeys.LUCENE_INDEX_WRITE_PATH));
    // A stale Lucene lock file from a previous crash would block indexing.
    this.removeLockFile();
    this.indexer=new LuceneIndexer(this.settings);
    this.search=new LuceneSearch(this.settings,new LuceneContentCollector(this.settings));
    this.indexer.watchNewDocuDocumentAdded(this.search);
    SystemGlobals.setObjectValue(ConfigKeys.LUCENE_SETTINGS,this.settings);
  }
 catch (  Exception e) {
    throw new ForumException(e);
  }
}
 

Example 18

From project java-maven-tests, under directory /src/lucene-benchmark/src/test/java/eg/sample/lb/.

Source file: BasicLuceneTest.java

  21 
vote

/**
 * End-to-end smoke test of basic Lucene usage: index a single document into a
 * RAM directory, query it back by a term from its text, and verify the stored
 * field round-trips unchanged.
 */
@Test public void testLuceneUsage() throws IOException, ParseException {
  final Analyzer analyzer=new StandardAnalyzer(LUCENE_VERSION);
  final Directory directory=new RAMDirectory();
  final IndexWriterConfig writerConfig=new IndexWriterConfig(LUCENE_VERSION,analyzer);
  // CREATE: start from an empty index even if the directory were reused.
  writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  IndexWriter indexWriter=new IndexWriter(directory,writerConfig);
  final Document luceneDocument=new Document();
  final String text="This is the text to be indexed.";
  // Stored + analyzed so the field is both searchable and retrievable.
  luceneDocument.add(new Field("fieldname",text,Field.Store.YES,Field.Index.ANALYZED));
  indexWriter.addDocument(luceneDocument);
  indexWriter.optimize();
  indexWriter.close();
  IndexSearcher indexSearcher=new IndexSearcher(directory);
  QueryParser parser=new QueryParser(LUCENE_VERSION,"fieldname",analyzer);
  Query query=parser.parse("text");
  final TopDocs hits=indexSearcher.search(query,10);
  assertEquals(1,hits.totalHits);
  for (int i=0; i < hits.totalHits; i++) {
    final ScoreDoc scoreDoc=hits.scoreDocs[i];
    final Document hitDoc=indexSearcher.doc(scoreDoc.doc);
    assertEquals("This is the text to be indexed.",hitDoc.get("fieldname"));
  }
  indexSearcher.close();
  directory.close();
}
 

Example 19

From project jforum2, under directory /src/net/jforum/search/.

Source file: LuceneManager.java

  21 
vote

/**
 * Initializes the Lucene search subsystem: reflectively creates the
 * configured analyzer, points the settings at the FS index directory, clears
 * any stale lock file, wires indexer and search together, and publishes the
 * settings globally. Any failure is wrapped in a ForumException.
 * @see net.jforum.search.SearchManager#init()
 */
public void init(){
  try {
    // Analyzer class name is configurable; instantiate via no-arg constructor.
    Analyzer analyzer=(Analyzer)Class.forName(SystemGlobals.getValue(ConfigKeys.LUCENE_ANALYZER)).newInstance();
    this.settings=new LuceneSettings(analyzer);
    this.settings.useFSDirectory(SystemGlobals.getValue(ConfigKeys.LUCENE_INDEX_WRITE_PATH));
    // A stale Lucene lock file from a previous crash would block indexing.
    this.removeLockFile();
    this.indexer=new LuceneIndexer(this.settings);
    this.search=new LuceneSearch(this.settings,new LuceneContentCollector(this.settings));
    this.indexer.watchNewDocuDocumentAdded(this.search);
    SystemGlobals.setObjectValue(ConfigKeys.LUCENE_SETTINGS,this.settings);
  }
 catch (  Exception e) {
    throw new ForumException(e);
  }
}
 

Example 20

From project subsonic_2, under directory /subsonic-main/src/main/java/net/sourceforge/subsonic/service/.

Source file: LuceneSearchService.java

  21 
vote

/**
 * Runs a paged multi-field search against the index of the given type and
 * resolves matching documents back to MusicFile entries. Failures are logged
 * and swallowed; the (possibly partial) result is returned regardless.
 * @param criteria query text plus offset/count paging window
 * @param indexType selects the index, its fields and per-field boosts
 * @return the search result; never null
 */
public SearchResult search(SearchCriteria criteria,IndexType indexType){
  SearchResult result=new SearchResult();
  List<MusicFile> musicFiles=new ArrayList<MusicFile>();
  int offset=criteria.getOffset();
  int count=criteria.getCount();
  result.setOffset(offset);
  result.setMusicFiles(musicFiles);
  IndexReader reader=null;
  try {
    reader=createIndexReader(indexType);
    Searcher searcher=new IndexSearcher(reader);
    Analyzer analyzer=new SubsonicAnalyzer();
    MultiFieldQueryParser queryParser=new MultiFieldQueryParser(LUCENE_VERSION,indexType.getFields(),analyzer,indexType.getBoosts());
    Query query=queryParser.parse(criteria.getQuery());
    // Fetch offset+count hits, then slice out the requested window below.
    TopDocs topDocs=searcher.search(query,null,offset + count);
    result.setTotalHits(topDocs.totalHits);
    // Clamp the window so paging past the end yields an empty page, not an error.
    int start=Math.min(offset,topDocs.totalHits);
    int end=Math.min(start + count,topDocs.totalHits);
    for (int i=start; i < end; i++) {
      Document doc=searcher.doc(topDocs.scoreDocs[i].doc);
      musicFiles.add(musicFileService.getMusicFile(doc.getField(FIELD_PATH).stringValue()));
    }
  }
 catch (  Throwable x) {
    // Best-effort: search failures degrade to an empty result, never propagate.
    LOG.error("Failed to execute Lucene search.",x);
  }
 finally {
    // NOTE(review): only the reader is closed; the IndexSearcher wraps it, so
    // closing the reader appears sufficient here — confirm for this Lucene version.
    FileUtil.closeQuietly(reader);
  }
  return result;
}
 

Example 21

From project walkingword, under directory /src/com/tistory/devyongsik/analyzer/.

Source file: KoreanSynonymEngine.java

  21 
vote

/**
 * Builds an in-memory (RAM) index of synonym groups: each comma-separated
 * entry in {@code synonyms} becomes one document whose "syn" field holds
 * every word of the group, so a lookup on any member finds all of them.
 * Index failures are logged and swallowed.
 * NOTE(review): the log messages below are mojibake-garbled (originally
 * non-ASCII, likely Korean); preserved byte-for-byte.
 */
private static void createSynonymIndex(){
  directory=new RAMDirectory();
  try {
    Analyzer analyzer=new SimpleAnalyzer(Version.LUCENE_31);
    IndexWriterConfig iwc=new IndexWriterConfig(Version.LUCENE_31,analyzer);
    IndexWriter ramWriter=new IndexWriter(directory,iwc);
    int recordCnt=0;
    for (    String syn : synonyms) {
      String[] synonymWords=syn.split(",");
      Document doc=new Document();
      for (int i=0, size=synonymWords.length; i < size; i++) {
        String fieldValue=synonymWords[i];
        // Not analyzed, no norms: synonym terms are matched verbatim.
        Field field=new Field("syn",fieldValue,Store.YES,Index.NOT_ANALYZED_NO_NORMS,TermVector.NO);
        doc.add(field);
        recordCnt++;
      }
      ramWriter.addDocument(doc);
    }
    ramWriter.close();
    loggerStatic.info("???? ??? ??? ??? : {}",recordCnt);
  }
 catch (  CorruptIndexException e) {
    loggerStatic.error("???? ??? ? ??? ???? : {}",e);
    e.printStackTrace();
  }
catch (  LockObtainFailedException e) {
    loggerStatic.error("???? ??? ? ??? ???? : {}",e);
    e.printStackTrace();
  }
catch (  IOException e) {
    loggerStatic.error("???? ??? ? ??? ???? : {}",e);
    e.printStackTrace();
  }
}
 

Example 22

From project behemoth, under directory /mahout/src/main/java/com/digitalpebble/behemoth/mahout/.

Source file: BehemothDocumentProcessor.java

  20 
vote

/**
 * Convert the input documents into token arrays using {@link StringTuple}.
 * The input documents have to be in {@link org.apache.hadoop.io.SequenceFile}
 * format; tokenization runs as a map-only Hadoop job.
 * @param input input directory of the documents in {@link org.apache.hadoop.io.SequenceFile} format
 * @param analyzerClass the Lucene {@link Analyzer} class used for tokenizing the UTF-8 text
 * @param output output directory where the {@link StringTuple} token array of each document has to be created
 * @param baseConf the base Hadoop configuration to clone for this job
 * @throws IllegalStateException if the job does not complete successfully
 */
public static void tokenizeDocuments(Path input,Class<? extends Analyzer> analyzerClass,Path output,Configuration baseConf) throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf=new Configuration(baseConf);
  conf.set("io.serializations","org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization");
  // The mapper reads the analyzer class name back out of the configuration.
  conf.set(ANALYZER_CLASS,analyzerClass.getName());
  Job job=new Job(conf);
  job.setJobName("DocumentProcessor::LuceneTokenizer: input-folder: " + input);
  job.setJarByClass(BehemothDocumentProcessor.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(StringTuple.class);
  FileInputFormat.setInputPaths(job,input);
  FileOutputFormat.setOutputPath(job,output);
  job.setMapperClass(LuceneTokenizerMapper.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  // Map-only job: tokenization needs no aggregation step.
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  // Clear any previous output so the job does not fail on an existing path.
  HadoopUtil.delete(conf,output);
  boolean succeeded=job.waitForCompletion(true);
  if (!succeeded)   throw new IllegalStateException("Job failed!");
}
 

Example 23

From project capedwarf-blue, under directory /search/src/main/java/org/jboss/capedwarf/search/.

Source file: DocumentFieldAnalyzer.java

  19 
vote

/**
 * Maps a document field type to the analyzer used to index it: TEXT and HTML
 * fields get tokenized analysis, all other supported types pass through
 * untokenized.
 * @throws IllegalArgumentException for unsupported field types
 */
private Analyzer getAnalyzer(Field.FieldType fieldType){
  switch (fieldType) {
    case NUMBER:
    case ATOM:
    case DATE:
    case GEO_POINT:
      // These types are indexed verbatim, without tokenization.
      return passThroughAnalyzer;
    case TEXT:
      return standardAnalyzer;
    case HTML:
      return standardHtmlAnalyzer;
    default:
      throw new IllegalArgumentException("Unsupported field type: " + fieldType);
  }
}
 

Example 24

From project chombo, under directory /src/main/java/org/chombo/util/.

Source file: Utility.java

  19 
vote

/**
 * Tokenizes the given text with the supplied analyzer and collects every
 * token in stream order.
 * @param text the text to tokenize
 * @param analyzer the Lucene analyzer used to produce the token stream
 * @return all tokens in stream order
 * @throws IOException if the token stream fails
 */
public static List<String> tokenize(String text,Analyzer analyzer) throws IOException {
  TokenStream stream=analyzer.tokenStream("contents",new StringReader(text));
  List<String> tokens=new ArrayList<String>();
  try {
    // getAttribute is generic; the explicit cast in the original was redundant.
    CharTermAttribute termAttribute=stream.getAttribute(CharTermAttribute.class);
    while (stream.incrementToken()) {
      tokens.add(termAttribute.toString());
    }
  }
  finally {
    // Previously leaked: always release the token stream.
    stream.close();
  }
  return tokens;
}
 

Example 25

From project eclipse-instasearch, under directory /instasearch/src/it/unibz/instasearch/indexing/.

Source file: Searcher.java

  19 
vote

/**
 * Parses the user's search string into a Lucene Query over the contents
 * field, with AND as the default operator, expanded-term case preserved,
 * the default phrase slop, and leading wildcards permitted.
 * @throws ParseException if the search string is not valid query syntax
 */
private Query parserSearchString(String searchString,Analyzer analyzer) throws ParseException {
  final QueryParser parser=new QueryParser(Field.CONTENTS.toString(),analyzer);
  parser.setDefaultOperator(Operator.AND);
  parser.setLowercaseExpandedTerms(false);
  parser.setPhraseSlop(DEFAULT_PHRASE_SLOP);
  parser.setAllowLeadingWildcard(true);
  return parser.parse(searchString);
}
 

Example 26

From project elasticsearch-analysis-combo, under directory /src/main/java/org/elasticsearch/index/analysis/.

Source file: ComboAnalyzer.java

  19 
vote

/**
 * Read settings and load the appropriate sub-analyzers into a ComboAnalyzer.
 * Idempotent: returns immediately once the analyzer field has been built.
 * @throws ElasticSearchIllegalArgumentException when "sub_analyzers" is absent
 */
synchronized protected void init(){
  if (analyzer != null)   return;
  AnalysisService analysisService=injector.getInstance(AnalysisService.class);
  String[] sub=settings.getAsArray("sub_analyzers");
  ArrayList<Analyzer> subAnalyzers=new ArrayList<Analyzer>();
  if (sub == null) {
    throw new ElasticSearchIllegalArgumentException("\"" + NAME + "\" analyzers must have a \"sub_analyzers\" list property");
  }
  for (  String subname : sub) {
    // Renamed from "analyzer" to stop shadowing the field of the same name.
    NamedAnalyzer subAnalyzer=analysisService.analyzer(subname);
    if (subAnalyzer == null) {
      // Unknown sub-analyzers are skipped, not fatal.
      logger.debug("Sub-analyzer \"" + subname + "\" not found!");
    }
 else {
      subAnalyzers.add(subAnalyzer);
    }
  }
  this.analyzer=new org.apache.lucene.analysis.ComboAnalyzer(Lucene.VERSION,subAnalyzers.toArray(new Analyzer[subAnalyzers.size()]));
  // Optional tuning flags: only applied when explicitly configured.
  Boolean tokenstreamReuse=settings.getAsBoolean("tokenstream_reuse",null);
  if (tokenstreamReuse != null)   this.analyzer.setTokenStreamReuseEnabled(tokenstreamReuse);
  Boolean tokenstreamCaching=settings.getAsBoolean("tokenstream_caching",null);
  if (tokenstreamCaching != null)   this.analyzer.setTokenStreamCachingEnabled(tokenstreamCaching);
}
 

Example 27

From project gast-lib, under directory /libraryLuceneExt/src/root/gast/speech/lucene/.

Source file: LuceneIndexBuilder.java

  19 
vote

/**
 * Creates an IndexWriter over the given directory. On any index-corruption,
 * locking or IO failure the error is logged and null is returned.
 * NOTE(review): the "create" parameter is unused — confirm whether it was
 * meant to drive the writer's open mode.
 */
private IndexWriter makeWriter(Directory directory,boolean create,Analyzer analyzer){
  IndexWriter result=null;
  try {
    final IndexWriterConfig writerConfig=new IndexWriterConfig(LuceneParameters.VERSION,analyzer);
    result=new IndexWriter(directory,writerConfig);
  }
  catch (CorruptIndexException e) {
    Log.e(TAG,"no index build",e);
  }
  catch (LockObtainFailedException e) {
    Log.e(TAG,"no index build",e);
  }
  catch (IOException e) {
    Log.e(TAG,"no index build",e);
  }
  return result;
}
 

Example 28

From project grails-searchable, under directory /src/java/grails/plugin/searchable/internal/compass/config/.

Source file: EnvironmentSearchableCompassConfigurator.java

  19 
vote

/**
 * Configure the Compass environment: resolves the connection (falling back to
 * the Grails default), applies raw Compass settings, and registers any
 * Converter or Analyzer beans found in the configured bean map.
 * @param compassConfiguration the runtime config instance
 * @param configurationContext a context allowing flexible parameter passing;
 *        must contain a "customConverters" map when converter beans are present
 */
public void configure(CompassConfiguration compassConfiguration,Map configurationContext){
  // Only set a connection when none was configured explicitly.
  if (compassConfiguration.getSettings().getSetting(CompassEnvironment.CONNECTION) == null) {
    String conn=connection;
    if (conn == null) {
      LOG.debug("No connection specified, using default");
      conn=SearchableCompassUtils.getDefaultConnection(grailsApplication);
    }
    LOG.info("Setting Compass connection to [" + conn + "]");
    compassConfiguration.setConnection(conn);
  }
  // Pass through arbitrary Compass settings verbatim.
  if (compassSettings != null) {
    for (Iterator iter=compassSettings.keySet().iterator(); iter.hasNext(); ) {
      String name=iter.next().toString();
      Object value=compassSettings.get(name);
      LOG.debug("Setting Compass setting [" + name + "] = ["+ value+ "]");
      compassConfiguration.setSetting(name,value);
    }
  }
  // Register beans by type: Converters are registered (and recorded in the
  // context); Analyzers become engine analyzer settings; anything else is
  // logged and ignored.
  if (beans != null) {
    Map converters=(Map)configurationContext.get("customConverters");
    for (Iterator iter=beans.entrySet().iterator(); iter.hasNext(); ) {
      Map.Entry entry=(Map.Entry)iter.next();
      String name=(String)entry.getKey();
      Object value=entry.getValue();
      if (value instanceof Converter) {
        LOG.debug("Registering Converter bean [" + name + "] with value ["+ value+ "]");
        compassConfiguration.registerConverter(name,(Converter)value);
        converters.put(name,value);
      }
 else       if (value instanceof Analyzer) {
        LOG.debug("Registering Analyzer bean [" + name + "] with value ["+ value+ "]");
        compassConfiguration.setSetting("compass.engine.analyzer." + name + ".type",value);
      }
 else {
        LOG.warn("Bean [" + name + "] value is null or not a recognised type ["+ (value != null ? value.getClass().getName() : "null")+ "] - ignoring");
      }
    }
  }
}
 

Example 29

From project greplin-lucene-utils, under directory /src/main/java/com/greplin/lucene/query/.

Source file: Queries.java

  19 
vote

/**
 * Returns a PhraseQuery generated by the passed string.
 * @param analyzer - the analyzer used to tokenize the index
 * @param field - the field to match on
 * @param query - the untokenized string
 * @return a PhraseQuery, or null when the input is null, yields no tokens,
 *         or tokenization fails with an IOException
 */
public static PhraseQuery phraseFor(final Analyzer analyzer,final String field,final String query){
  if (query == null) {
    return null;
  }
  final TokenStream tokenStream=analyzer.tokenStream("",new StringReader(query));
  final PhraseQuery result=new PhraseQuery();
  int termCount=0;
  try {
    while (tokenStream.incrementToken()) {
      final String term=tokenStream.getAttribute(CharTermAttribute.class).toString();
      result.add(new Term(field,term));
      termCount++;
    }
  }
  catch (final IOException e) {
    // Tokenization failure degrades to "no phrase".
    return null;
  }
  // An empty token stream produces no usable phrase.
  return termCount > 0 ? result : null;
}
 

Example 30

From project HBasePS, under directory /src/main/java/ch/sentric/hbase/prospective/.

Source file: Percolator.java

  19 
vote

/**
 * Create a  {@code Percolator} instance with the given {@code Analyzer}.
 * @param analyzer to find terms in the text and queries
 */
public Percolator(final Analyzer analyzer){
  this.analyzer=analyzer;
  if (LOG.isDebugEnabled()) {
    LOG.debug("Percolator initialized.");
  }
}
 

Example 31

From project james-mailbox, under directory /lucene/src/main/java/org/apache/james/mailbox/lucene/search/.

Source file: LuceneMessageSearchIndex.java

  19 
vote

/**
 * Builds an IndexWriterConfig (Lucene 3.1) for the given analyzer.
 * When dropIndexOnStart is set the open mode wipes any existing index
 * (CREATE); otherwise the index is created or appended to.
 */
protected IndexWriterConfig createConfig(Analyzer analyzer,boolean dropIndexOnStart){
  IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_31,analyzer);
  config.setOpenMode(dropIndexOnStart ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
  return config;
}
 

Example 32

From project logsaw-app, under directory /net.sf.logsaw.index/src/net/sf/logsaw/index/internal/.

Source file: ARunWithIndexWriter.java

  19 
vote

/**
 * Opens a Lucene index writer, executes the callback method and then closes
 * the writer. The writer is tuned for batch updates (merge factor 30, up to
 * 1000 buffered docs). CoreExceptions pass through unchanged; any other
 * failure is wrapped in a CoreException with a localized message.
 * @param log the log resource, may be <code>null</code>
 * @param analyzer the Lucene analyzer to set on the index writer
 * @param matchVersion the Lucene match version
 * @return any object or <code>null</code>
 * @throws CoreException if an <strong>expected</strong> error occurred
 */
protected final T runWithIndexWriter(ILogResource log,Analyzer analyzer,Version matchVersion) throws CoreException {
  logger.info("Opening index writer for '" + log.getName() + "'...");
  IndexWriter writer=null;
  try {
    Directory dir=FSDirectory.open(IndexPlugin.getDefault().getIndexFile(log));
    LogMergePolicy mp=new LogByteSizeMergePolicy();
    // Higher merge factor favors bulk-update throughput over search speed.
    mp.setMergeFactor(30);
    IndexWriterConfig cfg=new IndexWriterConfig(matchVersion,analyzer);
    cfg.setMaxBufferedDocs(1000);
    cfg.setMergePolicy(mp);
    writer=new IndexWriter(dir,cfg);
    try {
      return doRunWithIndexWriter(writer,log);
    }
  finally {
      // Inner finally: the writer is always closed once it was opened.
      logger.info("Closing index writer for '" + log.getName() + "'...");
      writer.close();
    }
  }
 catch (  CoreException e) {
    throw e;
  }
catch (  Exception e) {
    throw new CoreException(new Status(IStatus.ERROR,IndexPlugin.PLUGIN_ID,NLS.bind(Messages.LuceneIndexService_error_failedToUpdateIndex,new Object[]{log.getName(),e.getLocalizedMessage()}),e));
  }
}
 

Example 33

From project mdk, under directory /service/lucene/src/main/java/uk/ac/ebi/mdk/service/index/.

Source file: KeywordNIOIndex.java

  19 
vote

/**
 * Lazily creates and caches the KeywordAnalyzer used by this index.
 */
@Override public Analyzer getAnalyzer(){
  if (analyzer != null) {
    return analyzer;
  }
  analyzer=new KeywordAnalyzer();
  return analyzer;
}
 

Example 34

From project qsol, under directory /src/java/com/mhs/qsol/.

Source file: QsolParser.java

  19 
vote

/**
 * Converts Qsol query syntax into a Lucene Query object. Parsing happens in
 * two passes: the raw query is parsed and pre-processed (find/replace, date
 * fields), then the pre-processed text is re-parsed and visited to build the
 * final Lucene query.
 * @param field default search field
 * @param query Qsol syntax query
 * @param analyzer lucene analyzer to use on terms
 * @return the Lucene query built from the Qsol input
 * @throws QsolSyntaxException if either parse pass fails
 * @throws EmptyQueryException if the expanded query produces no Lucene query
 * @since 1.0
 */
public Query parse(String field,String query,Analyzer analyzer) throws QsolSyntaxException, EmptyQueryException {
  Node root=null;
  preProcessVisitor.setAnalyzer(analyzer);
  buildQueryVisitor.setOrderOfOps(opsList);
  preProcessVisitor.setDateFields(dateFields);
  buildQueryVisitor.setDateFields(dateFields);
  QueryParser parser=new QueryParser(new StringReader(query));
  if (useHide) {
    parser.setHideOps(hideOr,hideAnd,hideAndNot,hideProximity);
  }
  parser.setOrderOfOps(orderOfOpsMap);
  // NOTE(review): this assert is always true — parser was just constructed.
  assert(parser != null);
  try {
    root=parser.Search();
  }
 catch (  com.mhs.qsol.queryparser.ParseException e) {
    throw new QsolParseException(e);
  }
  preProcessVisitor.setFindReplace(findReplace);
  preProcessVisitor.setFindReplaceRegEx(findReplaceRegEx);
  // First pass result: the textual query after find/replace pre-processing.
  String processedQuery=root.accept(preProcessVisitor,null);
  try {
    // Second pass: re-parse the pre-processed text.
    parser=new QueryParser(new StringReader(processedQuery));
    parser.setOrderOfOps(orderOfOpsMap);
    root=parser.Search();
  }
 catch (  Exception e) {
    throw new QsolSyntaxException(e);
  }
  buildQueryVisitor.setAnalyzer(analyzer);
  buildQueryVisitor.setField(field);
  Query luceneQuery=root.accept(buildQueryVisitor,null);
  if (luceneQuery == null) {
    throw new EmptyQueryException("Expanded query is empty");
  }
  return luceneQuery;
}
 

Example 35

From project riot, under directory /search/src/org/riotfamily/search/analysis/.

Source file: DefaultAnalyzerFactory.java

  19 
vote

/**
 * Returns a language-appropriate analyzer: a Snowball stemmer (with stop
 * words when available) for languages with a registered snowball name, CJK
 * for Japanese/Korean/Chinese, dedicated analyzers for Thai, Greek and
 * Czech, and the default analyzer for anything else (including null).
 * @param language ISO language code, may be null
 * @return the analyzer to use for the language; never null
 */
public Analyzer getAnalyzer(String language){
  if (language != null) {
    String snowballName=(String)snowballNames.get(language);
    if (snowballName != null) {
      String[] stopWords=getStopWords(language);
      if (stopWords != null) {
        return new SnowballAnalyzer(snowballName,stopWords);
      }
      return new SnowballAnalyzer(snowballName);
    }
    // BUG FIX: was language.endsWith("zh"), which matched any code merely
    // ending in "zh"; exact match is intended, consistent with "ja"/"ko".
    if (language.equals("ja") || language.equals("ko") || language.equals("zh")) {
      return new CJKAnalyzer();
    }
    if (language.equals("th")) {
      return new ThaiAnalyzer();
    }
    if (language.equals("el")) {
      return new GreekAnalyzer();
    }
    if (language.equals("cs")) {
      return new CzechAnalyzer();
    }
  }
  return defaultAnalyzer;
}
 

Example 36

From project sensei, under directory /sensei-hadoop-indexing/src/main/java/com/senseidb/indexing/hadoop/keyvalueformat/.

Source file: IntermediateForm.java

  19 
vote

/**
 * This method is used by the index update mapper and processes a document
 * operation into the current intermediate form. The underlying writer is
 * created lazily on first use; the document counter tracks processed docs.
 * @param doc  input document operation
 * @param analyzer  the analyzer used when adding the document
 * @throws IOException if the underlying writer fails
 */
public void process(Document doc,Analyzer analyzer) throws IOException {
  if (writer == null) {
    writer=createWriter();
  }
  writer.addDocument(doc,analyzer);
  numDocs++;
}
 

Example 37

From project thinklab, under directory /plugins/org.integratedmodelling.thinklab.searchengine/src/org/integratedmodelling/searchengine/.

Source file: SearchEngine.java

  19 
vote

/**
 * Builds a search engine instance from configuration properties: reads
 * indexing flags and paths, creates the on-disk cache directories, reflectively
 * instantiates the configured analyzer (StandardAnalyzer by default), clears
 * any stale index lock, and opens an IndexModifier over the index path
 * (creating the index when it does not yet exist).
 * @throws ThinklabException when the analyzer cannot be created or the index
 *         cannot be opened
 */
SearchEngine(String id,Properties properties) throws ThinklabException {
  this.properties=properties;
  this.id=id;
  this.indexIndividuals=BooleanValue.parseBoolean(properties.getProperty(SearchEnginePlugin.SEARCHENGINE_INDEX_INDIVIDUALS_PROPERTY,"false"));
  this.indexPath=properties.getProperty(SearchEnginePlugin.SEARCHENGINE_INDEX_PATH_PROPERTY,SearchEnginePlugin.get().getScratchPath() + "/" + id+ "/index");
  this.indexUncommented=BooleanValue.parseBoolean(properties.getProperty(SearchEnginePlugin.SEARCHENGINE_INDEX_UNCOMMENTED_PROPERTY,"false"));
  this.indexedOntologies=properties.getProperty(SearchEnginePlugin.SEARCHENGINE_INDEX_ONTOLOGIES_PROPERTY,"");
  String itypes=properties.getProperty(SearchEnginePlugin.SEARCHENGINE_INDEX_TYPES_PROPERTY,"");
  // Comma-separated list of types to index; empty means "no type filter".
  if (!itypes.equals("")) {
    iTypes=itypes.trim().split(",");
  }
  File scratchDir=SearchEnginePlugin.get().getScratchPath();
  docCacheDir=new File(scratchDir + "/cache/" + id+ "/doc");
  ontCacheDir=new File(scratchDir + "/cache/" + id+ "/ontology");
  kboxCacheDir=new File(scratchDir + "/cache/" + id+ "/kbox");
  docCacheDir.mkdirs();
  ontCacheDir.mkdirs();
  kboxCacheDir.mkdirs();
  String analyzerClass=properties.getProperty(SearchEnginePlugin.SEARCHENGINE_ANALYZER_CLASS_PROPERTY,"org.apache.lucene.analysis.standard.StandardAnalyzer");
  try {
    // Analyzer is configurable; instantiated via its no-arg constructor.
    Class<?> aClass=Class.forName(analyzerClass);
    analyzer=(Analyzer)aClass.newInstance();
  }
 catch (  Exception e1) {
    throw new ThinklabResourceNotFoundException("searchengine: " + id + ": can't create analyzer: "+ e1.getMessage());
  }
  boolean create_index=!IndexReader.indexExists(indexPath);
  try {
    // A stale lock from a previous crash would block the IndexModifier.
    if (IndexReader.isLocked(indexPath)) {
      IndexReader.unlock(FSDirectory.getDirectory(indexPath,false));
    }
    index=new IndexModifier(indexPath,analyzer,create_index);
  }
 catch (  IOException e) {
    throw new ThinklabIOException(e);
  }
}