Analyzer


1   package org.apache.lucene.analysis;
2   
3   /**
4    * Copyright 2004 The Apache Software Foundation
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.Reader  ;
20  
21  /** An Analyzer builds TokenStreams, which analyze text.  It thus represents a
22   *  policy for extracting index terms from text.
23   *  <p>
24   *  Typical implementations first build a Tokenizer, which breaks the stream of
25   *  characters from the Reader into raw Tokens.  One or more TokenFilters may
26   *  then be applied to the output of the Tokenizer.
27   *  <p>
28   *  WARNING: You must override one of the methods defined by this class in your
29   *  subclass or the Analyzer will enter an infinite loop.
30   */
31  public abstract class Analyzer {
32    /** Creates a TokenStream which tokenizes all the text in the provided
33      Reader.  Default implementation forwards to tokenStream(Reader) for 
34      compatibility with older version.  Override to allow Analyzer to choose 
35      strategy based on document and/or field.  Must be able to handle null
36      field name for backward compatibility. */
37    public TokenStream tokenStream(String   fieldName, Reader   reader)
38    {
39        // implemented for backward compatibility
40        return tokenStream(reader);
41    }
42    
43    /** Creates a TokenStream which tokenizes all the text in the provided
44     *  Reader.  Provided for backward compatibility only.
45     * @deprecated use tokenStream(String, Reader) instead.
46     * @see #tokenStream(String, Reader)
47     */
48    public TokenStream tokenStream(Reader   reader)
49    {
50        return tokenStream(null, reader);
51    }
52  
53    /**
54     * Invoked before indexing a Field instance if
55     * terms have already been added to that field.  This allows custom
56     * analyzers to place an automatic position increment gap between
57     * Field instances using the same field name.  The default value
58     * position increment gap is 0.  With a 0 position increment gap and
59     * the typical default token position increment of 1, all terms in a field,
60     * including across Field instances, are in successive positions, allowing
61     * exact PhraseQuery matches, for instance, across Field instance boundaries.
62     *
63     * @param fieldName Field name being indexed.
64     * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
65     */
66    public int getPositionIncrementGap(String   fieldName)
67    {
68      return 0;
69    }
70  }
71  
72
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags