[Snowball-discuss] Evening -> Even, Marine -> Marin
Ann B
alleycatwoman at gmail.com
Thu Jun 22 16:51:24 BST 2017
Hi All,
I help maintain a Drupal site where a database search will be replaced with
an Apache Solr search. I'm having some trouble with stemming. When
searching for the word *evening*, the results include excerpts with the
word *even*. When searching for the last name *Marin*, results include
excerpts with the word *marine*. The excerpts do not include evening or
Marin. What is the best way to handle this? Should I add these words to the
protected words file as we come across them? I am using the
*SnowballPorterFilterFactory*. Below is the configuration we are using
that comes with Apache Solr 3.6.1:
<!--
  Field type "text": a solr.TextField with separate index-time and query-time
  analyzer chains. Both chains share the same char filter, tokenizer, stop
  filter, length filter, lower-casing, stemming and duplicate-removal steps.
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
  <!-- Analysis applied to documents when they are indexed. -->
  <analyzer type="index">
    <charFilter class="solr.MappingCharFilterFactory"
                mapping="mapping-ISOLatin1Accent.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"
            enablePositionIncrements="true"/>
    <!-- Index side catenates word and number parts (catenateWords/Numbers = 1). -->
    <filter class="solr.WordDelimiterFilterFactory"
            protected="protwords.txt"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"
            preserveOriginal="1"/>
    <filter class="solr.LengthFilterFactory" min="2" max="100"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!--
      English Snowball stemmer. Per the Solr documentation, "protected" names a
      file of words that are passed through without being stemmed.
    -->
    <filter class="solr.SnowballPorterFilterFactory"
            language="English"
            protected="protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <!--
    Analysis applied to search queries. Differs from the index chain by adding
    synonym expansion after tokenizing and by not catenating word/number parts
    (catenateWords/Numbers = 0).
  -->
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory"
                mapping="mapping-ISOLatin1Accent.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.WordDelimiterFilterFactory"
            protected="protwords.txt"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"
            preserveOriginal="1"/>
    <filter class="solr.LengthFilterFactory" min="2" max="100"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Must match the index-time stemmer so query terms reduce to the same stems. -->
    <filter class="solr.SnowballPorterFilterFactory"
            language="English"
            protected="protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
Thank you for any help,
Ann
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.tartarus.org/mailman/private/snowball-discuss/attachments/20170622/622cb9ba/attachment.html>
More information about the Snowball-discuss
mailing list