public class BrazilianStemmer
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
private java.lang.String |
CT |
private static java.util.Locale |
locale |
private java.lang.String |
R1 |
private java.lang.String |
R2 |
private java.lang.String |
RV |
private java.lang.String |
TERM
Changed term
|
Constructor and Description |
---|
BrazilianStemmer() |
Modifier and Type | Method and Description |
---|---|
private java.lang.String |
changeTerm(java.lang.String value)
1) Turn to lowercase
2) Remove accents
3) ã -> a ; õ -> o
4) ç -> c
|
private void |
createCT(java.lang.String term)
Creates CT (changed term), substituting 'ã' and 'õ' for 'a~' and 'o~'.
|
private java.lang.String |
getR1(java.lang.String value)
Gets R1
R1 - is the region after the first non-vowel following a vowel,
or is the null region at the end of the word if there is
no such non-vowel.
|
private java.lang.String |
getRV(java.lang.String value)
Gets RV
RV - IF the second letter is a consonant, RV is the region after
the next following vowel,
OR if the first two letters are vowels, RV is the region
after the next consonant,
AND otherwise (consonant-vowel case) RV is the region after
the third letter.
|
private boolean |
isIndexable(java.lang.String term)
Checks whether a term can be processed and indexed.
|
private boolean |
isStemmable(java.lang.String term)
Checks whether a term can be processed correctly.
|
private boolean |
isVowel(char value)
Checks whether the character is one of 'a', 'e', 'i', 'o', 'u'
|
java.lang.String |
log()
For logging and debugging purposes
|
private java.lang.String |
removeSuffix(java.lang.String value,
java.lang.String toRemove)
Remove a string suffix
|
private java.lang.String |
replaceSuffix(java.lang.String value,
java.lang.String toReplace,
java.lang.String changeTo)
Replace a string suffix by another
|
protected java.lang.String |
stem(java.lang.String term)
Stems the given term to a unique discriminator.
|
private boolean |
step1()
Standard suffix removal.
|
private boolean |
step2()
Verb suffixes.
|
private void |
step3()
Delete suffix 'i' if in RV and preceded by 'c'
|
private void |
step4()
Residual suffix
If the word ends with one of the suffixes (os a i o á í ó)
in RV, delete it
|
private void |
step5()
If the word ends with one of (e é ê) in RV, delete it,
and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV,
delete the 'u' (or 'i').
Or if the word ends with ç, remove the cedilla
|
private boolean |
suffix(java.lang.String value,
java.lang.String suffix)
Check if a string ends with a suffix
|
private boolean |
suffixPreceded(java.lang.String value,
java.lang.String suffix,
java.lang.String preceded)
See if a suffix is preceded by a String
|
private static final java.util.Locale locale
private java.lang.String TERM
private java.lang.String CT
private java.lang.String R1
private java.lang.String R2
private java.lang.String RV
protected java.lang.String stem(java.lang.String term)
term
- The term that should be stemmed.
private boolean isStemmable(java.lang.String term)
private boolean isIndexable(java.lang.String term)
private boolean isVowel(char value)
private java.lang.String getR1(java.lang.String value)
private java.lang.String getRV(java.lang.String value)
private java.lang.String changeTerm(java.lang.String value)
private boolean suffix(java.lang.String value, java.lang.String suffix)
private java.lang.String replaceSuffix(java.lang.String value, java.lang.String toReplace, java.lang.String changeTo)
private java.lang.String removeSuffix(java.lang.String value, java.lang.String toRemove)
private boolean suffixPreceded(java.lang.String value, java.lang.String suffix, java.lang.String preceded)
private void createCT(java.lang.String term)
private boolean step1()
private boolean step2()
private void step3()
private void step4()
private void step5()
public java.lang.String log()