java - java中如何获取词干后的符号

标签 java javascript

我有一个问题,在stem之后我无法得到像($,...)这样的符号,因为在输出中我需要示例字符串单词的价格= 44,66$;//词干提取后//输出 = 4466 字符串的词干非常好,但我想要不改变价格的数字(价格),因为我需要输出后的价格。 最后我的问题是如何获得像(44.33$)这样的价格数字,并在这个主干代码之后带有符号?

//////这里所有代码“”

class NewStrings {
  public String str;

  NewString() {
     str = "";
  }
}

public class Stemmer {

  private String Clean( String str ) {
     int last = str.length();

     Character ch = new Character( str.charAt(0) );
     String temp = "";

     for ( int i=0; i < last; i++ ) {
         if ( ch.isLetterOrDigit( str.charAt(i) ) )
            temp += str.charAt(i);
     }

     return temp;
  } //clean

  private boolean hasSuffix( String word, String suffix, NewString stem ) {

     String tmp = "";

     if ( word.length() <= suffix.length() )
        return false;
     if (suffix.length() > 1) 
        if ( word.charAt( word.length()-2 ) != suffix.charAt( suffix.length()-2 ) )
           return false;

     stem.str = "";

     for ( int i=0; i<word.length()-suffix.length(); i++ )
         stem.str += word.charAt( i );
     tmp = stem.str;

     for ( int i=0; i<suffix.length(); i++ )
         tmp += suffix.charAt( i );

     if ( tmp.compareTo( word ) == 0 )
        return true;
     else
        return false;
  }

  private boolean vowel( char ch, char prev ) {
     switch ( ch ) {
        case 'a': case 'e': case 'i': case 'o': case 'u': 
          return true;
        case 'y': {

          switch ( prev ) {
            case 'a': case 'e': case 'i': case 'o': case 'u': 
              return false;

            default: 
              return true;
          }
        }

        default : 
          return false;
     }
  }

  private int measure( String stem ) {

    int i=0, count = 0;
    int length = stem.length();

    while ( i < length ) {
       for ( ; i < length ; i++ ) {
           if ( i > 0 ) {
              if ( vowel(stem.charAt(i),stem.charAt(i-1)) )
                 break;
           }
           else {  
              if ( vowel(stem.charAt(i),'a') )
                break; 
           }
       }

       for ( i++ ; i < length ; i++ ) {
           if ( i > 0 ) {
              if ( !vowel(stem.charAt(i),stem.charAt(i-1)) )
                  break;
              }
           else {  
              if ( !vowel(stem.charAt(i),'?') )
                 break;
           }
       } 
      if ( i < length ) {
         count++;
         i++;
      }
    } //while

    return(count);
  }

  private boolean containsVowel( String word ) {

     for (int i=0 ; i < word.length(); i++ )
         if ( i > 0 ) {
            if ( vowel(word.charAt(i),word.charAt(i-1)) )
               return true;
         }
         else {  
            if ( vowel(word.charAt(0),'a') )
               return true;
         }

     return false;
  }

  private boolean cvc( String str ) {
     int length=str.length();

     if ( length < 3 )
        return false;

     if ( (!vowel(str.charAt(length-1),str.charAt(length-2)) )
        && (str.charAt(length-1) != 'w') && (str.charAt(length-1) != 'x') && (str.charAt(length-1) != 'y')
        && (vowel(str.charAt(length-2),str.charAt(length-3))) ) {

        if (length == 3) {
           if (!vowel(str.charAt(0),'?')) 
              return true;
           else
              return false;
        }
        else {
           if (!vowel(str.charAt(length-3),str.charAt(length-4)) ) 
              return true; 
           else
              return false;
        } 
     }   

     return false;
  }

  private String step1( String str ) {

     NewString stem = new NewString();

     if ( str.charAt( str.length()-1 ) == 's' ) {
        if ( (hasSuffix( str, "sses", stem )) || (hasSuffix( str, "ies", stem)) ){
           String tmp = "";
           for (int i=0; i<str.length()-2; i++)
               tmp += str.charAt(i);
           str = tmp;
        }
        else {
           if ( ( str.length() == 1 ) && ( str.charAt(str.length()-1) == 's' ) ) {
              str = "";
              return str;
           }
           if ( str.charAt( str.length()-2 ) != 's' ) {
              String tmp = "";
              for (int i=0; i<str.length()-1; i++)
                  tmp += str.charAt(i);
              str = tmp;
           }
        }  
     }

     if ( hasSuffix( str,"eed",stem ) ) {
           if ( measure( stem.str ) > 0 ) {
              String tmp = "";
              for (int i=0; i<str.length()-1; i++)
                  tmp += str.charAt( i );
              str = tmp;
           }
     }
     else {  
        if (  (hasSuffix( str,"ed",stem )) || (hasSuffix( str,"ing",stem )) ) { 
           if (containsVowel( stem.str ))  {

              String tmp = "";
              for ( int i = 0; i < stem.str.length(); i++)
                  tmp += str.charAt( i );
              str = tmp;
              if ( str.length() == 1 )
                 return str;

              if ( ( hasSuffix( str,"at",stem) ) || ( hasSuffix( str,"bl",stem ) ) || ( hasSuffix( str,"iz",stem) ) ) {
                 str += "e";

              }
              else {   
                 int length = str.length(); 
                 if ( (str.charAt(length-1) == str.charAt(length-2)) 
                    && (str.charAt(length-1) != 'l') && (str.charAt(length-1) != 's') && (str.charAt(length-1) != 'z') ) {

                    tmp = "";
                    for (int i=0; i<str.length()-1; i++)
                        tmp += str.charAt(i);
                    str = tmp;
                 }
                 else
                    if ( measure( str ) == 1 ) {
                       if ( cvc(str) ) 
                          str += "e";
                    }
              }
           }
        }
     }

     if ( hasSuffix(str,"y",stem) ) 
        if ( containsVowel( stem.str ) ) {
           String tmp = "";
           for (int i=0; i<str.length()-1; i++ )
               tmp += str.charAt(i);
           str = tmp + "i";
        }
     return str;  
  }

  private String step2( String str ) {

     String[][] suffixes = { { "ational", "ate" },
                                    { "tional",  "tion" },
                                    { "enci",    "ence" },
                                    { "anci",    "ance" },
                                    { "izer",    "ize" },
                                    { "iser",    "ize" },
                                    { "abli",    "able" },
                                    { "alli",    "al" },
                                    { "entli",   "ent" },
                                    { "eli",     "e" },
                                    { "ousli",   "ous" },
                                    { "ization", "ize" },
                                    { "isation", "ize" },
                                    { "ation",   "ate" },
                                    { "ator",    "ate" },
                                    { "alism",   "al" },
                                    { "iveness", "ive" },
                                    { "fulness", "ful" },
                                    { "ousness", "ous" },
                                    { "aliti",   "al" },
                                    { "iviti",   "ive" },
                                    { "biliti",  "ble" }};
     NewString stem = new NewString();


     for ( int index = 0 ; index < suffixes.length; index++ ) {
         if ( hasSuffix ( str, suffixes[index][0], stem ) ) {
            if ( measure ( stem.str ) > 0 ) {
               str = stem.str + suffixes[index][1];
               return str;
            }
         }
     }

     return str;
  }

  private String step3( String str ) {

        String[][] suffixes = { { "icate", "ic" },
                                       { "ative", "" },
                                       { "alize", "al" },
                                       { "alise", "al" },
                                       { "iciti", "ic" },
                                       { "ical",  "ic" },
                                       { "ful",   "" },
                                       { "ness",  "" }};
        NewString stem = new NewString();

        for ( int index = 0 ; index<suffixes.length; index++ ) {
            if ( hasSuffix ( str, suffixes[index][0], stem ))
               if ( measure ( stem.str ) > 0 ) {
                  str = stem.str + suffixes[index][1];
                  return str;
               }
        }
        return str;
  }

  private String step4( String str ) {

     String[] suffixes = { "al", "ance", "ence", "er", "ic", "able", "ible", "ant", "ement", "ment", "ent", "sion", "tion",
                           "ou", "ism", "ate", "iti", "ous", "ive", "ize", "ise"};

     NewString stem = new NewString();

     for ( int index = 0 ; index<suffixes.length; index++ ) {
         if ( hasSuffix ( str, suffixes[index], stem ) ) {

            if ( measure ( stem.str ) > 1 ) {
               str = stem.str;
               return str;
            }
         }
     }
     return str;
  }

  private String step5( String str ) {

     if ( str.charAt(str.length()-1) == 'e' ) { 
        if ( measure(str) > 1 ) {/* measure(str)==measure(stem) if ends in vowel */
           String tmp = "";
           for ( int i=0; i<str.length()-1; i++ ) 
               tmp += str.charAt( i );
           str = tmp;
        }
        else
           if ( measure(str) == 1 ) {
              String stem = "";
              for ( int i=0; i<str.length()-1; i++ ) 
                  stem += str.charAt( i );

              if ( !cvc(stem) )
                 str = stem;
           }
     }

     if ( str.length() == 1 )
        return str;
     if ( (str.charAt(str.length()-1) == 'l') && (str.charAt(str.length()-2) == 'l') && (measure(str) > 1) )
        if ( measure(str) > 1 ) {/* measure(str)==measure(stem) if ends in vowel */
           String tmp = "";
           for ( int i=0; i<str.length()-1; i++ ) 
               tmp += str.charAt( i );
           str = tmp;
        } 
     return str;
  }

//这是您的添加代码

  /*** chrs besides these will be removed */

  static final String allowedChrs = "$€¥£0123456789,.";
  public static String step6(String str){

      int sizeOfString = str.length();
      StringBuilder tmp = new StringBuilder();

      for(int i=0;i<sizeOfString;++i){

          if(allowedChrs.indexOf(str.charAt(i)) > -1){
             tmp.append(str.charAt(i));
          }    
      }
  return tmp.toString();
  }

  private String stripPrefixes ( String str) {

     String[] prefixes = { "kilo", "micro", "milli", "intra", "ultra", "mega", "nano", "pico", "pseudo"};

     int last = prefixes.length;
     for ( int i=0 ; i<last; i++ ) {
         if ( str.startsWith( prefixes[i] ) ) {
            String temp = "";
            for ( int j=0 ; j< str.length()-prefixes[i].length(); j++ )
                temp += str.charAt( j+prefixes[i].length() );
            return temp;
         }
     }

     return str;
  }


  private String stripSuffixes( String str ) {

     str = step1( str );
     if ( str.length() >= 1 )
        str = step2( str );
     if ( str.length() >= 1 )
        str = step3( str );
     if ( str.length() >= 1 )
        str = step4( str );
     if ( str.length() >= 1 )
        str = step5( str );
     if ( str.length() >= 1 )
         str = step6( str );
     return str; 
  }

  public static void main(String[] args) {
    String Word = "3.4$";
     // String str = stripAffixes  (Word);
    porrrr fun = new porrrr();
    fun.stripAffixes(Word);

    System.out.println(fun.stripAffixes(Word));

    }          //stripAffixes 
  public  String stripAffixes( String str ) {

    str = str.toLowerCase();
    str = Clean(str);

    if (( str != "" ) && (str.length() > 2)) {
       str = stripPrefixes(str);

       if (str != "" ) 
          str = stripSuffixes(str);

    }   

    return str;
    } //stripAffixes

} //class

最佳答案

很难理解你到底要做什么,但是,你见过你的 Clean 方法吗?

 if ( ch.isLetterOrDigit( str.charAt(i) ) ){ // you only take letters and digits from the initial String.

$ 和逗号既不是数字也不是字母

编辑 它应该看起来像这样:

    static final List<Character> list = Arrays.asList('$','€','¥','£');

private String step6(String str){
    int sizeOfString = str.length();
    StringBuilder tmp = new StringBuilder();
    for(int i=0;i<sizeOfString;++i){
        if(list.contains(str.charAt(i))){
            tmp.append(str.charAt(i));
        }

    }
    return tmp.toString();
}

关于java - java中如何获取词干后的符号,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/16333952/

相关文章:

javascript - Bootstrap Carousel Error == TypeError : $(. ..).carousel 不是函数

java - Hibernate:如何建模继承类型结构并在没有显式转换的情况下执行操作

java - Java阻塞线程占用CPU资源多吗?

java - 使用 JAR 文件

javascript - 在文本框左侧插入文本 - JS

javascript - 在一行代码中用扩展语法替换数组条目?

javascript - Object.defineProperty polyfill

java - 事务回滚时如何返回不同的值?

java - 获取.xls工作表中单元格的文本和背景颜色作为java中的十六进制

javascript - 我可以创建自定义 Ember 数据方法吗?