/*
 *  call-seq:
 *     StandardAnalyzer.new(stop_words = FULL_ENGLISH_STOP_WORDS, lower=true)
 *     -> analyzer
 *
 *  Build a StandardAnalyzer. Tokens are downcased unless +lower+ is given as
 *  a false value; downcasing follows the current locale (on non-Windows
 *  builds the LC_CTYPE locale is initialised from the environment the first
 *  time it is needed). A custom stop-word list may be supplied for the
 *  StopFilter; when it is omitted or nil the analyzer's default list is used.
 *
 *  stop_words:: list of stop-words to pass to the StopFilter
 *  lower::      pass false to keep the original case of the field's tokens
 */
static VALUE
frb_standard_analyzer_init(int argc, VALUE *argv, VALUE self)
{
    VALUE rstop_words, rlower;
    Analyzer *analyzer;
    bool downcase = true;   /* default when +lower+ is omitted or nil */

#ifndef POSH_OS_WIN32
    /* Pick up the environment's character-type locale once. */
    if (!frb_locale) {
        frb_locale = setlocale(LC_CTYPE, "");
    }
#endif

    /* Zero required and up to two optional arguments. */
    rb_scan_args(argc, argv, "02", &rstop_words, &rlower);

    if (rlower != Qnil) {
        downcase = RTEST(rlower);
    }

    if (rstop_words == Qnil) {
        analyzer = mb_standard_analyzer_new(downcase);
    } else {
        char **words = get_stopwords(rstop_words);

        analyzer = mb_standard_analyzer_new_with_words((const char **)words,
                                                       downcase);
        /* Only the pointer array is released here, not the strings. */
        free(words);
    }

    /* Attach the native analyzer to the Ruby object and register the pair. */
    Frt_Wrap_Struct(self, NULL, &frb_analyzer_free, analyzer);
    object_add(analyzer, self);
    return self;
}