/*
 *  call-seq:
 *     StemFilter.new(token_stream) -> token_stream
 *     StemFilter.new(token_stream,
 *                    algorithm="english",
 *                    encoding="UTF-8") -> token_stream
 *
 *  Create a StemFilter which uses a snowball stemmer (thank you Martin
 *  Porter) to stem words. You can optionally specify the algorithm (default:
 *  "english") and encoding (default: "UTF-8").
 *
 *  Raises ArgumentError if no stemmer could be found for the requested
 *  algorithm and encoding combination.
 *
 *  token_stream:: TokenStream to be filtered
 *  algorithm::    The algorithm (or language) to use
 *  encoding::     The encoding of the data (default: "UTF-8")
 */
static VALUE
frb_stem_filter_init(int argc, VALUE *argv, VALUE self)
{
    VALUE rsub_ts, ralgorithm, rcharenc;
    char *algorithm = "english";
    /* NULL means "no encoding given"; it is passed through unchanged so that
     * stem_filter_new applies its own default. */
    char *charenc = NULL;
    TokenStream *ts;

    rb_scan_args(argc, argv, "12", &rsub_ts, &ralgorithm, &rcharenc);
    ts = frb_get_cwrapped_rts(rsub_ts);

    /* The optional arguments are converted with rb_obj_as_string. The
     * resulting Strings are stored back into the VALUE locals rather than
     * left as anonymous temporaries, because the C pointers taken from them
     * are read again further down when building the error message.
     * NOTE(review): this is a best-effort measure, not a hard GC guarantee. */
    switch (argc) {
        case 3:
            rcharenc = rb_obj_as_string(rcharenc);
            charenc = rs2s(rcharenc);
            /* fallthrough: three arguments imply an algorithm was given too */
        case 2:
            ralgorithm = rb_obj_as_string(ralgorithm);
            algorithm = rs2s(ralgorithm);
            break;
        default:
            /* only the token stream was supplied; keep the defaults */
            break;
    }

    ts = stem_filter_new(ts, algorithm, charenc);
    object_add(&(TkFilt(ts)->sub_ts), rsub_ts);

    /* Wrap before the stemmer check so that the filter is already attached
     * to self when the error below is raised. */
    Frt_Wrap_Struct(self, &frb_tf_mark, &frb_tf_free, ts);
    object_add(ts, self);

    if (((StemFilter *)ts)->stemmer == NULL) {
        /* charenc is still NULL when no encoding argument was passed. Handing
         * a NULL pointer to a "%s" conversion is not valid, so report the
         * documented default name instead. */
        rb_raise(rb_eArgError, "No stemmer could be found with the encoding "
                 "%s and the language %s",
                 charenc ? charenc : "UTF-8", algorithm);
    }
    return self;
}