Kouhei Sutou
null+****@clear*****
Wed Jan 23 18:33:56 JST 2013
Kouhei Sutou 2013-01-23 18:33:56 +0900 (Wed, 23 Jan 2013) New Revision: 3756d3cb667ed6bf66804f70849ef4879d4ceacb https://github.com/groonga/groonga/commit/3756d3cb667ed6bf66804f70849ef4879d4ceacb Log: Add "normalize" command It normalizes the specified string by the specified normalizer: normalize NormalizerAuto "aBcDe 123" [[0,0.0,0.0],"abcde 123"] It is useful to confirm normalizer behavior because it doesn't require creating a table. TODO: * Document it * Add tests for non-ASCII characters * Support flags Added files: test/command/suite/normalize/ascii.expected test/command/suite/normalize/ascii.test Modified files: lib/proc.c Modified: lib/proc.c (+57 -0) =================================================================== --- lib/proc.c 2013-01-23 16:25:04 +0900 (c9fcc19) +++ lib/proc.c 2013-01-23 18:33:56 +0900 (4d656cf) @@ -2745,6 +2745,59 @@ proc_truncate(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) } static grn_obj * +proc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) +{ + grn_obj *normalizer_name; + grn_obj *string; + + normalizer_name = VAR(0); + string = VAR(1); + if (GRN_TEXT_LEN(normalizer_name) == 0) { + ERR(GRN_INVALID_ARGUMENT, "normalizer name is missing"); + GRN_OUTPUT_CSTR(""); + return NULL; + } + + { + grn_obj *normalizer; + grn_obj *grn_string; + int flags = 0; /* TODO */ + + normalizer = grn_ctx_get(ctx, + GRN_TEXT_VALUE(normalizer_name), + GRN_TEXT_LEN(normalizer_name)); + if (!normalizer) { + ERR(GRN_INVALID_ARGUMENT, + "unknown normalizer: <%.*s>", + (int)GRN_TEXT_LEN(normalizer_name), + GRN_TEXT_VALUE(normalizer_name)); + GRN_OUTPUT_CSTR(""); + return NULL; + } + + grn_string = grn_string_open(ctx, + GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), + normalizer, flags); + grn_obj_unlink(ctx, normalizer); + + { + const char *normalized; + unsigned int normalized_length_in_bytes; + + grn_string_get_normalized(ctx, grn_string, + &normalized, + &normalized_length_in_bytes, + NULL); + 
GRN_OUTPUT_STR(normalized, normalized_length_in_bytes); + } + + grn_obj_unlink(ctx, grn_string); + } + + return NULL; +} + +static grn_obj * func_rand(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) { int val; @@ -3584,6 +3637,10 @@ grn_db_init_builtin_query(grn_ctx *ctx) DEF_VAR(vars[0], "table"); DEF_COMMAND("truncate", proc_truncate, 1, vars); + DEF_VAR(vars[0], "normalizer"); + DEF_VAR(vars[1], "string"); + DEF_COMMAND("normalize", proc_normalize, 2, vars); + DEF_VAR(vars[0], "seed"); grn_proc_create(ctx, "rand", -1, GRN_PROC_FUNCTION, func_rand, NULL, NULL, 0, vars); Added: test/command/suite/normalize/ascii.expected (+2 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalize/ascii.expected 2013-01-23 18:33:56 +0900 (948c499) @@ -0,0 +1,2 @@ +normalize NormalizerAuto "aBcDe 123" +[[0,0.0,0.0],"abcde 123"] Added: test/command/suite/normalize/ascii.test (+1 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalize/ascii.test 2013-01-23 18:33:56 +0900 (45f868b) @@ -0,0 +1 @@ +normalize NormalizerAuto "aBcDe 123" -------------- next part -------------- HTMLの添付ファイルを保管しました...Descargar