[Snowball-discuss] A problem with replacing letters

Martin Porter martin.porter at grapeshot.co.uk
Fri Jan 21 08:53:44 GMT 2005


Anna,

I've just run your script on my machine and it does work correctly, that is to
say, hagyásában is stemmed to hagyása, with the final acute on the a
removed. There must be some confusion at your end. For comparison, I'm
attaching the generated stem.c (and the tar file of my work). This should be
like yours, apart from possible extern names and so on.

Martin



>Hello,
>
>I'm working on a Hungarian stemmer and I have a problem I haven't been 
>able to solve. The code is added below.  I have a routine called 
>v_ending which replaces  "a acute" and "e acute" by "a" and "e".  If I 
>simply delete them it works but when I actually try replacing instead of 
>an "a" I get an "a acute".
>For instance if I test it on the word  "hagyásában" I ought to get 
>"hagyása" (with ban removed and a acute replaced) but I get "hagyásá". 
>Similar things happen with a word like  "kimenetelében".  I suspect I am 
>missing something simple but I just can't figure out what goes wrong.
>
>Thank you
>
>Anna Tordai
>
>**************************
-------------- next part --------------

/* This file was generated automatically by the Snowball to ANSI C compiler */

#include "header.h"

/* Public entry point: runs the stemmer on the environment z (defined below). */
extern int test_stem(struct SN_env * z);
/* File-local routines, declared up front so they can be defined in any order.
 * Each returns 1 on success and 0 on failure. */
static int r_case(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_v_ending(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);

/* Environment lifecycle: thin wrappers around SN_create_env / SN_close_env. */
extern struct SN_env * test_create_env(void);
extern void test_close_env(struct SN_env * z);

/* Strings for among table a_0: the two-letter sequences "cs", "gy", "sz", "ty". */
static symbol s_0_0[2] = { 'c', 's' };
static symbol s_0_1[2] = { 'g', 'y' };
static symbol s_0_2[2] = { 's', 'z' };
static symbol s_0_3[2] = { 't', 'y' };

/* Among table searched forwards by find_among() in r_mark_regions.
 * Each entry starts with the string length and a pointer to the string;
 * NOTE(review): the meaning of the remaining three fields is defined by
 * struct among in header.h -- confirm there. */
static struct among a_0[4] =
{
/*  0 */ { 2, s_0_0, -1, -1, 0},
/*  1 */ { 2, s_0_1, -1, -1, 0},
/*  2 */ { 2, s_0_2, -1, -1, 0},
/*  3 */ { 2, s_0_3, -1, -1, 0}
};

/* Single-symbol strings for a_1, with codes 225 and 233.
 * NOTE(review): presumably a-acute and e-acute in ISO-8859-1, matching the
 * v_ending routine described in the accompanying message -- confirm the
 * character encoding in use. */
static symbol s_1_0[1] = { 225 };
static symbol s_1_1[1] = { 233 };

/* Among table searched backwards by find_among_b() in r_v_ending.
 * The fourth field (1 and 2) lines up with the case labels r_v_ending
 * switches on; presumably it is the value find_among_b() returns for a
 * match on that entry -- confirm against the runtime. */
static struct among a_1[2] =
{
/*  0 */ { 1, s_1_0, -1, 1, 0},
/*  1 */ { 1, s_1_1, -1, 2, 0}
};

/* Strings for among table a_2: the endings "ban" and "ben". */
static symbol s_2_0[3] = { 'b', 'a', 'n' };
static symbol s_2_1[3] = { 'b', 'e', 'n' };

/* Among table searched backwards by find_among_b() in r_case. */
static struct among a_2[2] =
{
/*  0 */ { 3, s_2_0, -1, -1, 0},
/*  1 */ { 3, s_2_1, -1, -1, 0}
};

/* Grouping table "v", always passed to in_grouping()/out_grouping() together
 * with the bounds 97 and 252.
 * NOTE(review): presumably a bitmap of the vowel characters within the code
 * range 97..252 -- confirm against the grouping definition in the Snowball
 * source and the runtime's in_grouping(). */
static unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 52, 14 };

/* One-symbol replacement strings used by r_v_ending: 'a' and 'e'. */
static symbol s_0[] = { 'a' };
static symbol s_1[] = { 'e' };

static int r_mark_regions(struct SN_env * z) {
    z->I[0] = z->l;
    z->I[1] = z->l;
    {   int c = z->c; /* or, line 59 */
        while(1) { /* gopast, line 59 */
            if (!(in_grouping(z, g_v, 97, 252))) goto lab2;
            break;
        lab2:
            if (z->c >= z->l) goto lab1;
            z->c++;
        }
        {   int c_test = z->c; /* test, line 59 */
            if (!(find_among(z, a_0, 4))) goto lab1; /* substring, line 59 */
            z->c = c_test;
        }
        z->I[0] = z->c; /* setmark p1, line 59 */
        goto lab0;
    lab1:
        z->c = c;
        while(1) { /* goto, line 60 */
            int c = z->c;
            if (!(in_grouping(z, g_v, 97, 252))) goto lab3;
            z->c = c;
            break;
        lab3:
            z->c = c;
            if (z->c >= z->l) return 0;
            z->c++;
        }
        while(1) { /* gopast, line 60 */
            if (!(out_grouping(z, g_v, 97, 252))) goto lab4;
            break;
        lab4:
            if (z->c >= z->l) return 0;
            z->c++;
        }
        z->I[0] = z->c; /* setmark p1, line 60 */
    }
lab0:
    while(1) { /* goto, line 61 */
        int c = z->c;
        if (!(in_grouping(z, g_v, 97, 252))) goto lab5;
        z->c = c;
        break;
    lab5:
        z->c = c;
        if (z->c >= z->l) return 0;
        z->c++;
    }
    while(1) { /* gopast, line 61 */
        if (!(out_grouping(z, g_v, 97, 252))) goto lab6;
        break;
    lab6:
        if (z->c >= z->l) return 0;
        z->c++;
    }
    z->I[1] = z->c; /* setmark p2, line 61 */
    return 1;
}

/* Returns 1 when the cursor is at or beyond mark I[0] (p1), otherwise 0. */
static int r_R1(struct SN_env * z) {
    return z->I[0] <= z->c;
}

/* Returns 1 when the cursor is at or beyond mark I[1] (p2), otherwise 0. */
static int r_R2(struct SN_env * z) {
    return z->I[1] <= z->c;
}

/* Looks backwards from the cursor for one of the a_1 entries and, on a
 * match, replaces the matched slice: entry result 1 becomes s_0 ('a') and
 * entry result 2 becomes s_1 ('e').
 *
 * Returns 0 when nothing in a_1 matches (only z->ket has been written by
 * this function at that point), 1 otherwise.
 */
static int r_v_ending(struct SN_env * z) {
    int which;

    z->ket = z->c;                          /* [, line 70 */
    which = find_among_b(z, a_1, 2);        /* substring, line 70 */
    if (which == 0) return 0;
    z->bra = z->c;                          /* ], line 70 */

    if (which == 1) {
        slice_from_s(z, 1, s_0);            /* <-, line 71 */
    } else if (which == 2) {
        slice_from_s(z, 1, s_1);            /* <-, line 72 */
    }
    return 1;
}

/* Looks backwards from the cursor for one of the a_2 endings ("ban"/"ben"),
 * deletes the matched slice, then calls r_v_ending on what remains.
 *
 * Returns 0 when no ending matches, or when r_v_ending returns 0 (the
 * deletion has already been performed in that case); 1 otherwise.
 */
static int r_case(struct SN_env * z) {
    z->ket = z->c;                              /* [, line 77 */
    if (find_among_b(z, a_2, 2) == 0) {         /* substring, line 77 */
        return 0;
    }
    z->bra = z->c;                              /* ], line 77 */
    slice_del(z);                               /* delete, line 81 */
    return r_v_ending(z) ? 1 : 0;               /* call v_ending, line 82 */
}

/* Stemmer entry point.
 *
 * Runs r_mark_regions forwards, then switches to backward mode (z->lb takes
 * the cursor, the cursor moves to the limit z->l) and runs r_case. Both calls
 * are Snowball "do" commands: their results are ignored and the cursor is
 * restored afterwards. Finally the cursor is reset to z->lb.
 *
 * Always returns 1.
 */
extern int test_stem(struct SN_env * z) {
    int saved_cursor;
    int dist_from_limit;

    /* do mark_regions, line 87 */
    saved_cursor = z->c;
    (void) r_mark_regions(z);
    z->c = saved_cursor;

    /* backwards, line 88 */
    z->lb = z->c;
    z->c = z->l;

    /* do case, line 89: the cursor is remembered as a distance from the
     * limit and restored relative to z->l as it stands after r_case. */
    dist_from_limit = z->l - z->c;
    (void) r_case(z);
    z->c = z->l - dist_from_limit;

    z->c = z->lb;
    return 1;
}

/* Creates the environment for this stemmer via SN_create_env(0, 2, 0).
 * NOTE(review): the 2 presumably sizes the integer array z->I, matching the
 * I[0] and I[1] marks used in this file -- confirm against SN_create_env. */
extern struct SN_env * test_create_env(void)
{
    return SN_create_env(0, 2, 0);
}

/* Releases an environment by handing it to SN_close_env. */
extern void test_close_env(struct SN_env * z)
{
    SN_close_env(z);
}

-------------- next part --------------
A non-text attachment was scrubbed...
Name: HUNG.TAR
Type: application/octet-stream
Size: 10240 bytes
Desc: not available
Url : http://lists.tartarus.org/mailman/private/snowball-discuss/attachments/20050121/e55cf541/HUNG.obj


More information about the Snowball-discuss mailing list