From: Philipp Takacs Date: Mon, 10 Jun 2019 01:17:36 +0000 (+0200) Subject: pick matching rework X-Git-Url: http://git.marmaro.de/?a=commitdiff_plain;h=fcaf3752d024fd5ce98f5fbd5604c53444c0a4cf;p=mmh pick matching rework The last rewrite of the matching implementation was a bit overcomplicated. Now the structures are easier to read and no longer use function pointers. The -not switch also works again; the early return true in the previous version had broken it. Now the leaves of the matching tree remember whether the message has matched. --- diff --git a/test/tests/pick/test-case b/test/tests/pick/test-case new file mode 100644 index 0000000..fc06ef2 --- /dev/null +++ b/test/tests/pick/test-case @@ -0,0 +1,48 @@ +#!/bin/sh +###################################################### +# +# Test that the -thread option works. +# +###################################################### + +. "$MH_TEST_COMMON" + +#lower +cat > `mhpath b` << '!' +From: alice@example.org +subject: fooooo +! +lower=$(pick l) + +#upper +cat > `mhpath b` << '!' +From: alice@example.org +SUBJECT: FOOOOO +! +upper=$(pick l) + + +#mixed +cat > `mhpath b` << '!' +From: alice@example.org +sUbJEcT: FOooOo +! +mixed=$(pick l) + +runandcheck 'pick --subject fooooo' < $msgfile < $expected_out < $expected_out < $expected_err - -pick > $actual_out 2> $actual_err -diff -u $expected_err $actual_err -diff -u $expected_out $actual_out +! diff --git a/test/tests/pick/test-output-on-error b/test/tests/pick/test-output-on-error index 8d96224..07231f0 100644 --- a/test/tests/pick/test-output-on-error +++ b/test/tests/pick/test-output-on-error @@ -5,20 +5,10 @@ # ###################################################### -expected_err=$MH_TEST_DIR/$$.expected_err -expected_out=$MH_TEST_DIR/$$.expected_out -actual_err=$MH_TEST_DIR/$$.actual_err -actual_out=$MH_TEST_DIR/$$.actual_out +. 
"$MH_TEST_COMMON" # A zero should go to standard out to protect other programms -cat > $expected_out < $expected_err < $actual_out 2> $actual_err -diff -u $expected_err $actual_err -diff -u $expected_out $actual_out +0 +! diff --git a/test/tests/pick/test-rfc2047 b/test/tests/pick/test-rfc2047 index b89a71a..3eab253 100644 --- a/test/tests/pick/test-rfc2047 +++ b/test/tests/pick/test-rfc2047 @@ -5,28 +5,39 @@ # ###################################################### -set -e - -expected_err=$MH_TEST_DIR/$$.expected_err -expected_out=$MH_TEST_DIR/$$.expected_out -actual_err=$MH_TEST_DIR/$$.actual_err -actual_out=$MH_TEST_DIR/$$.actual_out +. "$MH_TEST_COMMON" # Test MIME-encoded header. -cat >"$MH_TEST_DIR/Mail/inbox/13" < $(mhpath b) < To: Some User Date: Fri, 29 Sep 2006 00:00:00 Message-Id: 13@test.nmh -Subject: =?us-ascii?q?=66=6f=6f?= - =?utf-8?q?=62=61=72?= +Subject: =?utf-8?q?=66=6f=6f=62=61=72?= This is message number 13, with MIME-encoded Subject "foobar". -EOF +! +lm=$(pick l) + +runandcheck 'pick -subject foobar' < $(mhpath b) < +To: Some User +Date: Fri, 29 Sep 2006 00:00:00 +Subject: =?utf-8?q?=C3=BCbung?= -echo 13 >"$expected_out" -cat /dev/null > $expected_err +This is a message, with MIME-encoded subject. +! +lm=$(pick l) -pick -subject foobar 13 > $actual_out 2> $actual_err -diff -u $expected_err $actual_err -diff -u $expected_out $actual_out +runandcheck 'pick -subject übung' < $(mhpath b) < +To: Some User +Date: Fri, 29 Sep 2006 00:00:00 +Message-Id: 13@test.nmh +Subject: fooo + +This is a test message +! +lm=$(pick l) + +runandcheck 'pick -search fooo' </dev/null +pick: no messages match specification +0 +! + +cat > $(mhpath b) < +To: Some User +Date: Fri, 29 Sep 2006 00:00:00 +Subject: bla + +This is a test message, with fooo in the body. +! +lm=$(pick l) + +runandcheck 'pick -search fooo' < $expected_err < $expected_out - -pick -a > $actual_out 2> $actual_err -diff -u $expected_err $actual_err -diff -u $expected_out $actual_out +! 
diff --git a/test/tests/pick/test-thread-without-msgid b/test/tests/pick/test-thread-without-msgid index 6058815..154c9c0 100644 --- a/test/tests/pick/test-thread-without-msgid +++ b/test/tests/pick/test-thread-without-msgid @@ -5,20 +5,9 @@ # ###################################################### -expected_err=$MH_TEST_DIR/$$.expected_err -expected_out=$MH_TEST_DIR/$$.expected_out -actual_err=$MH_TEST_DIR/$$.actual_err -actual_out=$MH_TEST_DIR/$$.actual_out +. "$MH_TEST_COMMON" -# All messages should be go to stdout -cat > $expected_out < $expected_err < $actual_out 2> $actual_err -diff -u $expected_err $actual_err || exit 1 -diff -u $expected_out $actual_out || exit 1 +0 +! diff --git a/uip/pick.c b/uip/pick.c index b1a946d..e76b528 100644 --- a/uip/pick.c +++ b/uip/pick.c @@ -85,12 +85,40 @@ static struct swit switches[] = { char *version=VERSION; -struct nexus { - boolean (*action)(struct field *, int, void *); - void (*free)(struct nexus **); - void (*debug)(void *, size_t); +enum nexus_type { + noop_t = 0, + not_t, + and_t, + or_t, + date_t, + grep_t +}; + +struct bin_data { + struct nexus *left; + struct nexus *right; +}; + +struct date_data { + char *datef; + boolean after; + struct tws tws; +}; - void *data; +struct grep_data { + char *header; + char *pattern; + regex_t *preg; +}; + +struct nexus { + enum nexus_type t; + boolean match; + union { + struct bin_data b; + struct date_data d; + struct grep_data g; + } data; }; static struct nexus *head; @@ -101,6 +129,12 @@ static boolean body = FALSE; */ static int pcompile(char **, char *); static int pmatches(FILE *, int); +static boolean nexus_match(struct field *, int, struct nexus *); +static void nexus_free(struct nexus **); +static void nexus_clear(struct nexus *); +static void nexus_debug(struct nexus *, size_t); +static void nexus_debug_grep(struct grep_data *); +static void print_debug_level(size_t); static struct nexus * createonethread(char *); static struct nexus * createpickthread(char *); static 
void scan_mbox(char *, char *, int); @@ -358,9 +392,7 @@ main(int argc, char **argv) } } - if (head) { - head->free(&head); - } + nexus_free(&head); mp->lowsel = lo; mp->hghsel = hi; @@ -415,7 +447,6 @@ scan_mbox(char *file, char *fmtstr, int width) fclose(in); } - void putzero_done() { @@ -448,7 +479,6 @@ printmsg(FILE *f, struct msgs *mp, int msgnum, char *fmtstr, int width) } } - static struct swit parswit[] = { #define PRAND 0 { "and", 0 }, @@ -519,44 +549,12 @@ static struct swit parswit[] = { #define padvise if (!talked++) advise - -enum nexus_type { - TYPE_GREP, - TYPE_DATE, - TYPE_OR, - TYPE_AND, - TYPE_NOT -}; - -struct bin_data { - struct nexus *left; - struct nexus *right; - enum nexus_type type; - int oldmsgnum; - boolean leftmatch; - boolean rightmatch; - boolean match; -}; - -struct date_data { - char *datef; - boolean after; - struct tws tws; -}; - -struct grep_data { - char *header; - char *pattern; - regex_t *preg; -}; - static int talked; static int pdebug = 0; static char *datesw; static char **argp; - /* ** prototypes for date routines */ @@ -575,19 +573,6 @@ static struct nexus *nexp2(void); static struct nexus *nexp3(void); static struct nexus *newnexus(enum nexus_type); -static boolean BINaction(struct field *, int, void *); -static boolean NOTaction(struct field *, int, void *); -static boolean GREPaction(struct field *, int, void *); -static boolean DATEaction(struct field *, int, void *); - -static void BINfree(struct nexus **); -static void GREPfree(struct nexus **); -static void DATEfree(struct nexus **); - -static void BINdebug(void *, size_t); -static void GREPdebug(void *, size_t); -static void DATEdebug(void *, size_t); - static int pcompile(char **vec, char *date) { @@ -641,8 +626,8 @@ parse(void) return NULL; case PROR: - o = newnexus(TYPE_OR); - bin = o->data; + o = newnexus(or_t); + bin = &o->data.b; bin->left = n; if ((bin->right = parse())) return o; @@ -684,8 +669,8 @@ nexp1(void) return NULL; case PRAND: - o = 
newnexus(TYPE_AND); - bin = o->data; + o = newnexus(and_t); + bin = &o->data.b; bin->left = n; if ((bin->right = nexp1())) return o; @@ -728,8 +713,8 @@ nexp2(void) return NULL; case PRNOT: - n = newnexus(TYPE_NOT); - bin = n->data; + n = newnexus(not_t); + bin = &n->data.b; if ((bin->left = nexp3())) return n; padvise(NULL, "missing negation"); @@ -810,16 +795,16 @@ header: ; padvise(NULL, "missing argument to %s", argp[-2]); return NULL; } - n = newnexus(TYPE_GREP); - gdata = n->data; + n = newnexus(grep_t); + gdata = &n->data.g; gdata->header = mh_xstrdup(dp); snprintf(buffer, sizeof(buffer), "%s", cp); dp = buffer; goto pattern; case PRSRCH: - n = newnexus(TYPE_GREP); - gdata = n->data; + n = newnexus(grep_t); + gdata = &n->data.g; gdata->header = NULL; body = TRUE; if (!(cp = nxtarg())) { /* allow -xyz arguments */ @@ -852,8 +837,8 @@ pattern: ; padvise(NULL, "missing argument to %s", argp[-2]); return NULL; } - n = newnexus(TYPE_DATE); - twsd = n->data; + n = newnexus(date_t); + twsd = &n->data.d; twsd->datef = datesw; if (!tcompile(cp, &twsd->tws, twsd->after = i == PRAFTR)) { padvise(NULL, "unable to parse %s %s", argp[-2], cp); @@ -868,44 +853,26 @@ static struct nexus * newnexus(enum nexus_type t) { struct nexus *p = NULL; - struct bin_data *bin; - p = mh_xcalloc(1, sizeof(struct nexus)); + p->t = t; + return p; - switch (t) { - case TYPE_NOT: - p->action = NOTaction; - p->debug = BINdebug; - p->free = BINfree; - p->data = bin = mh_xcalloc(1, sizeof(struct bin_data)); - bin->type = t; - break; - case TYPE_AND: - case TYPE_OR: - p->action = BINaction; - p->debug = BINdebug; - p->free = BINfree; - p->data = bin = mh_xcalloc(1, sizeof(struct bin_data)); - bin->type = t; - break; - case TYPE_GREP: - p->action = GREPaction; - p->debug = GREPdebug; - p->free = GREPfree; - p->data = mh_xcalloc(1, sizeof(struct grep_data)); - break; - case TYPE_DATE: - p->action = DATEaction; - p->debug = DATEdebug; - p->free = DATEfree; - p->data = mh_xcalloc(1, sizeof(struct 
date_data)); +} + +static void nexus_clear(struct nexus *n) +{ + n->match = FALSE; + switch(n->t) { + case and_t: + case or_t: + nexus_clear(n->data.b.right); + /* FALL */ + case not_t: + nexus_clear(n->data.b.left); break; default: - adios(EX_SOFTWARE, NULL, "unknown nexus type %d", t); + break; } - - return p; - } static int @@ -914,11 +881,13 @@ pmatches(FILE *fp, int msgnum) struct field f = {{0}}; enum state s = FLD2; + if (!head) return 1; - if (!talked++ && pdebug && head->debug) { - head->debug(head->data, 0); + nexus_clear(head); + if (!talked++ && pdebug) { + nexus_debug(head, 0); } while (s == FLD2 || s == BODY2) { @@ -927,167 +896,29 @@ pmatches(FILE *fp, int msgnum) s = FLD2; /* FALL */ case FLD2: - if (head->action(&f, msgnum, head->data)) { - return TRUE; - } + nexus_match(&f, msgnum, head); break; case BODY2: if (!body) { - return FALSE; - } - if (head->action(&f, msgnum, head->data)) { - return TRUE; + return head->match; } + nexus_match(&f, msgnum, head); break; case IOERR2: advise(NULL, "IOERR in message %d\n", msgnum); return FALSE; case FILEEOF2: - return FALSE; + break; default: adios(EX_SOFTWARE, "m_getfld2", "returned unknown state %d at message %d", s, msgnum); } } - return FALSE; -} - -void -print_debug_level(size_t level) -{ - size_t i; - - for (i = 0; i < level; i++) { - fputs("| ", stderr); - } -} - -void -BINdebug(void *data, size_t level) -{ - struct bin_data *bd = data; - - print_debug_level(level); - - switch (bd->type) { - case TYPE_OR: - fputs("OR\n", stderr); - break; - case TYPE_AND: - fputs("AND\n", stderr); - break; - case TYPE_NOT: - fputs("NOT\n", stderr); - break; - default: - advise(NULL, "binary nexus has unknown type: %d\n", bd->type); - return; - } - - if (bd->left && bd->left->debug) { - bd->left->debug(bd->left->data, level+1); - } else { - print_debug_level(level+1); - fputs("can't debug left child\n", stderr); - } - - if (bd->right && bd->right->debug) { - bd->right->debug(bd->right->data, level+1); - } else if 
(bd->type != TYPE_NOT) { - print_debug_level(level+1); - fputs("can't debug right child\n", stderr); - } -} - -static boolean -NOTaction(struct field *f, int msgnum, void *data) -{ - struct bin_data *bin = data; - return !bin->left->action(f, msgnum, bin->left->data); -} - -static boolean -BINaction(struct field *f, int msgnum, void *data) -{ - struct bin_data *bin = data; - - if (bin->oldmsgnum != msgnum) { - bin->oldmsgnum = msgnum; - bin->match = FALSE; - bin->leftmatch = FALSE; - bin->rightmatch = FALSE; - } - - if (bin->match) { - return bin->match; - } - - bin->leftmatch = bin->leftmatch || bin->left->action(f, msgnum, bin->left->data); - bin->rightmatch = bin->rightmatch || bin->right->action(f, msgnum, bin->right->data); - - switch (bin->type) { - case TYPE_OR: - bin->match = bin->leftmatch || bin->rightmatch; - break; - case TYPE_AND: - bin->match = bin->leftmatch && bin->rightmatch; - break; - default: - adios(EX_SOFTWARE, NULL, "unknown nexus type: %d\n", bin->type); - } - - return bin->match; -} - -static void -BINfree(struct nexus **n) -{ - struct bin_data *bd; - - if (!(*n)) { - return; - } - - bd = (*n)->data; - - if (bd->left && bd->left->free) { - bd->left->free(&bd->left); - } else { - advise(NULL, "BUG: can't free left child"); - } - - if (bd->right && bd->right->free) { - bd->right->free(&bd->right); - } else { - advise(NULL, "BUG: can't free right child"); - } - - mh_free0(n); -} - -static int -gcompile(struct grep_data *g, const char *astr) -{ - regex_t *preg = mh_xcalloc(1, sizeof(regex_t)); - char *buf; - int ret; - - g->preg = preg; - g->pattern = mh_xstrdup(astr); - ret = regcomp(preg, astr, REG_ICASE | REG_NOSUB); - if (ret != 0) { - buf = mh_xcalloc(BUFSIZ, sizeof(char)); - regerror(ret, g->preg, buf, BUFSIZ*sizeof(char)); - fprintf(stderr, "%s\n", buf); - return FALSE; - } - return TRUE; - + return head->match; } static boolean -GREPaction(struct field *f, int msgnum, void *data) +match_grep(struct field *f, struct grep_data *g) { - 
struct grep_data *g = data; int ret; char buf[BUFSIZ]; @@ -1095,16 +926,22 @@ GREPaction(struct field *f, int msgnum, void *data) return FALSE; } + if (!g->header) { + ret = regexec(g->preg, f->value, 0, NULL, 0); + goto out; + } + /* check for the right field */ if (!(g->header && *g->header && mh_strcasecmp(g->header, f->name)==0)) { return FALSE; } - if(decode_rfc2047(f->value, buf, sizeof(buf))) { + if (decode_rfc2047(f->value, buf, sizeof(buf))) { ret = regexec(g->preg, buf, 0, NULL, 0); } else { ret = regexec(g->preg, f->value, 0, NULL, 0); } +out: switch (ret) { case 0: return TRUE; @@ -1115,26 +952,99 @@ GREPaction(struct field *f, int msgnum, void *data) fprintf(stderr, "%s\n", buf); return FALSE; } +} + +static boolean +match_date(struct field *f, int msgnum, struct date_data *dd) +{ + struct tws *tw; + char *bp; + boolean ret = FALSE; + + if (mh_strcasecmp(f->name, dd->datef)!=0) { + return FALSE; + } + bp = mh_xstrdup(f->value); + if ((tw = dparsetime(bp)) == NULL) { + advise(NULL, "unable to parse %s field in message %d, not matching...", dd->datef, msgnum); + } else if (dd->after) { + ret = twsort(tw, &dd->tws) > 0; + } else { + ret = twsort(tw, &dd->tws) < 0; + } + + mh_free0(&bp); + return ret; +} +static boolean +nexus_match(struct field *f, int msgnum, struct nexus *n) +{ + switch (n->t) { + case and_t: + n->match = nexus_match(f, msgnum, n->data.b.left); + n->match = nexus_match(f, msgnum, n->data.b.right) && n->match; + break; + case or_t: + n->match = nexus_match(f, msgnum, n->data.b.left); + n->match = nexus_match(f, msgnum, n->data.b.right) || n->match; + break; + case not_t: + n->match = !nexus_match(f, msgnum, n->data.b.left); + break; + case date_t: + if (n->match) { + return n->match; + } + n->match = match_date(f, msgnum, &n->data.d); + break; + case grep_t: + if (n->match) { + return n->match; + } + n->match = match_grep(f, &n->data.g); + break; + default: + adios(EX_SOFTWARE, NULL, "nexus tree contains a unknown nexus_type (%d)", 
n->t); + } + return n->match; } static void -GREPfree(struct nexus **n) +nexus_debug(struct nexus *n, size_t level) { - struct grep_data *gd; - if (!(*n)) { - return; + struct date_data *dd; + print_debug_level(level); + switch (n->t) { + case and_t: + fputs("AND\n", stderr); + nexus_debug(n->data.b.left, level+1); + nexus_debug(n->data.b.right, level+1); + break; + case or_t: + fputs("OR\n", stderr); + nexus_debug(n->data.b.left, level+1); + nexus_debug(n->data.b.right, level+1); + break; + case not_t: + fputs("NOT\n", stderr); + nexus_debug(n->data.b.left, level+1); + break; + case grep_t: + nexus_debug_grep(&n->data.g); + break; + case date_t: + dd = &n->data.d; + fprintf(stderr, "TEMPORAL(%s) %s: %s\n",dd->after ? "after" : "before", dd->datef, dasctime(&dd->tws)); + break; + default: + adios(EX_SOFTWARE, NULL, "nexus tree contains a unknown nexus_type (%d)", n->t); } - gd = (*n)->data; - mh_free0(&gd->header); - regfree(gd->preg); - mh_free0(n); } static void -GREPdebug(void *data, size_t level) +nexus_debug_grep(struct grep_data *gd) { - struct grep_data *gd = data; char *buf, *buf2, *pbuf, *pbuf2; pbuf = pbuf2 = mh_xstrdup(gd->pattern); @@ -1143,8 +1053,6 @@ GREPdebug(void *data, size_t level) *pbuf2 = tolower(*pbuf2); } - print_debug_level(level); - if (gd->header) { buf = buf2 = mh_xstrdup(gd->header); for (;*buf2; buf2++) { @@ -1158,6 +1066,62 @@ GREPdebug(void *data, size_t level) mh_free0(&pbuf); } +static void +nexus_free(struct nexus **n) +{ + if (!(*n)) { + return; + } + switch((*n)->t) { + case and_t: + case or_t: + nexus_free(&(*n)->data.b.right); + /* FALL */ + case not_t: + nexus_free(&(*n)->data.b.left); + break; + case grep_t: + mh_free0(&(*n)->data.g.header); + mh_free0(&(*n)->data.g.pattern); + regfree((*n)->data.g.preg); + case date_t: + break; + default: + advise(NULL, "Unknown nexus_type (%d) to free", (*n)->t); + } + mh_free0(n); +} + +static void +print_debug_level(size_t level) +{ + size_t i; + + for (i = 0; i < level; i++) { + fputs("| 
", stderr); + } +} + +static int +gcompile(struct grep_data *g, const char *astr) +{ + regex_t *preg = mh_xcalloc(1, sizeof(regex_t)); + char *buf; + int ret; + + g->preg = preg; + g->pattern = mh_xstrdup(astr); + ret = regcomp(preg, astr, REG_ICASE | REG_NOSUB); + if (ret != 0) { + buf = mh_xcalloc(BUFSIZ, sizeof(char)); + regerror(ret, g->preg, buf, BUFSIZ*sizeof(char)); + fprintf(stderr, "%s\n", buf); + return FALSE; + } + return TRUE; + +} + static int tcompile(char *ap, struct tws *tb, int isafter) { @@ -1251,52 +1215,6 @@ tws_special(char *ap) } -static boolean -DATEaction(struct field *f, int msgnum, void *data) -{ - struct date_data *dd = data; - boolean state = FALSE; - char *bp; - struct tws *tw; - - if (mh_strcasecmp(f->name, dd->datef)!=0) { - return FALSE; - } - bp = mh_xstrdup(f->value); - if ((tw = dparsetime(bp)) == NULL) { - advise(NULL, "unable to parse %s field in message %d, not matching...", dd->datef, msgnum); - state = FALSE; - } else if (dd->after) { - state = twsort(tw, &dd->tws) > 0; - } else { - state = twsort(tw, &dd->tws) < 0; - } - - mh_free0(&bp); - - return state; -} - -static void -DATEfree(struct nexus **n) -{ - struct date_data *dd; - if (!(*n)) { - return; - } - dd = (*n)->data; - - mh_free0(n); -} - -static void -DATEdebug(void *data, size_t level) -{ - struct date_data *dd = data; - print_debug_level(level); - fprintf(stderr, "TEMPORAL(%s) %s: %s\n",dd->after ? 
"after" : "before", dd->datef, dasctime(&dd->tws)); -} - static struct nexus * createpickthread(char *msgs) { @@ -1343,8 +1261,8 @@ createpickthread(char *msgs) } - or = newnexus(TYPE_OR); - bd = or->data; + or = newnexus(or_t); + bd = &or->data.b; bd->right = ret; bd->left = c; ret = or; @@ -1359,28 +1277,26 @@ createpickthread(char *msgs) static struct nexus * createonethread(char *c) { - struct nexus *ret = newnexus(TYPE_OR); - struct nexus *left = newnexus(TYPE_GREP); - struct nexus *right = newnexus(TYPE_GREP); - struct bin_data *bd = ret->data; - struct grep_data *gd = left->data; + struct nexus *ret = newnexus(or_t); + struct nexus *left = newnexus(grep_t); + struct nexus *right = newnexus(grep_t); char buf[BUFSIZ]; - bd->left = left; - bd->right = right; - gd->header = mh_xstrdup("message-id"); + ret->data.b.left = left; + ret->data.b.right = right; + left->data.g.header = mh_xstrdup("message-id"); + snprintf(buf, sizeof(buf), "^[ \t]*<%s>", c); - if(!gcompile(gd, buf)) { + if(!gcompile(&left->data.g, buf)) { padvise(NULL, "pattern error %s", c); goto error; } - gd = right->data; - gd->header = mh_xstrdup("references"); + right->data.g.header = mh_xstrdup("references"); snprintf(buf, sizeof(buf), "^[ \t]*<%s>", c); - if(!gcompile(gd, buf)) { + if(!gcompile(&right->data.g, buf)) { padvise(NULL, "pattern error in %s", c); goto error; } @@ -1388,9 +1304,7 @@ createonethread(char *c) return ret; error: - GREPfree(&left); - GREPfree(&right); - BINfree(&ret); + nexus_free(&ret); return NULL; }