2 ** pick.c -- search for messages by content
4 ** This code is Copyright (c) 2002, 2008, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #ifdef HAVE_SYS_TIME_H
16 # include <sys/time.h>
/*
** Switch table for the pick command line; main() feeds each argument
** to smatch() together with this table.  Only part of the table is
** visible in this excerpt.
** NOTE(review): the integer in each entry looks like a minimum
** abbreviation length (see the remark on the datefield entry) --
** confirm against the definition of struct swit.
*/
20 static struct swit switches[] = {
34 { "date pattern", 0 },
36 { "from pattern", 0 },
38 { "search pattern", 0 },
40 { "subject pattern", 0 },
44 { "-othercomponent pattern", 0 },
50 { "datefield field", 5 }, /* 5 chars required to differ from -date */
52 { "sequence name", 0 },
/*
** Forward declarations of the two entry points main() uses:
** pcompile() builds the search expression from an argument vector,
** pmatches() evaluates it against one open message file.
*/
75 static int pcompile(char **, char *);
76 static int pmatches(FILE *, int, long, long);
/*
** List flag, initialised to -1.  main() forces it to 0 on the paths
** marked HACK, and it is tested together with isatty(fileno(stdout))
** further down the file.
*/
79 static int listsw = -1;
/*
** Program entry point.  The steps visible in this excerpt are:
** register putzero_done with atexit, parse the switches, settle on a
** folder and change into its directory, read the folder, convert the
** message arguments with m_convert, compile the search expression with
** pcompile, test every selected message with pmatches, add the
** survivors to the requested sequences and save sequences and context.
** Fatal problems are reported through adios().
*/
84 main(int argc, char **argv)
86 int publicsw = -1, zerosw = 1, vecp = 0;
87 unsigned int seqp = 0;
89 char *maildir, *folder = NULL, buf[100];
90 char *cp, **argp, **arguments;
91 char *seqs[NUMATTRS + 1], *vec[MAXARGS];
92 struct msgs_array msgs = { 0, 0, NULL };
/* arrange for putzero_done to run when the process exits */
96 if (atexit(putzero_done) != 0) {
97 adios(NULL, "atexit failed");
100 setlocale(LC_ALL, "");
101 invo_name = mhbasename(argv[0]);
103 /* read user profile/context */
106 arguments = getarguments(invo_name, argc, argv, 1);
/* consume arguments one at a time until the terminating NULL */
109 while ((cp = *argp++)) {
115 switch (smatch(cp, switches)) {
117 ambigsw(cp, switches);
118 listsw = 0; /* HACK */
121 adios(NULL, "-%s unknown", cp);
/* usage line handed to print_help */
124 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
125 print_help(buf, switches, 1);
126 listsw = 0; /* HACK */
129 print_version(invo_name);
130 listsw = 0; /* HACK */
144 if (!(cp = *argp++)) /* allow -xyz arguments */
145 adios(NULL, "missing argument to %s",
150 adios(NULL, "internal error!");
161 if (!(cp = *argp++) || *cp == '-')
162 adios(NULL, "missing argument to %s",
165 /* check if too many sequences specified */
166 if (seqp >= NUMATTRS)
167 adios(NULL, "too many sequences (more than %d) specified", NUMATTRS);
195 if (*cp == '+' || *cp == '@') {
197 adios(NULL, "only one folder at a time!");
199 folder = getcpy(expandfol(cp));
201 app_msgarg(&msgs, cp);
206 ** If we didn't specify which messages to search,
207 ** then search the whole folder.
210 app_msgarg(&msgs, seq_all);
213 folder = getcurfol();
214 maildir = toabsdir(folder);
216 if (chdir(maildir) == NOTOK)
217 adios(maildir, "unable to change directory to");
219 /* read folder and create message structure */
220 if (!(mp = folder_read(folder)))
221 adios(NULL, "unable to read folder %s", folder);
223 /* check for empty folder */
225 adios(NULL, "no messages in %s", folder);
227 /* parse all the message ranges/sequences and set SELECTED */
228 for (msgnum = 0; msgnum < msgs.size; msgnum++)
229 if (!m_convert(mp, msgs.msgs[msgnum]))
231 seq_setprev(mp); /* set the previous-sequence */
234 ** If we aren't saving the results to a sequence,
235 ** we default to list the results.
240 if (publicsw == 1 && is_readonly(mp))
241 adios(NULL, "folder %s is read-only, so -public not allowed",
244 if (!pcompile(vec, NULL))
251 ** If printing message numbers to standard out,
252 ** force line buffering on.
255 setvbuf(stdout, NULL, _IOLBF, 0);
258 ** Scan through all the SELECTED messages and check for a
259 ** match. If the message does not match, then unselect it.
261 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
262 if (is_selected(mp, msgnum)) {
/*
** A failed fopen is reported with admonish and leaves fp NULL, so
** the fp test on the pmatches line below skips the match attempt.
*/
263 if ((fp = fopen(cp = m_name(msgnum), "r")) == NULL)
264 admonish(cp, "unable to read message");
265 if (fp && pmatches(fp, msgnum, 0L, 0L)) {
272 printf("%s\n", m_name(msgnum));
274 /* if it doesn't match, then unselect it */
275 unset_selected(mp, msgnum);
287 adios(NULL, "no messages match specification");
292 ** Add the matching messages to sequences
/* seqs[] is walked up to its NULL terminator */
294 for (seqp = 0; seqs[seqp]; seqp++)
295 if (!seq_addsel(mp, seqs[seqp], publicsw, zerosw))
299 ** Print total matched if not printing each matched message number.
302 printf("%d hit%s\n", mp->numsel, mp->numsel == 1 ? "" : "s");
305 context_replace(curfolder, folder); /* update current folder */
306 seq_save(mp); /* synchronize message sequences */
307 context_save(); /* save the context file */
308 folder_free(mp); /* free folder/message structure */
309 listsw = 0; /* HACK */
/*
** NOTE(review): presumably the body of putzero_done, the function
** main() registers with atexit; its header is not part of this excerpt.
** The guarded statement only runs when listsw is non-zero and stdout
** is not a terminal.
*/
317 if (listsw && !isatty(fileno(stdout)))
/*
** Switch table used by the expression parser: the parsing routines
** below pass it to smatch() and ambigsw().  Only part of the table is
** visible in this excerpt.
*/
322 static struct swit parswit[] = {
336 { "date pattern", 0 },
338 { "from pattern", 0 },
340 { "search pattern", 0 },
342 { "subject pattern", 0 },
346 { "-othercomponent pattern", 15 },
350 { "before date", 0 },
352 { "datefield field", 5 },
356 /* DEFINITIONS FOR PATTERN MATCHING */
359 ** We really should be using re_comp() and re_exec() here. Unfortunately,
360 ** pick advertises that lowercase characters match characters of both
361 ** cases. Since re_exec() doesn't exhibit this behavior, we are stuck
362 ** with this version. Furthermore, we need to be able to save and restore
363 ** the state of the pattern matcher in order to do things "efficiently".
365 ** The matching power of this algorithm isn't as great as that of the re_xxx()
366 ** routines (no \(xxx\) and \n constructs). Such is life.
/*
** Shared line buffer of LBSIZE + 1 chars.  The matching code further
** down bounds its writes against &linebuf[LBSIZE - 1].
*/
382 static char linebuf[LBSIZE + 1];
/*
** 256 octal entries, eight per row, used as cc[ch] by the matcher.
** Every entry equals its own index except 0101-0132, which hold
** 0141-0172: upper-case ASCII letters are folded to lower case and
** everything else, including the 0200-0377 half, maps to itself.
** The declarator line of the array is not part of this excerpt.
*/
384 /* the magic array for case-independence */
386 0000,0001,0002,0003,0004,0005,0006,0007,
387 0010,0011,0012,0013,0014,0015,0016,0017,
388 0020,0021,0022,0023,0024,0025,0026,0027,
389 0030,0031,0032,0033,0034,0035,0036,0037,
390 0040,0041,0042,0043,0044,0045,0046,0047,
391 0050,0051,0052,0053,0054,0055,0056,0057,
392 0060,0061,0062,0063,0064,0065,0066,0067,
393 0070,0071,0072,0073,0074,0075,0076,0077,
394 0100,0141,0142,0143,0144,0145,0146,0147,
395 0150,0151,0152,0153,0154,0155,0156,0157,
396 0160,0161,0162,0163,0164,0165,0166,0167,
397 0170,0171,0172,0133,0134,0135,0136,0137,
398 0140,0141,0142,0143,0144,0145,0146,0147,
399 0150,0151,0152,0153,0154,0155,0156,0157,
400 0160,0161,0162,0163,0164,0165,0166,0167,
401 0170,0171,0172,0173,0174,0175,0176,0177,
403 0200,0201,0202,0203,0204,0205,0206,0207,
404 0210,0211,0212,0213,0214,0215,0216,0217,
405 0220,0221,0222,0223,0224,0225,0226,0227,
406 0230,0231,0232,0233,0234,0235,0236,0237,
407 0240,0241,0242,0243,0244,0245,0246,0247,
408 0250,0251,0252,0253,0254,0255,0256,0257,
409 0260,0261,0262,0263,0264,0265,0266,0267,
410 0270,0271,0272,0273,0274,0275,0276,0277,
411 0300,0301,0302,0303,0304,0305,0306,0307,
412 0310,0311,0312,0313,0314,0315,0316,0317,
413 0320,0321,0322,0323,0324,0325,0326,0327,
414 0330,0331,0332,0333,0334,0335,0336,0337,
415 0340,0341,0342,0343,0344,0345,0346,0347,
416 0350,0351,0352,0353,0354,0355,0356,0357,
417 0360,0361,0362,0363,0364,0365,0366,0367,
418 0370,0371,0372,0373,0374,0375,0376,0377,
422 ** DEFINITIONS FOR NEXUS
/*
** Argument-cursor helpers for the parser.
** nxtarg() yields the argument at argp and steps past it, or yields
** NULL without moving when argp already points at the NULL terminator.
** prvarg() steps argp back by one.
** padvise forwards to advise only while talked is still zero, and
** bumps talked on every use, so at most one parse diagnostic is shown.
** NOTE(review): padvise expands to an unbraced if statement, so using
** it directly in front of an else would capture that else.
*/
425 #define nxtarg() (*argp ? *argp++ : NULL)
426 #define prvarg() argp--
428 #define padvise if (!talked++) advise
/*
** Fragments of struct nexus, the expression-tree node, followed by the
** accessor macros for it.  The macros show that the node carries a
** member named un with three alternatives: st1 (left and right child
** pointers), st2 (pattern data: header flag, circf, compiled
** expression buffer, pattern text) and st3 (date data: field name,
** after flag, parsed time).
** NOTE(review): un is presumably a union -- the declaration itself is
** not part of this excerpt.
*/
434 /* for {OR,AND,NOT}action */
436 struct nexus *un_L_child;
437 struct nexus *un_R_child;
444 char un_expbuf[ESIZE];
/* child links, used by the OR/AND/NOT nodes */
457 #define n_L_child un.st1.un_L_child
458 #define n_R_child un.st1.un_R_child
/* pattern-match fields */
460 #define n_header un.st2.un_header
461 #define n_circf un.st2.un_circf
462 #define n_expbuf un.st2.un_expbuf
463 #define n_patbuf un.st2.un_patbuf
/* date-comparison fields */
465 #define n_datef un.st3.un_datef
466 #define n_after un.st3.un_after
467 #define n_tws un.st3.un_tws
/*
** pdebug is a debug flag, off by default; pmatches() consults it on
** its first call.  head is the root of the expression tree: pcompile()
** stores the result of parse() in it and pmatches() invokes its action.
*/
470 static int pdebug = 0;
475 static struct nexus *head;
478 ** prototypes for date routines
/*
** Forward declarations for the date helpers, the pattern compiler and
** matcher, the recursive-descent parser and the node actions.
** NOTE(review): the five action functions and the parameter of
** newnexus are declared with empty parentheses, i.e. without a
** prototype, so calls through them get no argument checking.
*/
480 static struct tws *tws_parse(char *, int);
481 static struct tws *tws_special(char *);
486 static void PRaction(struct nexus *, int);
487 static int gcompile(struct nexus *, char *);
488 static int advance(char *, char *);
489 static int cclass(unsigned char *, int, int);
490 static int tcompile(char *, struct tws *, int);
492 static struct nexus *parse(void);
493 static struct nexus *nexp1(void);
494 static struct nexus *nexp2(void);
495 static struct nexus *nexp3(void);
496 static struct nexus *newnexus(int (*)());
498 static int ORaction();
499 static int ANDaction();
500 static int NOTaction();
501 static int GREPaction();
502 static int TWSaction();
/*
** Compile the search expression into the tree rooted at head.
** vec is the argument vector and date an optional date-field name;
** the branch taken when date is NULL is not visible in this excerpt.
** A non-empty MHPDEBUG environment variable is checked first.
** When parse() yields no tree the result is 0 if a diagnostic was
** already issued (talked non-zero) and 1 otherwise.
*/
506 pcompile(char **vec, char *date)
510 if ((cp = getenv("MHPDEBUG")) && *cp)
514 if ((datesw = date) == NULL)
518 if ((head = parse()) == NULL)
519 return (talked ? 0 : 1);
/* report the argument currently at argp as unexpected */
522 padvise(NULL, "%s unexpected", *argp);
/*
** Top level of the expression parser, handling disjunction.
** NOTE(review): the line carrying the function name is missing from
** this excerpt; this is presumably parse(), since it calls nexp1()
** first and recurses through parse() for the right operand.
** It builds an ORaction node whose right child is the recursive parse
** and complains with the missing disjunctive message when that parse
** yields nothing.
*/
530 static struct nexus *
534 register struct nexus *n, *o;
/* stop here when there is no left operand or no further argument */
536 if ((n = nexp1()) == NULL || (cp = nxtarg()) == NULL)
540 padvise(NULL, "%s unexpected", cp);
546 switch (smatch(cp, parswit)) {
548 ambigsw(cp, parswit);
552 fprintf(stderr, "-%s unknown\n", cp);
557 o = newnexus(ORaction);
559 if ((o->n_R_child = parse()))
561 padvise(NULL, "missing disjunctive");
/*
** Conjunction level of the expression parser.
** NOTE(review): the line carrying the function name is missing from
** this excerpt; this is presumably nexp1(), since it calls nexp2()
** first and recurses through nexp1() for the right operand.
** It builds an ANDaction node and complains with the missing
** conjunctive message when the right operand is absent.
*/
571 static struct nexus *
575 register struct nexus *n, *o;
/* stop here when there is no left operand or no further argument */
577 if ((n = nexp2()) == NULL || (cp = nxtarg()) == NULL)
581 padvise(NULL, "%s unexpected", cp);
587 switch (smatch(cp, parswit)) {
589 ambigsw(cp, parswit);
593 fprintf(stderr, "-%s unknown\n", cp);
598 o = newnexus(ANDaction);
600 if ((o->n_R_child = nexp1()))
602 padvise(NULL, "missing conjunctive");
/*
** Negation level of the expression parser.
** NOTE(review): the line carrying the function name is missing from
** this excerpt; this is presumably nexp2(), the routine nexp1() calls.
** It builds a NOTaction node whose single (left) child comes from
** nexp3() and complains with the missing negation message when that
** operand is absent.
*/
613 static struct nexus *
617 register struct nexus *n;
619 if ((cp = nxtarg()) == NULL)
629 switch (smatch(cp, parswit)) {
631 ambigsw(cp, parswit);
635 fprintf(stderr, "-%s unknown\n", cp);
640 n = newnexus(NOTaction);
641 if ((n->n_L_child = nexp3()))
643 padvise(NULL, "missing negation");
/*
** Primary level of the expression parser: bracketed groups, header
** and body patterns, the date-field switch and the date comparisons.
** NOTE(review): the line carrying the function name is missing from
** this excerpt; this is presumably nexp3(), the routine nexp2() calls.
** The case labels are also missing, so which switch value reaches
** which fragment below cannot be read off this excerpt.
*/
653 static struct nexus *
657 register char *cp, *dp;
658 char buffer[BUFSIZ], temp[64];
659 register struct nexus *n;
661 if ((cp = nxtarg()) == NULL)
665 padvise(NULL, "%s unexpected", cp);
673 switch (i = smatch(cp, parswit)) {
675 ambigsw(cp, parswit);
679 fprintf(stderr, "-%s unknown\n", cp);
/* grouped sub-expression: parse it, then insist on the closing switch */
684 if ((n = parse()) == NULL) {
685 padvise(NULL, "missing group");
688 if ((cp = nxtarg()) == NULL) {
689 padvise(NULL, "missing -rbrace");
692 if (*cp++ == '-' && smatch(cp, parswit) == PRRBR)
694 padvise(NULL, "%s unexpected", --cp);
/*
** Copy the switch text into temp, terminate it explicitly, and keep
** its first blank-separated word in dp as the header name.
*/
706 strncpy(temp, parswit[i].sw, sizeof(temp));
707 temp[sizeof(temp) - 1] = '\0';
708 dp = *brkstring(temp, " ", NULL);
710 if (!(cp = nxtarg())) { /* allow -xyz arguments */
711 padvise(NULL, "missing argument to %s", argp[-2]);
714 n = newnexus(GREPaction);
/* anchored pattern: header name, optional blanks, colon, then the text */
716 snprintf(buffer, sizeof(buffer), "^%s[ \t]*:.*%s", dp, cp);
721 n = newnexus(GREPaction);
723 if (!(cp = nxtarg())) { /* allow -xyz arguments */
724 padvise(NULL, "missing argument to %s", argp[-2]);
729 if (!gcompile(n, dp)) {
730 padvise(NULL, "pattern error in %s %s", argp[-2], cp);
/* keep a copy of the pattern text on the node */
733 n->n_patbuf = getcpy(dp);
737 padvise(NULL, "internal error!");
/* the date-field switch needs an argument that does not start with a dash */
741 if (!(datesw = nxtarg()) || *datesw == '-') {
742 padvise(NULL, "missing argument to %s",
750 if (!(cp = nxtarg())) { /* allow -xyz arguments */
751 padvise(NULL, "missing argument to %s", argp[-2]);
754 n = newnexus(TWSaction);
/* n_after is set to 1 when the switch matched PRAFTR, else 0 */
756 if (!tcompile(cp, &n->n_tws, n->n_after = i == PRAFTR)) {
757 padvise(NULL, "unable to parse %s %s", argp[-2], cp);
/*
** Allocate one zero-filled expression node with calloc and store
** action as its n_action handler.  Allocation failure is fatal
** (adios).  The return statement is not part of this excerpt.
*/
765 static struct nexus *
766 newnexus(int (*action)())
768 register struct nexus *p;
770 if ((p = (struct nexus *) calloc((size_t) 1, sizeof *p)) == NULL)
771 adios(NULL, "unable to allocate component storage");
773 p->n_action = action;
/*
** args(a) expands to the argument list every node action receives:
** the node itself followed by fp, msgnum, start and stop.  params is
** the same list with the node named n.  The last line below is a
** fragment of a further multi-line macro whose first line is not part
** of this excerpt.
*/
778 #define args(a) a, fp, msgnum, start, stop
779 #define params args(n)
781 register struct nexus *n; \
/*
** Evaluate the compiled expression against one message by calling the
** action of the root node (head) with the node, fp, msgnum, start and
** stop.  The first call also checks pdebug; the statement that check
** guards is not part of this excerpt.
*/
788 pmatches(FILE *fp, int msgnum, long start, long stop)
793 if (!talked++ && pdebug)
796 return (*head->n_action) (args(head));
/*
** Debug dump of the expression tree to stderr.  Each node is prefixed
** with one bar marker per nesting level, identified by comparing its
** n_action pointer with the known actions, and its children are
** printed recursively one level deeper.
*/
801 PRaction(struct nexus *n, int level)
/* indentation: one marker per level */
805 for (i = 0; i < level; i++)
806 fprintf(stderr, "| ");
808 if (n->n_action == ORaction) {
809 fprintf(stderr, "OR\n");
810 PRaction(n->n_L_child, level + 1);
811 PRaction(n->n_R_child, level + 1);
814 if (n->n_action == ANDaction) {
815 fprintf(stderr, "AND\n");
816 PRaction(n->n_L_child, level + 1);
817 PRaction(n->n_R_child, level + 1);
/* negation has a single operand, kept in the left child */
820 if (n->n_action == NOTaction) {
821 fprintf(stderr, "NOT\n");
822 PRaction(n->n_L_child, level + 1);
825 if (n->n_action == GREPaction) {
826 fprintf(stderr, "PATTERN(%s) %s\n",
827 n->n_header ? "header" : "body", n->n_patbuf);
830 if (n->n_action == TWSaction) {
831 fprintf(stderr, "TEMPORAL(%s) %s: %s\n",
832 n->n_after ? "after" : "before", n->n_datef,
833 dasctime(&n->n_tws));
/*
** NOTE(review): the action pointer is cast through unsigned long down
** to unsigned int for the %x conversion, so high bits may be dropped
** on platforms where pointers are wider than int.
*/
836 fprintf(stderr, "UNKNOWN(0x%x)\n",
837 (unsigned int)(unsigned long) (*n->n_action));
/*
** NOTE(review): presumably the body of ORaction; its header is not
** part of this excerpt.  The left child is evaluated first; the
** statement taken when it succeeds is missing here, and otherwise the
** result is whatever the right child returns.
*/
845 if ((*n->n_L_child->n_action) (args(n->n_L_child)))
847 return (*n->n_R_child->n_action) (args(n->n_R_child));
/*
** NOTE(review): presumably the body of ANDaction; its header is not
** part of this excerpt.  The left child is evaluated first; the
** statement taken when it fails is missing here, and otherwise the
** result is whatever the right child returns.
*/
855 if (!(*n->n_L_child->n_action) (args(n->n_L_child)))
857 return (*n->n_R_child->n_action) (args(n->n_R_child));
/*
** NOTE(review): presumably the body of NOTaction; its header is not
** part of this excerpt.  Returns the logical negation of the left
** child result.
*/
865 return (!(*n->n_L_child->n_action) (args(n->n_L_child)));
/*
** Compile the pattern text astr into the n_expbuf buffer of node n.
** ep is the write cursor into that buffer and dp marks its end, so
** the ep >= dp tests below are the overflow checks.  Most of the
** compiler, including its case labels and return statements, is not
** part of this excerpt.
*/
870 gcompile(struct nexus *n, char *astr)
874 register unsigned char *ep, *dp, *sp, *lastep = 0;
876 dp = (ep = n->n_expbuf) + sizeof n->n_expbuf;
887 if ((c = *sp++) != '*')
/* a caret right after the opening bracket is tested for here */
914 if ((c = *sp++) == '^') {
/*
** A dash that is followed by something other than the closing bracket
** or the end of the pattern introduces a range: walk from the
** character after the last one stored up to, but excluding, the one
** that follows the dash.
*/
924 if (c == '-' && *sp != '\0' && *sp != ']') {
925 for (c = ep[-1]+1; c < *sp; c++) {
928 if (c == '\0' || ep >= dp)
934 if (c == '\0' || ep >= dp)
937 } while ((c = *sp++) != ']');
944 if ((c = *sp++) == '\0')
/*
** NOTE(review): presumably the body of GREPaction; its header is not
** part of this excerpt.  The visible fragments seek to start, read
** the message with fgets while tracking the byte position in pos,
** copy characters into linebuf and try advance() on the result.
*/
964 register char *p1, *p2, *ebp, *cbp;
967 fseek(fp, start, SEEK_SET);
971 if (body && n->n_header)
/* stop at end of file, or once pos has reached a non-zero stop offset */
978 if (fgets(ibuf, sizeof ibuf, fp) == NULL
979 || (stop && pos >= stop)) {
984 pos += (long) strlen(ibuf);
986 ebp = ibuf + strlen(ibuf);
989 if (lf && c != '\n') {
990 if (c != ' ' && c != '\t') {
/* bound check against the end of linebuf */
1009 if (c && p1 < &linebuf[LBSIZE - 1])
1019 if (advance(p1, p2))
/* compare both directly and through the cc folding table */
1027 if (*p1 == c || cc[(unsigned char)*p1] == c)
1028 if (advance(p1, p2))
1035 if (advance(p1, p2))
/*
** Try to match the compiled expression aep against the text at alp.
** Both are walked as unsigned char so they can index the cc folding
** table; a character matches when it equals the expression byte
** directly or after folding through cc.  The opcode dispatch and the
** return statements are not part of this excerpt.
*/
1043 advance(char *alp, char *aep)
1045 register unsigned char *lp, *ep, *curlp;
1047 lp = (unsigned char *)alp;
1048 ep = (unsigned char *)aep;
1052 if (*ep++ == *lp++ || ep[-1] == cc[lp[-1]])
/* character-class tests; the last argument of cclass is 1 here, 0 below */
1070 if (cclass(ep, *lp++, 1)) {
1077 if (cclass(ep, *lp++, 0)) {
/* consume as many repetitions of the same character as possible */
1091 while (*lp++ == *ep || cc[lp[-1]] == *ep)
1099 while (cclass(ep, *lp++, ep[-1] == (CCL | STAR)))
/* retry the rest of the expression while lp is still beyond curlp */
1107 if (advance(lp, ep))
1109 } while (lp > curlp);
1113 admonish(NULL, "advance() botch -- you lose big");
/*
** Character-class test used by advance(): aset is the class data, ac
** the character under test and af a flag supplied by the caller.  A
** member matches when it equals the character directly or equals its
** cc-folded form.  The loop structure and the return statements are
** not part of this excerpt.
*/
1120 cclass(unsigned char *aset, int ac, int af)
1122 register unsigned int n;
1123 register unsigned char c, *set;
1131 if (*set++ == c || set[-1] == cc[c])
/*
** Parse the date text ap with tws_parse(), forwarding isafter.  tb is
** the caller-supplied struct tws; the statements that use it and the
** return statements are not part of this excerpt.
*/
1139 tcompile(char *ap, struct tws *tb, int isafter)
1141 register struct tws *tw;
1143 if ((tw = tws_parse(ap, isafter)) == NULL)
/*
** Turn the date text ap into a struct tws, trying several readings in
** turn: a special name handled by tws_special(), the text as given,
** the text with the local zone appended, the text with the current
** time and zone appended, and the text prefixed with the current day,
** month and year.  The statements following each successful attempt
** are not part of this excerpt.
*/
1152 tws_parse(char *ap, int isafter)
1154 char buffer[BUFSIZ];
1155 register struct tws *tw, *ts;
/* special names get 23:59:59 when isafter is set and 00:00:00 otherwise */
1157 if ((tw = tws_special(ap)) != NULL) {
1158 tw->tw_sec = tw->tw_min = isafter ? 59 : 0;
1159 tw->tw_hour = isafter ? 23 : 0;
1162 if ((tw = dparsetime(ap)) != NULL)
/* the remaining attempts borrow fields from the current local time */
1165 if ((ts = dlocaltimenow()) == NULL)
1168 snprintf(buffer, sizeof(buffer), "%s %s", ap, dtwszone(ts));
1169 if ((tw = dparsetime(buffer)) != NULL)
1172 snprintf(buffer, sizeof(buffer), "%s %02d:%02d:%02d %s", ap,
1173 ts->tw_hour, ts->tw_min, ts->tw_sec, dtwszone(ts));
1174 if ((tw = dparsetime(buffer)) != NULL)
1177 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s",
1178 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year, ap);
1179 if ((tw = dparsetime(buffer)) != NULL)
1182 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s %s",
1183 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year,
1185 if ((tw = dparsetime(buffer)) != NULL)
/*
** Resolve the special date words today, yesterday and tomorrow, the
** day names listed in tw_ldotw, and a numeric day offset, relative to
** the value held in clock.  Comparisons go through mh_strcasecmp.
** The statement that initialises clock is not part of this excerpt.
*/
1193 tws_special(char *ap)
1197 register struct tws *tw;
1200 if (!mh_strcasecmp(ap, "today"))
1201 return dlocaltime(&clock);
/* one day is 60 * 60 * 24 seconds */
1202 if (!mh_strcasecmp(ap, "yesterday")) {
1203 clock -= (long) (60 * 60 * 24);
1204 return dlocaltime(&clock);
1206 if (!mh_strcasecmp(ap, "tomorrow")) {
1207 clock += (long) (60 * 60 * 24);
1208 return dlocaltime(&clock);
/* look ap up in the NULL-terminated day-name table */
1211 for (i = 0; tw_ldotw[i]; i++)
1212 if (!mh_strcasecmp(ap, tw_ldotw[i]))
1215 if ((tw = dlocaltime(&clock)) == NULL)
/* i becomes the distance in days from the current weekday */
1217 if ((i -= tw->tw_wday) > 0)
1223 else /* -ddd days ago */
1224 i = atoi(ap); /* we should error check this */
/* shift clock by i whole days */
1226 clock += (long) ((60 * 60 * 24) * i);
1227 return dlocaltime(&clock);
/*
** NOTE(review): presumably the body of TWSaction; its header is not
** part of this excerpt.  The visible fragments seek to start, read
** header fields with m_getfld, compare each field name with n_datef
** ignoring case, parse the value found with dparsetime and compare it
** with the time stored on the node.
*/
1237 char buf[BUFSIZ], name[NAMESZ];
1238 register struct tws *tw;
1240 fseek(fp, start, SEEK_SET);
1241 for (state = FLD, bp = NULL;;) {
1242 switch (state = m_getfld(state, name, buf, sizeof buf, fp)) {
/* keep reading while the field continues (FLDPLUS) */
1251 while (state == FLDPLUS) {
1252 state = m_getfld(state, name, buf,
1256 if (!mh_strcasecmp(name, n->n_datef))
1258 if (state != FLDEOF)
1266 if (state == LENERR || state == FMTERR)
1267 advise(NULL, "format error in message %d", msgnum);
1273 adios(NULL, "internal error -- you lose");
/*
** An unparsable date is reported and state is set to 1; otherwise
** state is the outcome of the twsort comparison: greater than zero
** for the after case, less than zero for the before case.
*/
1278 if ((tw = dparsetime(bp)) == NULL)
1279 advise(NULL, "unable to parse %s field in message %d, matching...",
1280 n->n_datef, msgnum), state = 1;
1282 state = n->n_after ? (twsort(tw, &n->n_tws) > 0)
1283 : (twsort(tw, &n->n_tws) < 0);