2 ** pick.c -- search for messages by content
4 ** This code is Copyright (c) 2002, 2008, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
16 #ifdef HAVE_SYS_TIME_H
17 # include <sys/time.h>
/*
** Switch table for pick's own command line.  main() looks each
** argument up here with smatch() and also hands the table to
** ambigsw() and print_help().  The integer in each entry looks like
** a minimum abbreviation length (see the note on "datefield") --
** confirm against the definition of struct swit.
*/
21 static struct swit switches[] = {
35 { "date pattern", 0 },
37 { "from pattern", 0 },
39 { "search pattern", 0 },
41 { "subject pattern", 0 },
45 { "-othercomponent pattern", 0 },
51 { "datefield field", 5 }, /* 5 chars required to differ from -date */
53 { "sequence name", 0 },
/*
** pcompile() builds the search expression from an argument vector;
** pmatches() evaluates it against one open message.
*/
76 static int pcompile(char **, char *);
77 static int pmatches(FILE *, int, long, long);
/*
** Starts at -1.  main() forces it to 0 (marked HACK) before every
** exit path, and it is tested together with isatty(stdout) further
** down the file -- presumably in the atexit handler; confirm.
*/
80 static int listsw = -1;
/*
** Entry point for pick.
**
** Parses the command line (switches, at most one +folder/@folder,
** message specifications and sequence names), changes into the
** folder's directory, reads the folder and marks the requested
** messages SELECTED (the whole folder when none were given).  The
** search expression is compiled with pcompile() and every selected
** message is opened and tested with pmatches(); messages that do
** not match are unselected.  The survivors are printed and/or added
** to the named sequences, and finally the current folder, the
** sequences and the context are saved.
*/
85 main(int argc, char **argv)
87 int publicsw = -1, zerosw = 1, vecp = 0;
88 unsigned int seqp = 0;
90 char *maildir, *folder = NULL, buf[100];
91 char *cp, **argp, **arguments;
92 char *seqs[NUMATTRS + 1], *vec[MAXARGS];
93 struct msgs_array msgs = { 0, 0, NULL };
97 if (atexit(putzero_done) != 0) {
98 adios(EX_OSERR, NULL, "atexit failed");
101 setlocale(LC_ALL, "");
102 invo_name = mhbasename(argv[0]);
104 /* read user profile/context */
107 arguments = getarguments(invo_name, argc, argv, 1);
110 while ((cp = *argp++)) {
116 switch (smatch(cp, switches)) {
118 ambigsw(cp, switches);
119 listsw = 0; /* HACK */
122 adios(EX_USAGE, NULL, "-%s unknown", cp);
125 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
126 print_help(buf, switches, 1);
127 listsw = 0; /* HACK */
/* success only when the switch was the sole argument (argc == 2) */
128 exit(argc == 2 ? EX_OK : EX_USAGE);
130 print_version(invo_name);
131 listsw = 0; /* HACK */
132 exit(argc == 2 ? EX_OK : EX_USAGE);
145 if (!(cp = *argp++)) /* allow -xyz arguments */
146 adios(EX_USAGE, NULL, "missing argument to %s",
151 adios(EX_SOFTWARE, NULL, "internal error!");
162 if (!(cp = *argp++) || *cp == '-')
163 adios(EX_USAGE, NULL, "missing argument to %s",
166 /* check if too many sequences specified */
167 if (seqp >= NUMATTRS)
168 adios(EX_USAGE, NULL, "too many sequences (more than %d) specified", NUMATTRS);
196 if (*cp == '+' || *cp == '@') {
198 adios(EX_USAGE, NULL, "only one folder at a time!");
200 folder = getcpy(expandfol(cp));
202 app_msgarg(&msgs, cp);
207 ** If we didn't specify which messages to search,
208 ** then search the whole folder.
211 app_msgarg(&msgs, seq_all);
214 folder = getcurfol();
215 maildir = toabsdir(folder);
217 if (chdir(maildir) == NOTOK)
218 adios(EX_OSERR, maildir, "unable to change directory to");
220 /* read folder and create message structure */
221 if (!(mp = folder_read(folder)))
222 adios(EX_IOERR, NULL, "unable to read folder %s", folder);
224 /* check for empty folder */
226 adios(EX_DATAERR, NULL, "no messages in %s", folder);
228 /* parse all the message ranges/sequences and set SELECTED */
229 for (msgnum = 0; msgnum < msgs.size; msgnum++)
230 if (!m_convert(mp, msgs.msgs[msgnum]))
232 seq_setprev(mp); /* set the previous-sequence */
235 ** If we aren't saving the results to a sequence,
236 ** we default to list the results.
241 if (publicsw == 1 && is_readonly(mp))
242 adios(EX_NOPERM, NULL, "folder %s is read-only, so -public not allowed",
245 if (!pcompile(vec, NULL))
252 ** If printing message numbers to standard out,
253 ** force line buffering on.
256 setvbuf(stdout, NULL, _IOLBF, 0);
259 ** Scan through all the SELECTED messages and check for a
260 ** match. If the message does not match, then unselect it.
262 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
263 if (is_selected(mp, msgnum)) {
264 if ((fp = fopen(cp = m_name(msgnum), "r")) == NULL)
265 admonish(cp, "unable to read message");
/* an unreadable message (fp == NULL) is never passed to pmatches() */
266 if (fp && pmatches(fp, msgnum, 0L, 0L)) {
273 printf("%s\n", m_name(msgnum));
275 /* if it doesn't match, then unselect it */
276 unset_selected(mp, msgnum);
288 adios(EX_DATAERR, NULL, "no messages match specification");
293 ** Add the matching messages to sequences
295 for (seqp = 0; seqs[seqp]; seqp++)
296 if (!seq_addsel(mp, seqs[seqp], publicsw, zerosw))
300 ** Print total matched if not printing each matched message number.
303 printf("%d hit%s\n", mp->numsel, mp->numsel == 1 ? "" : "s");
306 context_replace(curfolder, folder); /* update current folder */
307 seq_save(mp); /* synchronize message sequences */
308 context_save(); /* save the context file */
309 folder_free(mp); /* free folder/message structure */
310 listsw = 0; /* HACK */
318 if (listsw && !isatty(fileno(stdout)))
/*
** Switch table used by the expression parser: the parsing routines
** below look up each token here with smatch() and report ambiguous
** ones with ambigsw().  The component entries' switch strings are
** also used to derive the header name to search (first word of the
** string).
*/
323 static struct swit parswit[] = {
337 { "date pattern", 0 },
339 { "from pattern", 0 },
341 { "search pattern", 0 },
343 { "subject pattern", 0 },
347 { "-othercomponent pattern", 15 },
351 { "before date", 0 },
353 { "datefield field", 5 },
357 /* DEFINITIONS FOR PATTERN MATCHING */
360 ** We really should be using re_comp() and re_exec() here. Unfortunately,
361 ** pick advertises that a lowercase character matches characters of both
362 ** cases. Since re_exec() doesn't exhibit this behavior, we are stuck
363 ** with this version. Furthermore, we need to be able to save and restore
364 ** the state of the pattern matcher in order to do things "efficiently".
366 ** This algorithm is not as powerful as the re_xxx() routines (it has
367 ** no \(xxx\) and \n constructs). Such is life.
/*
** Working buffers for the pattern matcher: linebuf holds the text
** being matched, decoded_linebuf receives the output of
** decode_rfc2047() when header decoding is attempted.
*/
383 static char linebuf[LBSIZE + 1];
384 static char decoded_linebuf[LBSIZE + 1];
386 /*
** The magic array for case-independence, indexed by byte value:
** 'A'-'Z' (0101-0132) map to 'a'-'z' (0141-0172) and every other
** value 0000-0377 maps to itself.  The matching code (which refers
** to a table named cc[]) accepts a text byte when either the byte
** itself or its table entry equals the pattern byte, so a lowercase
** pattern character matches both cases while an uppercase one
** matches only itself.
*/
388 0000,0001,0002,0003,0004,0005,0006,0007,
389 0010,0011,0012,0013,0014,0015,0016,0017,
390 0020,0021,0022,0023,0024,0025,0026,0027,
391 0030,0031,0032,0033,0034,0035,0036,0037,
392 0040,0041,0042,0043,0044,0045,0046,0047,
393 0050,0051,0052,0053,0054,0055,0056,0057,
394 0060,0061,0062,0063,0064,0065,0066,0067,
395 0070,0071,0072,0073,0074,0075,0076,0077,
396 0100,0141,0142,0143,0144,0145,0146,0147,
397 0150,0151,0152,0153,0154,0155,0156,0157,
398 0160,0161,0162,0163,0164,0165,0166,0167,
399 0170,0171,0172,0133,0134,0135,0136,0137,
400 0140,0141,0142,0143,0144,0145,0146,0147,
401 0150,0151,0152,0153,0154,0155,0156,0157,
402 0160,0161,0162,0163,0164,0165,0166,0167,
403 0170,0171,0172,0173,0174,0175,0176,0177,
405 0200,0201,0202,0203,0204,0205,0206,0207,
406 0210,0211,0212,0213,0214,0215,0216,0217,
407 0220,0221,0222,0223,0224,0225,0226,0227,
408 0230,0231,0232,0233,0234,0235,0236,0237,
409 0240,0241,0242,0243,0244,0245,0246,0247,
410 0250,0251,0252,0253,0254,0255,0256,0257,
411 0260,0261,0262,0263,0264,0265,0266,0267,
412 0270,0271,0272,0273,0274,0275,0276,0277,
413 0300,0301,0302,0303,0304,0305,0306,0307,
414 0310,0311,0312,0313,0314,0315,0316,0317,
415 0320,0321,0322,0323,0324,0325,0326,0327,
416 0330,0331,0332,0333,0334,0335,0336,0337,
417 0340,0341,0342,0343,0344,0345,0346,0347,
418 0350,0351,0352,0353,0354,0355,0356,0357,
419 0360,0361,0362,0363,0364,0365,0366,0367,
420 0370,0371,0372,0373,0374,0375,0376,0377,
424 ** DEFINITIONS FOR NEXUS
/*
** Token-stream helpers for the expression parser.  nxtarg() yields
** the current argument and advances argp, or yields NULL (without
** advancing) at the end of the vector; prvarg() steps back one.
*/
427 #define nxtarg() (*argp ? *argp++ : NULL)
428 #define prvarg() argp--
/*
** Report a parse error through advise(), but only the first time:
** talked is incremented on every use, so later uses are silent.
** NOTE(review): this expands to a bare "if", so using it as the
** unbraced body of an if/else would capture the else.
*/
430 #define padvise if (!talked++) advise
436 /* for {OR,AND,NOT}action */
438 struct nexus *un_L_child;
439 struct nexus *un_R_child;
446 char un_expbuf[ESIZE];
/*
** Shorthand for the per-action members of a nexus: st1 carries the
** child links of OR/AND/NOT nodes, st2 the pattern state used with
** GREPaction nodes, st3 the date state used with TWSaction nodes.
*/
459 #define n_L_child un.st1.un_L_child
460 #define n_R_child un.st1.un_R_child
462 #define n_header un.st2.un_header
463 #define n_circf un.st2.un_circf
464 #define n_expbuf un.st2.un_expbuf
465 #define n_patbuf un.st2.un_patbuf
467 #define n_datef un.st3.un_datef
468 #define n_after un.st3.un_after
469 #define n_tws un.st3.un_tws
/* debugging flag consulted by pmatches() on its first call */
472 static int pdebug = 0;
/* root of the expression tree: set by pcompile(), evaluated by pmatches() */
477 static struct nexus *head;
480 ** prototypes for date routines
482 static struct tws *tws_parse(char *, int);
483 static struct tws *tws_special(char *);
488 static void PRaction(struct nexus *, int);
489 static int gcompile(struct nexus *, char *);
490 static int advance(char *, char *);
491 static int cclass(unsigned char *, int, int);
492 static int tcompile(char *, struct tws *, int);
494 static struct nexus *parse(void);
495 static struct nexus *nexp1(void);
496 static struct nexus *nexp2(void);
497 static struct nexus *nexp3(void);
498 static struct nexus *newnexus(int (*)());
500 static int ORaction();
501 static int ANDaction();
502 static int NOTaction();
503 static int GREPaction();
504 static int TWSaction();
/*
** Compile the search expression into the tree rooted at head.
**
** vec  - argument vector holding the expression (presumably becomes
**        the parser's argp -- the assignment is not in view here).
** date - stored in datesw; NULL selects a default.
**
** The MHPDEBUG environment variable is consulted when it is set and
** non-empty.  When parse() yields no tree the result is 0 if an
** error was already reported (talked) and 1 otherwise; main() treats
** a zero result as failure.  A leftover argument is reported as
** "unexpected".
*/
508 pcompile(char **vec, char *date)
512 if ((cp = getenv("MHPDEBUG")) && *cp)
516 if ((datesw = date) == NULL)
520 if ((head = parse()) == NULL)
521 return (talked ? 0 : 1);
524 padvise(NULL, "%s unexpected", *argp);
/*
** Disjunction level of the recursive-descent parser (presumably
** parse(); the name line is not adjacent here).  Reads one nexp1()
** term; if a further token follows it is looked up in parswit, and
** for the disjunction switch an ORaction node is created whose
** right child is a recursive parse().  A missing right-hand side is
** reported as "missing disjunctive"; unknown or misplaced tokens
** are reported as well.
*/
532 static struct nexus *
538 if ((n = nexp1()) == NULL || (cp = nxtarg()) == NULL)
542 padvise(NULL, "%s unexpected", cp);
548 switch (smatch(cp, parswit)) {
550 ambigsw(cp, parswit);
554 fprintf(stderr, "-%s unknown\n", cp);
559 o = newnexus(ORaction);
561 if ((o->n_R_child = parse()))
563 padvise(NULL, "missing disjunctive");
/*
** Conjunction level of the parser (presumably nexp1()).  Reads one
** nexp2() term; if the following token is the conjunction switch an
** ANDaction node is created whose right child is a recursive
** nexp1().  A missing right-hand side is reported as "missing
** conjunctive".
*/
573 static struct nexus *
579 if ((n = nexp2()) == NULL || (cp = nxtarg()) == NULL)
583 padvise(NULL, "%s unexpected", cp);
589 switch (smatch(cp, parswit)) {
591 ambigsw(cp, parswit);
595 fprintf(stderr, "-%s unknown\n", cp);
600 o = newnexus(ANDaction);
602 if ((o->n_R_child = nexp1()))
604 padvise(NULL, "missing conjunctive");
/*
** Negation level of the parser (presumably nexp2()).  Looks at the
** next token; for the negation switch a NOTaction node is created
** whose left (only) child is the following nexp3() term.  A missing
** operand is reported as "missing negation".
*/
615 static struct nexus *
621 if ((cp = nxtarg()) == NULL)
631 switch (smatch(cp, parswit)) {
633 ambigsw(cp, parswit);
637 fprintf(stderr, "-%s unknown\n", cp);
642 n = newnexus(NOTaction);
643 if ((n->n_L_child = nexp3()))
645 padvise(NULL, "missing negation");
/*
** Primary level of the parser (presumably nexp3()).  Depending on
** the switch found in parswit it handles:
**   - a bracketed group: a nested parse() that must be followed by
**     the closing-brace switch (PRRBR);
**   - a header-component search: a GREPaction node whose pattern is
**     built from the component name and the user's pattern;
**   - a pattern search: a GREPaction node compiled with gcompile();
**   - the datefield switch: records the field name in datesw;
**   - before/after: a TWSaction node whose date is compiled with
**     tcompile().
** Every error is reported through padvise().
*/
655 static struct nexus *
660 char buffer[BUFSIZ], temp[64];
663 if ((cp = nxtarg()) == NULL)
667 padvise(NULL, "%s unexpected", cp);
675 switch (i = smatch(cp, parswit)) {
677 ambigsw(cp, parswit);
681 fprintf(stderr, "-%s unknown\n", cp);
686 if ((n = parse()) == NULL) {
687 padvise(NULL, "missing group");
690 if ((cp = nxtarg()) == NULL) {
691 padvise(NULL, "missing -rbrace");
694 if (*cp++ == '-' && smatch(cp, parswit) == PRRBR)
696 padvise(NULL, "%s unexpected", --cp);
/* copy the switch template (always NUL-terminated) and keep its first word */
708 strncpy(temp, parswit[i].sw, sizeof(temp));
709 temp[sizeof(temp) - 1] = '\0';
710 dp = *brkstring(temp, " ", NULL);
712 if (!(cp = nxtarg())) { /* allow -xyz arguments */
713 padvise(NULL, "missing argument to %s", argp[-2]);
716 n = newnexus(GREPaction);
/* anchor at the named field: "<name>", optional blanks, ':', then the pattern */
718 snprintf(buffer, sizeof(buffer), "^%s[ \t]*:.*%s", dp, cp);
723 n = newnexus(GREPaction);
725 if (!(cp = nxtarg())) { /* allow -xyz arguments */
726 padvise(NULL, "missing argument to %s", argp[-2]);
731 if (!gcompile(n, dp)) {
732 padvise(NULL, "pattern error in %s %s", argp[-2], cp);
735 n->n_patbuf = getcpy(dp);
739 padvise(NULL, "internal error!");
743 if (!(datesw = nxtarg()) || *datesw == '-') {
744 padvise(NULL, "missing argument to %s",
752 if (!(cp = nxtarg())) { /* allow -xyz arguments */
753 padvise(NULL, "missing argument to %s", argp[-2]);
756 n = newnexus(TWSaction);
/* n_after becomes 1 for the "after" switch (PRAFTR), 0 otherwise */
758 if (!tcompile(cp, &n->n_tws, n->n_after = i == PRAFTR)) {
759 padvise(NULL, "unable to parse %s %s", argp[-2], cp);
/*
** Allocate one nexus with mh_xcalloc() and record `action` as its
** n_action callback.  A NULL allocation is fatal (adios with
** EX_OSERR).
*/
767 static struct nexus *
768 newnexus(int (*action)())
772 if ((p = (struct nexus *) mh_xcalloc((size_t) 1, sizeof *p)) == NULL)
773 adios(EX_OSERR, NULL, "unable to allocate component storage");
775 p->n_action = action;
/*
** args(a) expands to the argument list handed to every action
** callback: the node `a` followed by fp, msgnum, start and stop
** taken from the enclosing scope.  params is the same list with the
** node named n.
*/
780 #define args(a) a, fp, msgnum, start, stop
781 #define params args(n)
/*
** Evaluate the compiled expression against one message.
**
** fp          - open stream on the message.
** msgnum      - message number.
** start, stop - passed through unchanged to the action callbacks.
**
** On the first call with pdebug set an extra step runs (presumably
** a dump of the tree -- confirm).  The result is whatever the root
** node's action returns.
*/
790 pmatches(FILE *fp, int msgnum, long start, long stop)
795 if (!talked++ && pdebug)
798 return (*head->n_action) (args(head));
/*
** Debugging aid: print the expression tree rooted at n to stderr.
** Each node is indented with one "| " per nesting level; OR/AND
** print both children, NOT prints its single child, pattern nodes
** print header/body and the pattern text, and date nodes print the
** direction, the field name and the date.  An unrecognised action
** is printed as UNKNOWN with the callback's address.
** NOTE(review): the address is narrowed to unsigned int for "%x",
** so it may be truncated where pointers are wider than int.
*/
803 PRaction(struct nexus *n, int level)
807 for (i = 0; i < level; i++)
808 fprintf(stderr, "| ");
810 if (n->n_action == ORaction) {
811 fprintf(stderr, "OR\n");
812 PRaction(n->n_L_child, level + 1);
813 PRaction(n->n_R_child, level + 1);
816 if (n->n_action == ANDaction) {
817 fprintf(stderr, "AND\n");
818 PRaction(n->n_L_child, level + 1);
819 PRaction(n->n_R_child, level + 1);
822 if (n->n_action == NOTaction) {
823 fprintf(stderr, "NOT\n");
824 PRaction(n->n_L_child, level + 1);
827 if (n->n_action == GREPaction) {
828 fprintf(stderr, "PATTERN(%s) %s\n",
829 n->n_header ? "header" : "body", n->n_patbuf);
832 if (n->n_action == TWSaction) {
833 fprintf(stderr, "TEMPORAL(%s) %s: %s\n",
834 n->n_after ? "after" : "before", n->n_datef,
835 dasctime(&n->n_tws));
838 fprintf(stderr, "UNKNOWN(0x%x)\n",
839 (unsigned int)(unsigned long) (*n->n_action));
/*
** Bodies of the three logical actions (presumably ORaction,
** ANDaction and NOTaction, in that order).  Each evaluates the left
** child through its own n_action callback first; the first two then
** fall through to the right child only on the path shown, which
** gives short-circuit evaluation.
*/
/* left child tested first; right child decides on the remaining path */
847 if ((*n->n_L_child->n_action) (args(n->n_L_child)))
849 return (*n->n_R_child->n_action) (args(n->n_R_child));
/* as above, but branching on a false left child */
857 if (!(*n->n_L_child->n_action) (args(n->n_L_child)))
859 return (*n->n_R_child->n_action) (args(n->n_R_child));
/* logical negation of the single (left) child */
867 return (!(*n->n_L_child->n_action) (args(n->n_L_child)));
/*
** Compile the pattern astr into n->n_expbuf.
**
** ep is the write cursor into the expression buffer and dp marks
** one past its end; the character-class code checks ep against dp
** so the buffer is not overrun.  The visible handling covers '*',
** bracketed classes with a leading '^' and "x-y" ranges (expanded
** member by member), and an escape that consumes the following
** character.  The caller treats a zero result as a pattern error.
*/
872 gcompile(struct nexus *n, char *astr)
876 unsigned char *ep, *dp, *sp, *lastep = 0;
878 dp = (ep = n->n_expbuf) + sizeof n->n_expbuf;
889 if ((c = *sp++) != '*')
916 if ((c = *sp++) == '^') {
/* a '-' between two class members denotes a range, unless it is last */
926 if (c == '-' && *sp != '\0' && *sp != ']') {
927 for (c = ep[-1]+1; c < *sp; c++) {
930 if (c == '\0' || ep >= dp)
936 if (c == '\0' || ep >= dp)
939 } while ((c = *sp++) != ']');
946 if ((c = *sp++) == '\0')
/*
** Body of the pattern-matching action (presumably GREPaction).
** Seeks to `start`, reads the message with fgets() while tracking
** the byte position (reading stops at end of file or once `stop`,
** when non-zero, is reached), assembles text in linebuf without
** exceeding LBSIZE, and runs the compiled expression over it with
** advance().  Header text is first offered to decode_rfc2047(); if
** that succeeds the decoded copy is matched instead.
*/
966 char *p1, *p2, *ebp, *cbp;
969 fseek(fp, start, SEEK_SET);
973 if (body && n->n_header)
980 if (fgets(ibuf, sizeof ibuf, fp) == NULL
981 || (stop && pos >= stop)) {
986 pos += (long) strlen(ibuf);
988 ebp = ibuf + strlen(ibuf);
991 if (lf && c != '\n') {
992 if (c != ' ' && c != '\t') {
1011 if (c && p1 < &linebuf[LBSIZE - 1])
1020 /* Attempt to decode as a MIME header. If it's the last header,
1021 * body will be 1 and lf will be at least 1. */
1022 if ((body == 0 || lf > 0) &&
1023 decode_rfc2047 (linebuf, decoded_linebuf, sizeof decoded_linebuf)) {
1024 p1 = decoded_linebuf;
1028 if (advance(p1, p2))
/* try a match only where the text byte, or its cc[] entry, equals c */
1036 if (*p1 == c || cc[(unsigned char)*p1] == c)
1037 if (advance(p1, p2))
1044 if (advance(p1, p2))
/*
** Try to match the compiled expression aep against the text at alp.
**
** Both pointers are viewed as unsigned char so that text bytes can
** index cc[].  A literal matches when the pattern byte equals the
** text byte or the text byte's cc[] entry; character classes are
** tested with cclass().  For starred items the text is consumed as
** far as it matches and the remainder of the expression is then
** retried recursively while stepping back towards curlp.  Reaching
** the final admonish() means an unrecognised opcode was found.
*/
1052 advance(char *alp, char *aep)
1054 unsigned char *lp, *ep, *curlp;
1056 lp = (unsigned char *)alp;
1057 ep = (unsigned char *)aep;
1061 if (*ep++ == *lp++ || ep[-1] == cc[lp[-1]])
1079 if (cclass(ep, *lp++, 1)) {
1086 if (cclass(ep, *lp++, 0)) {
1100 while (*lp++ == *ep || cc[lp[-1]] == *ep)
1108 while (cclass(ep, *lp++, ep[-1] == (CCL | STAR)))
1116 if (advance(lp, ep))
1118 } while (lp > curlp);
1122 admonish(NULL, "advance() botch -- you lose big");
/*
** Test character ac against the compiled character class aset.
** A member matches when it equals the character or the character's
** cc[] entry.  af is supplied by advance() as 1 or 0 (presumably
** the value to return on a hit, so that 0 implements a negated
** class -- confirm).
*/
1129 cclass(unsigned char *aset, int ac, int af)
1132 unsigned char c, *set;
1140 if (*set++ == c || set[-1] == cc[c])
/*
** Parse the date string ap with tws_parse() for use by a date node.
** tb is the destination supplied by the caller (&n->n_tws) and
** isafter is forwarded to tws_parse().  The caller treats a zero
** result as "unable to parse".
*/
1148 tcompile(char *ap, struct tws *tb, int isafter)
1152 if ((tw = tws_parse(ap, isafter)) == NULL)
/*
** Turn a user-supplied date string into a struct tws, trying
** progressively more forgiving interpretations:
**   1. a special word or offset understood by tws_special(); the
**      time of day is then forced to 23:59:59 when isafter is set
**      and to 00:00:00 otherwise;
**   2. the string exactly as given;
**   3. the string followed by the local time zone;
**   4. the string followed by the current time and zone;
**   5. today's date followed by the string;
**   6. today's date followed by the string and one more field.
** Steps 3-6 need the current local time from dlocaltimenow().
*/
1161 tws_parse(char *ap, int isafter)
1163 char buffer[BUFSIZ];
1164 struct tws *tw, *ts;
1166 if ((tw = tws_special(ap)) != NULL) {
1167 tw->tw_sec = tw->tw_min = isafter ? 59 : 0;
1168 tw->tw_hour = isafter ? 23 : 0;
1171 if ((tw = dparsetime(ap)) != NULL)
1174 if ((ts = dlocaltimenow()) == NULL)
1177 snprintf(buffer, sizeof(buffer), "%s %s", ap, dtwszone(ts));
1178 if ((tw = dparsetime(buffer)) != NULL)
1181 snprintf(buffer, sizeof(buffer), "%s %02d:%02d:%02d %s", ap,
1182 ts->tw_hour, ts->tw_min, ts->tw_sec, dtwszone(ts));
1183 if ((tw = dparsetime(buffer)) != NULL)
1186 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s",
1187 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year, ap);
1188 if ((tw = dparsetime(buffer)) != NULL)
1191 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s %s",
1192 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year,
1194 if ((tw = dparsetime(buffer)) != NULL)
/*
** Recognise relative date words, compared case-insensitively:
** "today", "yesterday" and "tomorrow" (plus or minus one 24-hour
** day from clock), a day-of-week name from tw_ldotw[] (converted to
** an offset in days from the current weekday), or otherwise a
** number of days taken from the string with atoi().  The result is
** the local time for the adjusted clock value.
** NOTE(review): the atoi() result is not validated (see the inline
** remark), and days are counted as fixed 86400-second steps.
*/
1202 tws_special(char *ap)
1209 if (!mh_strcasecmp(ap, "today"))
1210 return dlocaltime(&clock);
1211 if (!mh_strcasecmp(ap, "yesterday")) {
1212 clock -= (long) (60 * 60 * 24);
1213 return dlocaltime(&clock);
1215 if (!mh_strcasecmp(ap, "tomorrow")) {
1216 clock += (long) (60 * 60 * 24);
1217 return dlocaltime(&clock);
1220 for (i = 0; tw_ldotw[i]; i++)
1221 if (!mh_strcasecmp(ap, tw_ldotw[i]))
1224 if ((tw = dlocaltime(&clock)) == NULL)
1226 if ((i -= tw->tw_wday) > 0)
1232 else /* -ddd days ago */
1233 i = atoi(ap); /* we should error check this */
1235 clock += (long) ((60 * 60 * 24) * i);
1236 return dlocaltime(&clock);
/*
** Body of the date-comparison action (presumably TWSaction).
** Seeks to `start` and walks the message with m_getfld() looking
** for the field named n->n_datef (compared case-insensitively).
** LENERR/FMTERR states are reported as a format error.  The field
** text is parsed with dparsetime() and compared to the node's date
** with twsort(): later-than for an "after" node, earlier-than
** otherwise.
*/
1246 char buf[BUFSIZ], name[NAMESZ];
1249 fseek(fp, start, SEEK_SET);
1250 for (state = FLD, bp = NULL;;) {
1251 switch (state = m_getfld(state, name, buf, sizeof buf, fp)) {
1260 while (state == FLDPLUS) {
1261 state = m_getfld(state, name, buf,
1265 if (!mh_strcasecmp(name, n->n_datef))
1267 if (state != FLDEOF)
1275 if (state == LENERR || state == FMTERR)
1276 advise(NULL, "format error in message %d", msgnum);
1282 adios(EX_SOFTWARE, NULL, "internal error -- you lose");
/* a date that cannot be parsed is reported and counted as a match (state = 1) */
1287 if ((tw = dparsetime(bp)) == NULL)
1288 advise(NULL, "unable to parse %s field in message %d, matching...",
1289 n->n_datef, msgnum), state = 1;
1291 state = n->n_after ? (twsort(tw, &n->n_tws) > 0)
1292 : (twsort(tw, &n->n_tws) < 0);