2 ** pick.c -- search for messages by content
4 ** This code is Copyright (c) 2002, 2008, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
15 #include <h/scansbr.h>
16 #include <h/fmt_scan.h>
18 #ifdef HAVE_SYS_TIME_H
19 # include <sys/time.h>
23 static struct swit switches[] = {
37 { "date pattern", 0 },
39 { "from pattern", 0 },
41 { "search pattern", 0 },
43 { "subject pattern", 0 },
47 { "-othercomponent pattern", 0 },
53 { "datefield field", 5 }, /* 5 chars required to differ from -date */
55 { "sequence name", 0 },
69 { "format format", 0 },
71 { "width columns", 0 },
79 char *version=VERSION;
84 static int pcompile(char **, char *);
85 static int pmatches(FILE *, int, long, long);
88 static int listsw = -1;
92 static void printmsg(FILE *, struct msgs *, int, char *, int);
95 main(int argc, char **argv)
97 int publicsw = -1, zerosw = 1, vecp = 0, width = 0;
98 unsigned int seqp = 0;
100 char *maildir, *folder = NULL, buf[100];
101 char *cp, **argp, **arguments;
102 char *seqs[NUMATTRS + 1], *vec[MAXARGS];
103 struct msgs_array msgs = { 0, 0, NULL };
109 if (atexit(putzero_done) != 0) {
110 adios(EX_OSERR, NULL, "atexit failed");
113 setlocale(LC_ALL, "");
114 invo_name = mhbasename(argv[0]);
116 /* read user profile/context */
119 arguments = getarguments(invo_name, argc, argv, 1);
122 if (strcmp(invo_name, "scan")==0) {
126 while ((cp = *argp++)) {
132 switch (smatch(cp, switches)) {
134 ambigsw(cp, switches);
135 listsw = 0; /* HACK */
138 adios(EX_USAGE, NULL, "-%s unknown", cp);
141 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
142 print_help(buf, switches, 1);
143 listsw = 0; /* HACK */
144 exit(argc == 2 ? EX_OK : EX_USAGE);
146 print_version(invo_name);
147 listsw = 0; /* HACK */
148 exit(argc == 2 ? EX_OK : EX_USAGE);
161 if (!(cp = *argp++)) /* allow -xyz arguments */
162 adios(EX_USAGE, NULL, "missing argument to %s",
167 adios(EX_SOFTWARE, NULL, "internal error!");
178 if (!(cp = *argp++) || *cp == '-')
179 adios(EX_USAGE, NULL, "missing argument to %s",
182 /* check if too many sequences specified */
183 if (seqp >= NUMATTRS)
184 adios(EX_USAGE, NULL, "too many sequences (more than %d) specified", NUMATTRS);
211 if (!(form = *argp++) || *form == '-') {
212 adios(EX_USAGE, NULL, "missing argument to %s", argp[-2]);
216 if (!(cp = *argp++) || *cp == '-') {
217 adios(EX_USAGE, NULL, "missing argument to %s",
224 if (*cp == '+' || *cp == '@') {
226 adios(EX_USAGE, NULL, "only one folder at a time!");
228 folder = mh_xstrdup(expandfol(cp));
230 app_msgarg(&msgs, cp);
234 fmtstr = new_fs(form, "pick.default");
237 ** If we didn't specify which messages to search,
238 ** then search the whole folder.
241 app_msgarg(&msgs, seq_all);
244 folder = getcurfol();
245 maildir = toabsdir(folder);
247 if (chdir(maildir) == NOTOK)
248 adios(EX_OSERR, maildir, "unable to change directory to");
250 /* read folder and create message structure */
251 if (!(mp = folder_read(folder)))
252 adios(EX_IOERR, NULL, "unable to read folder %s", folder);
254 /* check for empty folder */
256 adios(EX_DATAERR, NULL, "no messages in %s", folder);
258 /* parse all the message ranges/sequences and set SELECTED */
259 for (msgnum = 0; msgnum < msgs.size; msgnum++)
260 if (!m_convert(mp, msgs.msgs[msgnum]))
262 seq_setprev(mp); /* set the previous-sequence */
265 ** If we aren't saving the results to a sequence,
266 ** we default to listing the results.
271 if (publicsw == 1 && is_readonly(mp))
272 adios(EX_NOPERM, NULL, "folder %s is read-only, so -public not allowed",
275 if (!pcompile(vec, NULL))
282 ** If printing message numbers to standard out,
283 ** force line buffering on.
286 setvbuf(stdout, NULL, _IOLBF, 0);
289 ** Scan through all the SELECTED messages and check for a
290 ** match. If the message does not match, then unselect it.
292 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
293 if (is_selected(mp, msgnum)) {
294 if ((fp = fopen(cp = m_name(msgnum), "r")) == NULL)
295 admonish(cp, "unable to read message");
296 if (fp && pmatches(fp, msgnum, 0L, 0L)) {
303 printmsg(fp, mp, msgnum, fmtstr, width);
306 /* if it doesn't match, then unselect it */
307 unset_selected(mp, msgnum);
318 adios(EX_DATAERR, NULL, "no messages match specification");
323 ** Add the matching messages to sequences
325 for (seqp = 0; seqs[seqp]; seqp++)
326 if (!seq_addsel(mp, seqs[seqp], publicsw, zerosw))
330 ** Print total matched if not printing each matched message number.
333 printf("%d hit%s\n", mp->numsel, mp->numsel == 1 ? "" : "s");
336 context_replace(curfolder, folder); /* update current folder */
337 seq_save(mp); /* synchronize message sequences */
338 context_save(); /* save the context file */
339 folder_free(mp); /* free folder/message structure */
340 listsw = 0; /* HACK */
348 if (listsw && !isatty(fileno(stdout)))
353 printmsg(FILE *f, struct msgs *mp, int msgnum, char *fmtstr, int width)
357 boolean unseen = FALSE;
359 fseek(f, 0L, SEEK_SET);
361 seqnum = seq_getnum(mp, seq_unseen);
362 unseen = in_sequence(mp, seqnum, msgnum);
364 switch (state = scan(f, msgnum, SCN_FOLD, fmtstr,
365 width, msgnum==mp->curmsg, unseen)) {
370 advise(NULL, "message %d: empty", msgnum);
373 adios(EX_SOFTWARE, NULL, "scan() botch(%d)", state);
378 static struct swit parswit[] = {
392 { "date pattern", 0 },
394 { "from pattern", 0 },
396 { "search pattern", 0 },
398 { "subject pattern", 0 },
402 { "-othercomponent pattern", 15 },
406 { "before date", 0 },
408 { "datefield field", 5 },
412 /* DEFINITIONS FOR PATTERN MATCHING */
415 ** We really should be using re_comp() and re_exec() here. Unfortunately,
416 ** pick advertises that lowercase characters match characters of both
417 ** cases. Since re_exec() doesn't exhibit this behavior, we are stuck
418 ** with this version. Furthermore, we need to be able to save and restore
419 ** the state of the pattern matcher in order to do things "efficiently".
421 ** This algorithm isn't as powerful as the re_xxx()
422 ** routines (no \(xxx\) and \n constructs). Such is life.
438 static char linebuf[LBSIZE + 1];
439 static char decoded_linebuf[LBSIZE + 1];
441 /* case-folding table: maps 'A'-'Z' to 'a'-'z', every other byte to itself */
443 0000,0001,0002,0003,0004,0005,0006,0007,
444 0010,0011,0012,0013,0014,0015,0016,0017,
445 0020,0021,0022,0023,0024,0025,0026,0027,
446 0030,0031,0032,0033,0034,0035,0036,0037,
447 0040,0041,0042,0043,0044,0045,0046,0047,
448 0050,0051,0052,0053,0054,0055,0056,0057,
449 0060,0061,0062,0063,0064,0065,0066,0067,
450 0070,0071,0072,0073,0074,0075,0076,0077,
451 0100,0141,0142,0143,0144,0145,0146,0147,
452 0150,0151,0152,0153,0154,0155,0156,0157,
453 0160,0161,0162,0163,0164,0165,0166,0167,
454 0170,0171,0172,0133,0134,0135,0136,0137,
455 0140,0141,0142,0143,0144,0145,0146,0147,
456 0150,0151,0152,0153,0154,0155,0156,0157,
457 0160,0161,0162,0163,0164,0165,0166,0167,
458 0170,0171,0172,0173,0174,0175,0176,0177,
460 0200,0201,0202,0203,0204,0205,0206,0207,
461 0210,0211,0212,0213,0214,0215,0216,0217,
462 0220,0221,0222,0223,0224,0225,0226,0227,
463 0230,0231,0232,0233,0234,0235,0236,0237,
464 0240,0241,0242,0243,0244,0245,0246,0247,
465 0250,0251,0252,0253,0254,0255,0256,0257,
466 0260,0261,0262,0263,0264,0265,0266,0267,
467 0270,0271,0272,0273,0274,0275,0276,0277,
468 0300,0301,0302,0303,0304,0305,0306,0307,
469 0310,0311,0312,0313,0314,0315,0316,0317,
470 0320,0321,0322,0323,0324,0325,0326,0327,
471 0330,0331,0332,0333,0334,0335,0336,0337,
472 0340,0341,0342,0343,0344,0345,0346,0347,
473 0350,0351,0352,0353,0354,0355,0356,0357,
474 0360,0361,0362,0363,0364,0365,0366,0367,
475 0370,0371,0372,0373,0374,0375,0376,0377,
479 ** DEFINITIONS FOR NEXUS
/*
** nxtarg() yields the string that argp currently points at and then
** steps argp forward by one entry; once argp points at a NULL entry
** it yields NULL and leaves argp where it is.
*/
482 #define nxtarg() (*argp ? *argp++ : NULL)
/*
** prvarg() steps argp back by one entry.  The expansion is not
** parenthesized, so use it only as a statement of its own.
*/
483 #define prvarg() argp--
/*
** padvise(...) calls advise(...) only when `talked' was zero, and
** increments `talked' on every use.  NOTE: it expands to a bare `if',
** so an `else' written after a padvise statement would bind to this
** hidden `if'.
*/
485 #define padvise if (!talked++) advise
491 /* for {OR,AND,NOT}action */
493 struct nexus *un_L_child;
494 struct nexus *un_R_child;
501 char un_expbuf[ESIZE];
514 #define n_L_child un.st1.un_L_child
515 #define n_R_child un.st1.un_R_child
517 #define n_header un.st2.un_header
518 #define n_circf un.st2.un_circf
519 #define n_expbuf un.st2.un_expbuf
520 #define n_patbuf un.st2.un_patbuf
522 #define n_datef un.st3.un_datef
523 #define n_after un.st3.un_after
524 #define n_tws un.st3.un_tws
527 static int pdebug = 0;
532 static struct nexus *head;
535 ** prototypes for date routines
537 static struct tws *tws_parse(char *, int);
538 static struct tws *tws_special(char *);
543 static void PRaction(struct nexus *, int);
544 static int gcompile(struct nexus *, char *);
545 static int advance(char *, char *);
546 static int cclass(unsigned char *, int, int);
547 static int tcompile(char *, struct tws *, int);
549 static struct nexus *parse(void);
550 static struct nexus *nexp1(void);
551 static struct nexus *nexp2(void);
552 static struct nexus *nexp3(void);
553 static struct nexus *newnexus(int (*)());
555 static int ORaction();
556 static int ANDaction();
557 static int NOTaction();
558 static int GREPaction();
559 static int TWSaction();
563 pcompile(char **vec, char *date)
567 if ((cp = getenv("MHPDEBUG")) && *cp)
571 if ((datesw = date) == NULL)
575 if ((head = parse()) == NULL)
576 return (talked ? 0 : 1);
579 padvise(NULL, "%s unexpected", *argp);
587 static struct nexus *
593 if ((n = nexp1()) == NULL || (cp = nxtarg()) == NULL)
597 padvise(NULL, "%s unexpected", cp);
603 switch (smatch(cp, parswit)) {
605 ambigsw(cp, parswit);
609 fprintf(stderr, "-%s unknown\n", cp);
614 o = newnexus(ORaction);
616 if ((o->n_R_child = parse()))
618 padvise(NULL, "missing disjunctive");
628 static struct nexus *
634 if ((n = nexp2()) == NULL || (cp = nxtarg()) == NULL)
638 padvise(NULL, "%s unexpected", cp);
644 switch (smatch(cp, parswit)) {
646 ambigsw(cp, parswit);
650 fprintf(stderr, "-%s unknown\n", cp);
655 o = newnexus(ANDaction);
657 if ((o->n_R_child = nexp1()))
659 padvise(NULL, "missing conjunctive");
670 static struct nexus *
676 if ((cp = nxtarg()) == NULL)
686 switch (smatch(cp, parswit)) {
688 ambigsw(cp, parswit);
692 fprintf(stderr, "-%s unknown\n", cp);
697 n = newnexus(NOTaction);
698 if ((n->n_L_child = nexp3()))
700 padvise(NULL, "missing negation");
710 static struct nexus *
715 char buffer[BUFSIZ], temp[64];
718 if ((cp = nxtarg()) == NULL)
722 padvise(NULL, "%s unexpected", cp);
730 switch (i = smatch(cp, parswit)) {
732 ambigsw(cp, parswit);
736 fprintf(stderr, "-%s unknown\n", cp);
741 if ((n = parse()) == NULL) {
742 padvise(NULL, "missing group");
745 if ((cp = nxtarg()) == NULL) {
746 padvise(NULL, "missing -rbrace");
749 if (*cp++ == '-' && smatch(cp, parswit) == PRRBR)
751 padvise(NULL, "%s unexpected", --cp);
763 strncpy(temp, parswit[i].sw, sizeof(temp));
764 temp[sizeof(temp) - 1] = '\0';
765 dp = *brkstring(temp, " ", NULL);
767 if (!(cp = nxtarg())) { /* allow -xyz arguments */
768 padvise(NULL, "missing argument to %s", argp[-2]);
771 n = newnexus(GREPaction);
773 snprintf(buffer, sizeof(buffer), "^%s[ \t]*:.*%s", dp, cp);
778 n = newnexus(GREPaction);
780 if (!(cp = nxtarg())) { /* allow -xyz arguments */
781 padvise(NULL, "missing argument to %s", argp[-2]);
786 if (!gcompile(n, dp)) {
787 padvise(NULL, "pattern error in %s %s", argp[-2], cp);
790 n->n_patbuf = mh_xstrdup(dp);
794 padvise(NULL, "internal error!");
798 if (!(datesw = nxtarg()) || *datesw == '-') {
799 padvise(NULL, "missing argument to %s",
807 if (!(cp = nxtarg())) { /* allow -xyz arguments */
808 padvise(NULL, "missing argument to %s", argp[-2]);
811 n = newnexus(TWSaction);
813 if (!tcompile(cp, &n->n_tws, n->n_after = i == PRAFTR)) {
814 padvise(NULL, "unable to parse %s %s", argp[-2], cp);
822 static struct nexus *
823 newnexus(int (*action)())
827 p = mh_xcalloc(1, sizeof *p);
829 p->n_action = action;
/*
** args(a) expands to the five-item argument list handed to a nexus
** action function: the expression `a' followed by fp, msgnum, start
** and stop.  Those four names are taken from the scope where the
** macro is used.
*/
834 #define args(a) a, fp, msgnum, start, stop
/* params is args(n): the same list with `n' as its first item. */
835 #define params args(n)
844 pmatches(FILE *fp, int msgnum, long start, long stop)
849 if (!talked++ && pdebug)
852 return (*head->n_action) (args(head));
857 PRaction(struct nexus *n, int level)
861 for (i = 0; i < level; i++)
862 fprintf(stderr, "| ");
864 if (n->n_action == ORaction) {
865 fprintf(stderr, "OR\n");
866 PRaction(n->n_L_child, level + 1);
867 PRaction(n->n_R_child, level + 1);
870 if (n->n_action == ANDaction) {
871 fprintf(stderr, "AND\n");
872 PRaction(n->n_L_child, level + 1);
873 PRaction(n->n_R_child, level + 1);
876 if (n->n_action == NOTaction) {
877 fprintf(stderr, "NOT\n");
878 PRaction(n->n_L_child, level + 1);
881 if (n->n_action == GREPaction) {
882 fprintf(stderr, "PATTERN(%s) %s\n",
883 n->n_header ? "header" : "body", n->n_patbuf);
886 if (n->n_action == TWSaction) {
887 fprintf(stderr, "TEMPORAL(%s) %s: %s\n",
888 n->n_after ? "after" : "before", n->n_datef,
889 dasctime(&n->n_tws));
892 fprintf(stderr, "UNKNOWN(0x%x)\n",
893 (unsigned int)(unsigned long) (*n->n_action));
901 if ((*n->n_L_child->n_action) (args(n->n_L_child)))
903 return (*n->n_R_child->n_action) (args(n->n_R_child));
911 if (!(*n->n_L_child->n_action) (args(n->n_L_child)))
913 return (*n->n_R_child->n_action) (args(n->n_R_child));
921 return (!(*n->n_L_child->n_action) (args(n->n_L_child)));
926 gcompile(struct nexus *n, char *astr)
930 unsigned char *ep, *dp, *sp, *lastep = 0;
932 dp = (ep = n->n_expbuf) + sizeof n->n_expbuf;
943 if ((c = *sp++) != '*')
970 if ((c = *sp++) == '^') {
980 if (c == '-' && *sp != '\0' && *sp != ']') {
981 for (c = ep[-1]+1; c < *sp; c++) {
984 if (c == '\0' || ep >= dp)
990 if (c == '\0' || ep >= dp)
993 } while ((c = *sp++) != ']');
1000 if ((c = *sp++) == '\0')
1020 char *p1, *p2, *ebp, *cbp;
1023 fseek(fp, start, SEEK_SET);
1027 if (body && n->n_header)
1034 if (fgets(ibuf, sizeof ibuf, fp) == NULL
1035 || (stop && pos >= stop)) {
1040 pos += (long) strlen(ibuf);
1042 ebp = ibuf + strlen(ibuf);
1045 if (lf && c != '\n') {
1046 if (c != ' ' && c != '\t') {
1065 if (c && p1 < &linebuf[LBSIZE - 1])
1075 ** Attempt to decode as a MIME header. If it's the
1076 ** last header, body will be 1 and lf will be at least 1.
1078 if ((body == 0 || lf > 0) && decode_rfc2047(linebuf,
1079 decoded_linebuf, sizeof decoded_linebuf)) {
1080 p1 = decoded_linebuf;
1084 if (advance(p1, p2))
1092 if (*p1 == c || cc[(unsigned char)*p1] == c)
1093 if (advance(p1, p2))
1100 if (advance(p1, p2))
1108 advance(char *alp, char *aep)
1110 unsigned char *lp, *ep, *curlp;
1112 lp = (unsigned char *)alp;
1113 ep = (unsigned char *)aep;
1117 if (*ep++ == *lp++ || ep[-1] == cc[lp[-1]])
1135 if (cclass(ep, *lp++, 1)) {
1142 if (cclass(ep, *lp++, 0)) {
1156 while (*lp++ == *ep || cc[lp[-1]] == *ep)
1164 while (cclass(ep, *lp++, ep[-1] == (CCL | STAR)))
1172 if (advance(lp, ep))
1174 } while (lp > curlp);
1178 admonish(NULL, "advance() botch -- you lose big");
1185 cclass(unsigned char *aset, int ac, int af)
1188 unsigned char c, *set;
1196 if (*set++ == c || set[-1] == cc[c])
1204 tcompile(char *ap, struct tws *tb, int isafter)
1208 if ((tw = tws_parse(ap, isafter)) == NULL)
1217 tws_parse(char *ap, int isafter)
1219 char buffer[BUFSIZ];
1220 struct tws *tw, *ts;
1222 if ((tw = tws_special(ap)) != NULL) {
1223 tw->tw_sec = tw->tw_min = isafter ? 59 : 0;
1224 tw->tw_hour = isafter ? 23 : 0;
1227 if ((tw = dparsetime(ap)) != NULL)
1230 if ((ts = dlocaltimenow()) == NULL)
1233 snprintf(buffer, sizeof(buffer), "%s %s", ap, dtwszone(ts));
1234 if ((tw = dparsetime(buffer)) != NULL)
1237 snprintf(buffer, sizeof(buffer), "%s %02d:%02d:%02d %s", ap,
1238 ts->tw_hour, ts->tw_min, ts->tw_sec, dtwszone(ts));
1239 if ((tw = dparsetime(buffer)) != NULL)
1242 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s",
1243 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year, ap);
1244 if ((tw = dparsetime(buffer)) != NULL)
1247 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s %s",
1248 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year,
1250 if ((tw = dparsetime(buffer)) != NULL)
1258 tws_special(char *ap)
1265 if (!mh_strcasecmp(ap, "today"))
1266 return dlocaltime(&clock);
1267 if (!mh_strcasecmp(ap, "yesterday")) {
1268 clock -= (long) (60 * 60 * 24);
1269 return dlocaltime(&clock);
1271 if (!mh_strcasecmp(ap, "tomorrow")) {
1272 clock += (long) (60 * 60 * 24);
1273 return dlocaltime(&clock);
1276 for (i = 0; tw_ldotw[i]; i++)
1277 if (!mh_strcasecmp(ap, tw_ldotw[i]))
1280 if ((tw = dlocaltime(&clock)) == NULL)
1282 if ((i -= tw->tw_wday) > 0)
1288 else /* -ddd days ago */
1289 i = atoi(ap); /* we should error check this */
1291 clock += (long) ((60 * 60 * 24) * i);
1292 return dlocaltime(&clock);
1301 struct field f = {{0}};
1305 fseek(fp, start, SEEK_SET);
1306 for (state = FLD2, bp = NULL;;) {
1307 switch (state = m_getfld2(state, &f, fp)) {
1316 bp = mh_xstrdup(f.value);
1317 if (mh_strcasecmp(f.name, n->n_datef)==0) {
1323 advise(NULL, "format error in message %d", msgnum);
1328 adios(EX_IOERR, "m_getfld2", "io error on message %d", msgnum);
1337 adios(EX_SOFTWARE, NULL, "internal error -- you lose");
1342 if ((tw = dparsetime(bp)) == NULL)
1343 advise(NULL, "unable to parse %s field in message %d, matching...",
1344 n->n_datef, msgnum), state = 1;
1346 state = n->n_after ? (twsort(tw, &n->n_tws) > 0)
1347 : (twsort(tw, &n->n_tws) < 0);