2 ** pick.c -- search for messages by content
4 ** This code is Copyright (c) 2002, 2008, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
16 #ifdef HAVE_SYS_TIME_H
17 # include <sys/time.h>
21 static struct swit switches[] = {
35 { "date pattern", 0 },
37 { "from pattern", 0 },
39 { "search pattern", 0 },
41 { "subject pattern", 0 },
45 { "-othercomponent pattern", 0 },
51 { "datefield field", 5 }, /* 5 chars required to differ from -date */
53 { "sequence name", 0 },
76 static int pcompile(char **, char *);
77 static int pmatches(FILE *, int, long, long);
80 static int listsw = -1;
85 main(int argc, char **argv)
87 int publicsw = -1, zerosw = 1, vecp = 0;
88 unsigned int seqp = 0;
90 char *maildir, *folder = NULL, buf[100];
91 char *cp, **argp, **arguments;
92 char *seqs[NUMATTRS + 1], *vec[MAXARGS];
93 struct msgs_array msgs = { 0, 0, NULL };
97 if (atexit(putzero_done) != 0) {
98 adios(EX_OSERR, NULL, "atexit failed");
101 setlocale(LC_ALL, "");
102 invo_name = mhbasename(argv[0]);
104 /* read user profile/context */
107 arguments = getarguments(invo_name, argc, argv, 1);
110 while ((cp = *argp++)) {
116 switch (smatch(cp, switches)) {
118 ambigsw(cp, switches);
119 listsw = 0; /* HACK */
122 adios(EX_USAGE, NULL, "-%s unknown", cp);
125 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
126 print_help(buf, switches, 1);
127 listsw = 0; /* HACK */
128 exit(argc == 2 ? EX_OK : EX_USAGE);
130 print_version(invo_name);
131 listsw = 0; /* HACK */
132 exit(argc == 2 ? EX_OK : EX_USAGE);
145 if (!(cp = *argp++)) /* allow -xyz arguments */
146 adios(EX_USAGE, NULL, "missing argument to %s",
151 adios(EX_SOFTWARE, NULL, "internal error!");
162 if (!(cp = *argp++) || *cp == '-')
163 adios(EX_USAGE, NULL, "missing argument to %s",
166 /* check if too many sequences specified */
167 if (seqp >= NUMATTRS)
168 adios(EX_USAGE, NULL, "too many sequences (more than %d) specified", NUMATTRS);
196 if (*cp == '+' || *cp == '@') {
198 adios(EX_USAGE, NULL, "only one folder at a time!");
200 folder = getcpy(expandfol(cp));
202 app_msgarg(&msgs, cp);
207 ** If we didn't specify which messages to search,
208 ** then search the whole folder.
211 app_msgarg(&msgs, seq_all);
214 folder = getcurfol();
215 maildir = toabsdir(folder);
217 if (chdir(maildir) == NOTOK)
218 adios(EX_OSERR, maildir, "unable to change directory to");
220 /* read folder and create message structure */
221 if (!(mp = folder_read(folder)))
222 adios(EX_IOERR, NULL, "unable to read folder %s", folder);
224 /* check for empty folder */
226 adios(EX_DATAERR, NULL, "no messages in %s", folder);
228 /* parse all the message ranges/sequences and set SELECTED */
229 for (msgnum = 0; msgnum < msgs.size; msgnum++)
230 if (!m_convert(mp, msgs.msgs[msgnum]))
232 seq_setprev(mp); /* set the previous-sequence */
235 ** If we aren't saving the results to a sequence,
236 ** we default to list the results.
241 if (publicsw == 1 && is_readonly(mp))
242 adios(EX_NOPERM, NULL, "folder %s is read-only, so -public not allowed",
245 if (!pcompile(vec, NULL))
252 ** If printing message numbers to standard out,
253 ** force line buffering on.
256 setvbuf(stdout, NULL, _IOLBF, 0);
259 ** Scan through all the SELECTED messages and check for a
260 ** match. If the message does not match, then unselect it.
262 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
263 if (is_selected(mp, msgnum)) {
264 if ((fp = fopen(cp = m_name(msgnum), "r")) == NULL)
265 admonish(cp, "unable to read message");
266 if (fp && pmatches(fp, msgnum, 0L, 0L)) {
273 printf("%s\n", m_name(msgnum));
275 /* if it doesn't match, then unselect it */
276 unset_selected(mp, msgnum);
287 adios(EX_DATAERR, NULL, "no messages match specification");
292 ** Add the matching messages to sequences
294 for (seqp = 0; seqs[seqp]; seqp++)
295 if (!seq_addsel(mp, seqs[seqp], publicsw, zerosw))
299 ** Print total matched if not printing each matched message number.
302 printf("%d hit%s\n", mp->numsel, mp->numsel == 1 ? "" : "s");
305 context_replace(curfolder, folder); /* update current folder */
306 seq_save(mp); /* synchronize message sequences */
307 context_save(); /* save the context file */
308 folder_free(mp); /* free folder/message structure */
309 listsw = 0; /* HACK */
317 if (listsw && !isatty(fileno(stdout)))
322 static struct swit parswit[] = {
336 { "date pattern", 0 },
338 { "from pattern", 0 },
340 { "search pattern", 0 },
342 { "subject pattern", 0 },
346 { "-othercomponent pattern", 15 },
350 { "before date", 0 },
352 { "datefield field", 5 },
356 /* DEFINITIONS FOR PATTERN MATCHING */
359 ** We really should be using re_comp() and re_exec() here. Unfortunately,
360 ** pick advertises that lowercase characters match characters of both
361 ** cases. Since re_exec() doesn't exhibit this behavior, we are stuck
362 ** with this version. Furthermore, we need to be able to save and restore
363 ** the state of the pattern matcher in order to do things "efficiently".
365 ** This algorithm isn't as powerful a matcher as the re_xxx()
366 ** routines (no \(xxx\) and \n constructs). Such is life.
382 static char linebuf[LBSIZE + 1];
383 static char decoded_linebuf[LBSIZE + 1];
385 /* the magic array for case-independence */
387 0000,0001,0002,0003,0004,0005,0006,0007,
388 0010,0011,0012,0013,0014,0015,0016,0017,
389 0020,0021,0022,0023,0024,0025,0026,0027,
390 0030,0031,0032,0033,0034,0035,0036,0037,
391 0040,0041,0042,0043,0044,0045,0046,0047,
392 0050,0051,0052,0053,0054,0055,0056,0057,
393 0060,0061,0062,0063,0064,0065,0066,0067,
394 0070,0071,0072,0073,0074,0075,0076,0077,
395 0100,0141,0142,0143,0144,0145,0146,0147,
396 0150,0151,0152,0153,0154,0155,0156,0157,
397 0160,0161,0162,0163,0164,0165,0166,0167,
398 0170,0171,0172,0133,0134,0135,0136,0137,
399 0140,0141,0142,0143,0144,0145,0146,0147,
400 0150,0151,0152,0153,0154,0155,0156,0157,
401 0160,0161,0162,0163,0164,0165,0166,0167,
402 0170,0171,0172,0173,0174,0175,0176,0177,
404 0200,0201,0202,0203,0204,0205,0206,0207,
405 0210,0211,0212,0213,0214,0215,0216,0217,
406 0220,0221,0222,0223,0224,0225,0226,0227,
407 0230,0231,0232,0233,0234,0235,0236,0237,
408 0240,0241,0242,0243,0244,0245,0246,0247,
409 0250,0251,0252,0253,0254,0255,0256,0257,
410 0260,0261,0262,0263,0264,0265,0266,0267,
411 0270,0271,0272,0273,0274,0275,0276,0277,
412 0300,0301,0302,0303,0304,0305,0306,0307,
413 0310,0311,0312,0313,0314,0315,0316,0317,
414 0320,0321,0322,0323,0324,0325,0326,0327,
415 0330,0331,0332,0333,0334,0335,0336,0337,
416 0340,0341,0342,0343,0344,0345,0346,0347,
417 0350,0351,0352,0353,0354,0355,0356,0357,
418 0360,0361,0362,0363,0364,0365,0366,0367,
419 0370,0371,0372,0373,0374,0375,0376,0377,
423 ** DEFINITIONS FOR NEXUS
/* Argument-cursor and diagnostic helpers used by the parse routines below; `argp` and `talked` are declared outside this excerpt. */
426 #define nxtarg() (*argp ? *argp++ : NULL) /* yield the current argument and advance argp; yield NULL without advancing when *argp is null */
427 #define prvarg() argp-- /* step argp back by one argument; NOTE(review): the expansion is not parenthesized */
429 #define padvise if (!talked++) advise /* calls advise() only when `talked` was zero, and increments `talked` on every use; NOTE(review): expands to a bare `if`, so a following `else` would bind to it */
435 /* for {OR,AND,NOT}action */
437 struct nexus *un_L_child;
438 struct nexus *un_R_child;
445 char un_expbuf[ESIZE];
/*
** Shorthand names for the nested `un.stN` members of a nexus node, so
** code can write n->n_L_child instead of n->un.st1.un_L_child.
*/
/* st1: child links, followed by PRaction() for OR/AND/NOT nodes */
458 #define n_L_child un.st1.un_L_child
459 #define n_R_child un.st1.un_R_child
/* st2: pattern data; PRaction() prints n_header and n_patbuf for GREPaction nodes */
461 #define n_header un.st2.un_header
462 #define n_circf un.st2.un_circf
463 #define n_expbuf un.st2.un_expbuf
464 #define n_patbuf un.st2.un_patbuf
/* st3: date data; PRaction() prints n_after, n_datef and n_tws for TWSaction nodes */
466 #define n_datef un.st3.un_datef
467 #define n_after un.st3.un_after
468 #define n_tws un.st3.un_tws
471 static int pdebug = 0;
476 static struct nexus *head;
479 ** prototypes for date routines
481 static struct tws *tws_parse(char *, int);
482 static struct tws *tws_special(char *);
487 static void PRaction(struct nexus *, int);
488 static int gcompile(struct nexus *, char *);
489 static int advance(char *, char *);
490 static int cclass(unsigned char *, int, int);
491 static int tcompile(char *, struct tws *, int);
493 static struct nexus *parse(void);
494 static struct nexus *nexp1(void);
495 static struct nexus *nexp2(void);
496 static struct nexus *nexp3(void);
497 static struct nexus *newnexus(int (*)());
499 static int ORaction();
500 static int ANDaction();
501 static int NOTaction();
502 static int GREPaction();
503 static int TWSaction();
507 pcompile(char **vec, char *date)
511 if ((cp = getenv("MHPDEBUG")) && *cp)
515 if ((datesw = date) == NULL)
519 if ((head = parse()) == NULL)
520 return (talked ? 0 : 1);
523 padvise(NULL, "%s unexpected", *argp);
531 static struct nexus *
537 if ((n = nexp1()) == NULL || (cp = nxtarg()) == NULL)
541 padvise(NULL, "%s unexpected", cp);
547 switch (smatch(cp, parswit)) {
549 ambigsw(cp, parswit);
553 fprintf(stderr, "-%s unknown\n", cp);
558 o = newnexus(ORaction);
560 if ((o->n_R_child = parse()))
562 padvise(NULL, "missing disjunctive");
572 static struct nexus *
578 if ((n = nexp2()) == NULL || (cp = nxtarg()) == NULL)
582 padvise(NULL, "%s unexpected", cp);
588 switch (smatch(cp, parswit)) {
590 ambigsw(cp, parswit);
594 fprintf(stderr, "-%s unknown\n", cp);
599 o = newnexus(ANDaction);
601 if ((o->n_R_child = nexp1()))
603 padvise(NULL, "missing conjunctive");
614 static struct nexus *
620 if ((cp = nxtarg()) == NULL)
630 switch (smatch(cp, parswit)) {
632 ambigsw(cp, parswit);
636 fprintf(stderr, "-%s unknown\n", cp);
641 n = newnexus(NOTaction);
642 if ((n->n_L_child = nexp3()))
644 padvise(NULL, "missing negation");
654 static struct nexus *
659 char buffer[BUFSIZ], temp[64];
662 if ((cp = nxtarg()) == NULL)
666 padvise(NULL, "%s unexpected", cp);
674 switch (i = smatch(cp, parswit)) {
676 ambigsw(cp, parswit);
680 fprintf(stderr, "-%s unknown\n", cp);
685 if ((n = parse()) == NULL) {
686 padvise(NULL, "missing group");
689 if ((cp = nxtarg()) == NULL) {
690 padvise(NULL, "missing -rbrace");
693 if (*cp++ == '-' && smatch(cp, parswit) == PRRBR)
695 padvise(NULL, "%s unexpected", --cp);
707 strncpy(temp, parswit[i].sw, sizeof(temp));
708 temp[sizeof(temp) - 1] = '\0';
709 dp = *brkstring(temp, " ", NULL);
711 if (!(cp = nxtarg())) { /* allow -xyz arguments */
712 padvise(NULL, "missing argument to %s", argp[-2]);
715 n = newnexus(GREPaction);
717 snprintf(buffer, sizeof(buffer), "^%s[ \t]*:.*%s", dp, cp);
722 n = newnexus(GREPaction);
724 if (!(cp = nxtarg())) { /* allow -xyz arguments */
725 padvise(NULL, "missing argument to %s", argp[-2]);
730 if (!gcompile(n, dp)) {
731 padvise(NULL, "pattern error in %s %s", argp[-2], cp);
734 n->n_patbuf = getcpy(dp);
738 padvise(NULL, "internal error!");
742 if (!(datesw = nxtarg()) || *datesw == '-') {
743 padvise(NULL, "missing argument to %s",
751 if (!(cp = nxtarg())) { /* allow -xyz arguments */
752 padvise(NULL, "missing argument to %s", argp[-2]);
755 n = newnexus(TWSaction);
757 if (!tcompile(cp, &n->n_tws, n->n_after = i == PRAFTR)) {
758 padvise(NULL, "unable to parse %s %s", argp[-2], cp);
766 static struct nexus *
767 newnexus(int (*action)())
771 if ((p = (struct nexus *) mh_xcalloc((size_t) 1, sizeof *p)) == NULL)
772 adios(EX_OSERR, NULL, "unable to allocate component storage");
774 p->n_action = action;
/* Argument-list macros for invoking node actions; `fp`, `msgnum`, `start` and `stop` must be in scope wherever they are expanded. */
779 #define args(a) a, fp, msgnum, start, stop /* expands to `a` followed by the four names above, e.g. (*head->n_action) (args(head)) */
780 #define params args(n) /* the same list with `n` as the first element */
789 pmatches(FILE *fp, int msgnum, long start, long stop)
794 if (!talked++ && pdebug)
797 return (*head->n_action) (args(head));
802 PRaction(struct nexus *n, int level)
806 for (i = 0; i < level; i++)
807 fprintf(stderr, "| ");
809 if (n->n_action == ORaction) {
810 fprintf(stderr, "OR\n");
811 PRaction(n->n_L_child, level + 1);
812 PRaction(n->n_R_child, level + 1);
815 if (n->n_action == ANDaction) {
816 fprintf(stderr, "AND\n");
817 PRaction(n->n_L_child, level + 1);
818 PRaction(n->n_R_child, level + 1);
821 if (n->n_action == NOTaction) {
822 fprintf(stderr, "NOT\n");
823 PRaction(n->n_L_child, level + 1);
826 if (n->n_action == GREPaction) {
827 fprintf(stderr, "PATTERN(%s) %s\n",
828 n->n_header ? "header" : "body", n->n_patbuf);
831 if (n->n_action == TWSaction) {
832 fprintf(stderr, "TEMPORAL(%s) %s: %s\n",
833 n->n_after ? "after" : "before", n->n_datef,
834 dasctime(&n->n_tws));
837 fprintf(stderr, "UNKNOWN(0x%x)\n",
838 (unsigned int)(unsigned long) (*n->n_action));
846 if ((*n->n_L_child->n_action) (args(n->n_L_child)))
848 return (*n->n_R_child->n_action) (args(n->n_R_child));
856 if (!(*n->n_L_child->n_action) (args(n->n_L_child)))
858 return (*n->n_R_child->n_action) (args(n->n_R_child));
866 return (!(*n->n_L_child->n_action) (args(n->n_L_child)));
871 gcompile(struct nexus *n, char *astr)
875 unsigned char *ep, *dp, *sp, *lastep = 0;
877 dp = (ep = n->n_expbuf) + sizeof n->n_expbuf;
888 if ((c = *sp++) != '*')
915 if ((c = *sp++) == '^') {
925 if (c == '-' && *sp != '\0' && *sp != ']') {
926 for (c = ep[-1]+1; c < *sp; c++) {
929 if (c == '\0' || ep >= dp)
935 if (c == '\0' || ep >= dp)
938 } while ((c = *sp++) != ']');
945 if ((c = *sp++) == '\0')
965 char *p1, *p2, *ebp, *cbp;
968 fseek(fp, start, SEEK_SET);
972 if (body && n->n_header)
979 if (fgets(ibuf, sizeof ibuf, fp) == NULL
980 || (stop && pos >= stop)) {
985 pos += (long) strlen(ibuf);
987 ebp = ibuf + strlen(ibuf);
990 if (lf && c != '\n') {
991 if (c != ' ' && c != '\t') {
1010 if (c && p1 < &linebuf[LBSIZE - 1])
1020 ** Attempt to decode as a MIME header. If it's the
1021 ** last header, body will be 1 and lf will be at least 1.
1023 if ((body == 0 || lf > 0) && decode_rfc2047(linebuf,
1024 decoded_linebuf, sizeof decoded_linebuf)) {
1025 p1 = decoded_linebuf;
1029 if (advance(p1, p2))
1037 if (*p1 == c || cc[(unsigned char)*p1] == c)
1038 if (advance(p1, p2))
1045 if (advance(p1, p2))
1053 advance(char *alp, char *aep)
1055 unsigned char *lp, *ep, *curlp;
1057 lp = (unsigned char *)alp;
1058 ep = (unsigned char *)aep;
1062 if (*ep++ == *lp++ || ep[-1] == cc[lp[-1]])
1080 if (cclass(ep, *lp++, 1)) {
1087 if (cclass(ep, *lp++, 0)) {
1101 while (*lp++ == *ep || cc[lp[-1]] == *ep)
1109 while (cclass(ep, *lp++, ep[-1] == (CCL | STAR)))
1117 if (advance(lp, ep))
1119 } while (lp > curlp);
1123 admonish(NULL, "advance() botch -- you lose big");
1130 cclass(unsigned char *aset, int ac, int af)
1133 unsigned char c, *set;
1141 if (*set++ == c || set[-1] == cc[c])
1149 tcompile(char *ap, struct tws *tb, int isafter)
1153 if ((tw = tws_parse(ap, isafter)) == NULL)
1162 tws_parse(char *ap, int isafter)
1164 char buffer[BUFSIZ];
1165 struct tws *tw, *ts;
1167 if ((tw = tws_special(ap)) != NULL) {
1168 tw->tw_sec = tw->tw_min = isafter ? 59 : 0;
1169 tw->tw_hour = isafter ? 23 : 0;
1172 if ((tw = dparsetime(ap)) != NULL)
1175 if ((ts = dlocaltimenow()) == NULL)
1178 snprintf(buffer, sizeof(buffer), "%s %s", ap, dtwszone(ts));
1179 if ((tw = dparsetime(buffer)) != NULL)
1182 snprintf(buffer, sizeof(buffer), "%s %02d:%02d:%02d %s", ap,
1183 ts->tw_hour, ts->tw_min, ts->tw_sec, dtwszone(ts));
1184 if ((tw = dparsetime(buffer)) != NULL)
1187 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s",
1188 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year, ap);
1189 if ((tw = dparsetime(buffer)) != NULL)
1192 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s %s",
1193 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year,
1195 if ((tw = dparsetime(buffer)) != NULL)
1203 tws_special(char *ap)
1210 if (!mh_strcasecmp(ap, "today"))
1211 return dlocaltime(&clock);
1212 if (!mh_strcasecmp(ap, "yesterday")) {
1213 clock -= (long) (60 * 60 * 24);
1214 return dlocaltime(&clock);
1216 if (!mh_strcasecmp(ap, "tomorrow")) {
1217 clock += (long) (60 * 60 * 24);
1218 return dlocaltime(&clock);
1221 for (i = 0; tw_ldotw[i]; i++)
1222 if (!mh_strcasecmp(ap, tw_ldotw[i]))
1225 if ((tw = dlocaltime(&clock)) == NULL)
1227 if ((i -= tw->tw_wday) > 0)
1233 else /* -ddd days ago */
1234 i = atoi(ap); /* we should error check this */
1236 clock += (long) ((60 * 60 * 24) * i);
1237 return dlocaltime(&clock);
1247 char buf[BUFSIZ], name[NAMESZ];
1250 fseek(fp, start, SEEK_SET);
1251 for (state = FLD, bp = NULL;;) {
1252 switch (state = m_getfld(state, name, buf, sizeof buf, fp)) {
1260 while (state == FLDPLUS) {
1261 state = m_getfld(state, name, buf,
1265 if (!mh_strcasecmp(name, n->n_datef))
1273 if (state == LENERR || state == FMTERR)
1274 advise(NULL, "format error in message %d", msgnum);
1280 adios(EX_SOFTWARE, NULL, "internal error -- you lose");
1285 if ((tw = dparsetime(bp)) == NULL)
1286 advise(NULL, "unable to parse %s field in message %d, matching...",
1287 n->n_datef, msgnum), state = 1;
1289 state = n->n_after ? (twsort(tw, &n->n_tws) > 0)
1290 : (twsort(tw, &n->n_tws) < 0);