2 ** pick.c -- search for messages by content
4 ** This code is Copyright (c) 2002, 2008, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
16 #ifdef HAVE_SYS_TIME_H
17 # include <sys/time.h>
/*
** Switch table for the pick command line; main() passes it to smatch(),
** ambigsw() and print_help().  Only some entries are visible in this
** excerpt.  NOTE(review): the integer in each entry looks like a minimum
** abbreviation length (see the -datefield remark below) -- confirm
** against the definition of struct swit.
*/
21 static struct swit switches[] = {
35 { "date pattern", 0 },
37 { "from pattern", 0 },
39 { "search pattern", 0 },
41 { "subject pattern", 0 },
45 { "-othercomponent pattern", 0 },
51 { "datefield field", 5 }, /* 5 chars required to differ from -date */
53 { "sequence name", 0 },
/*
** Forward declarations for the two matcher entry points that main() calls.
*/
76 static int pcompile(char **, char *);
77 static int pmatches(FILE *, int, long, long);
/*
** listsw starts at -1.  main() forces it to 0 (the lines marked HACK) on
** its help, version, ambiguous-switch and normal completion paths, and the
** isatty() test near the end of main reads it.  NOTE(review): -1
** presumably means that no explicit list choice has been made yet --
** confirm against the switch handling that is outside this excerpt.
*/
80 static int listsw = -1;
/*
** Entry point (excerpt -- many lines of the body are not visible here).
** Visible flow:
**  - register putzero_done() with atexit() and set the locale;
**  - walk the arguments: switches are resolved with smatch(); an argument
**    starting with + or @ names the folder (only one is allowed); anything
**    else is appended to msgs;
**  - with no message arguments, seq_all is searched;
**  - chdir into the folder, read it with folder_read(), and let
**    m_convert() process each requested range or sequence;
**  - compile the search expression with pcompile();
**  - for each selected message between lowsel and hghsel, open it and
**    call pmatches(); matching names may be printed, and messages that
**    do not match are unselected;
**  - add the remaining selection to every entry of seqs, optionally
**    print the hit count, then save sequences and context and free the
**    folder structure.
*/
85 main(int argc, char **argv)
87 int publicsw = -1, zerosw = 1, vecp = 0;
88 unsigned int seqp = 0;
90 char *maildir, *folder = NULL, buf[100];
91 char *cp, **argp, **arguments;
92 char *seqs[NUMATTRS + 1], *vec[MAXARGS];
93 struct msgs_array msgs = { 0, 0, NULL };
97 if (atexit(putzero_done) != 0) {
98 adios(EX_OSERR, NULL, "atexit failed");
101 setlocale(LC_ALL, "");
102 invo_name = mhbasename(argv[0]);
104 /* read user profile/context */
107 arguments = getarguments(invo_name, argc, argv, 1);
110 while ((cp = *argp++)) {
116 switch (smatch(cp, switches)) {
118 ambigsw(cp, switches);
119 listsw = 0; /* HACK */
122 adios(EX_USAGE, NULL, "-%s unknown", cp);
125 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
126 print_help(buf, switches, 1);
127 listsw = 0; /* HACK */
/* a lone help request (argc == 2) exits with EX_OK; any other combination exits with EX_USAGE */
128 exit(argc == 2 ? EX_OK : EX_USAGE);
130 print_version(invo_name);
131 listsw = 0; /* HACK */
132 exit(argc == 2 ? EX_OK : EX_USAGE);
145 if (!(cp = *argp++)) /* allow -xyz arguments */
146 adios(EX_USAGE, NULL, "missing argument to %s",
151 adios(EX_SOFTWARE, NULL, "internal error!");
162 if (!(cp = *argp++) || *cp == '-')
163 adios(EX_USAGE, NULL, "missing argument to %s",
166 /* check if too many sequences specified */
167 if (seqp >= NUMATTRS)
168 adios(EX_USAGE, NULL, "too many sequences (more than %d) specified", NUMATTRS);
196 if (*cp == '+' || *cp == '@') {
198 adios(EX_USAGE, NULL, "only one folder at a time!");
200 folder = getcpy(expandfol(cp));
202 app_msgarg(&msgs, cp);
207 ** If we didn't specify which messages to search,
208 ** then search the whole folder.
211 app_msgarg(&msgs, seq_all);
214 folder = getcurfol();
215 maildir = toabsdir(folder);
217 if (chdir(maildir) == NOTOK)
218 adios(EX_OSERR, maildir, "unable to change directory to");
220 /* read folder and create message structure */
221 if (!(mp = folder_read(folder)))
222 adios(EX_IOERR, NULL, "unable to read folder %s", folder);
224 /* check for empty folder */
226 adios(EX_DATAERR, NULL, "no messages in %s", folder);
228 /* parse all the message ranges/sequences and set SELECTED */
229 for (msgnum = 0; msgnum < msgs.size; msgnum++)
230 if (!m_convert(mp, msgs.msgs[msgnum]))
232 seq_setprev(mp); /* set the previous-sequence */
235 ** If we aren't saving the results to a sequence,
236 ** we default to list the results.
241 if (publicsw == 1 && is_readonly(mp))
242 adios(EX_NOPERM, NULL, "folder %s is read-only, so -public not allowed",
245 if (!pcompile(vec, NULL))
252 ** If printing message numbers to standard out,
253 ** force line buffering on.
256 setvbuf(stdout, NULL, _IOLBF, 0);
259 ** Scan through all the SELECTED messages and check for a
260 ** match. If the message does not match, then unselect it.
262 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
263 if (is_selected(mp, msgnum)) {
264 if ((fp = fopen(cp = m_name(msgnum), "r")) == NULL)
265 admonish(cp, "unable to read message");
266 if (fp && pmatches(fp, msgnum, 0L, 0L)) {
273 printf("%s\n", m_name(msgnum));
275 /* if it doesn't match, then unselect it */
276 unset_selected(mp, msgnum);
287 adios(EX_DATAERR, NULL, "no messages match specification");
292 ** Add the matching messages to sequences
294 for (seqp = 0; seqs[seqp]; seqp++)
295 if (!seq_addsel(mp, seqs[seqp], publicsw, zerosw))
299 ** Print total matched if not printing each matched message number.
302 printf("%d hit%s\n", mp->numsel, mp->numsel == 1 ? "" : "s");
305 context_replace(curfolder, folder); /* update current folder */
306 seq_save(mp); /* synchronize message sequences */
307 context_save(); /* save the context file */
308 folder_free(mp); /* free folder/message structure */
309 listsw = 0; /* HACK */
/*
** NOTE(review): this test presumably belongs to putzero_done(), the
** handler that main() registers with atexit(); the function header and
** the guarded statement are outside this excerpt.  The condition holds
** only when listsw is nonzero and stdout is not a terminal.
*/
317 if (listsw && !isatty(fileno(stdout)))
/*
** Switch table for the search expression itself; the parsing routines
** further down resolve each argument against it with smatch() and use
** the resulting index.  Only some entries are visible in this excerpt.
*/
322 static struct swit parswit[] = {
336 { "date pattern", 0 },
338 { "from pattern", 0 },
340 { "search pattern", 0 },
342 { "subject pattern", 0 },
346 { "-othercomponent pattern", 15 },
350 { "before date", 0 },
352 { "datefield field", 5 },
356 /* DEFINITIONS FOR PATTERN MATCHING */
359 ** We really should be using re_comp() and re_exec() here. Unfortunately,
360 ** pick advertises that lowercase characters match characters of both
361 ** cases. Since re_exec() doesn't exhibit this behavior, we are stuck
362 ** with this version. Furthermore, we need to be able to save and restore
363 ** the state of the pattern matcher in order to do things "efficiently".
365 ** This algorithm's matching isn't as powerful as that of the re_xxx()
366 ** routines (no \(xxx\) and \n constructs). Such is life.
/*
** Working buffers for pattern matching: linebuf collects the text to be
** matched and decoded_linebuf receives the output of decode_rfc2047().
*/
382 static char linebuf[LBSIZE + 1];
383 static char decoded_linebuf[LBSIZE + 1];
385 /* the magic array for case-independence */
/*
** 256-entry table indexed by byte value (written in octal).  Entries
** 0101-0132 (uppercase A-Z) hold the corresponding lowercase code
** 0141-0172; every other entry holds its own index.
** NOTE(review): the declaration line is outside this excerpt; this is
** presumably the cc[] table that the matcher indexes.
*/
387 0000,0001,0002,0003,0004,0005,0006,0007,
388 0010,0011,0012,0013,0014,0015,0016,0017,
389 0020,0021,0022,0023,0024,0025,0026,0027,
390 0030,0031,0032,0033,0034,0035,0036,0037,
391 0040,0041,0042,0043,0044,0045,0046,0047,
392 0050,0051,0052,0053,0054,0055,0056,0057,
393 0060,0061,0062,0063,0064,0065,0066,0067,
394 0070,0071,0072,0073,0074,0075,0076,0077,
395 0100,0141,0142,0143,0144,0145,0146,0147,
396 0150,0151,0152,0153,0154,0155,0156,0157,
397 0160,0161,0162,0163,0164,0165,0166,0167,
398 0170,0171,0172,0133,0134,0135,0136,0137,
399 0140,0141,0142,0143,0144,0145,0146,0147,
400 0150,0151,0152,0153,0154,0155,0156,0157,
401 0160,0161,0162,0163,0164,0165,0166,0167,
402 0170,0171,0172,0173,0174,0175,0176,0177,
404 0200,0201,0202,0203,0204,0205,0206,0207,
405 0210,0211,0212,0213,0214,0215,0216,0217,
406 0220,0221,0222,0223,0224,0225,0226,0227,
407 0230,0231,0232,0233,0234,0235,0236,0237,
408 0240,0241,0242,0243,0244,0245,0246,0247,
409 0250,0251,0252,0253,0254,0255,0256,0257,
410 0260,0261,0262,0263,0264,0265,0266,0267,
411 0270,0271,0272,0273,0274,0275,0276,0277,
412 0300,0301,0302,0303,0304,0305,0306,0307,
413 0310,0311,0312,0313,0314,0315,0316,0317,
414 0320,0321,0322,0323,0324,0325,0326,0327,
415 0330,0331,0332,0333,0334,0335,0336,0337,
416 0340,0341,0342,0343,0344,0345,0346,0347,
417 0350,0351,0352,0353,0354,0355,0356,0357,
418 0360,0361,0362,0363,0364,0365,0366,0367,
419 0370,0371,0372,0373,0374,0375,0376,0377,
423 ** DEFINITIONS FOR NEXUS
/*
** nxtarg() yields the next argument and advances argp, or yields NULL
** (without advancing) at the end of the vector; prvarg() steps argp
** back by one.
*/
426 #define nxtarg() (*argp ? *argp++ : NULL)
427 #define prvarg() argp--
/*
** padvise reports through advise() only when talked was still zero;
** talked is incremented on every use.  NOTE(review): the macro expands
** to a bare if, so an else written after a padvise(...) statement would
** bind to that hidden if.
*/
429 #define padvise if (!talked++) advise
435 /* for {OR,AND,NOT}action */
437 struct nexus *un_L_child;
438 struct nexus *un_R_child;
445 char un_expbuf[ESIZE];
/*
** Shorthand accessors for the union inside struct nexus: st1 carries
** the child links, st2 the pattern fields and st3 the date fields.
*/
458 #define n_L_child un.st1.un_L_child
459 #define n_R_child un.st1.un_R_child
461 #define n_header un.st2.un_header
462 #define n_circf un.st2.un_circf
463 #define n_expbuf un.st2.un_expbuf
464 #define n_patbuf un.st2.un_patbuf
466 #define n_datef un.st3.un_datef
467 #define n_after un.st3.un_after
468 #define n_tws un.st3.un_tws
/*
** Debug flag tested by pmatches() on its first call.  NOTE(review):
** presumably raised in pcompile() when MHPDEBUG is set; the assignment
** is outside this excerpt.
*/
471 static int pdebug = 0;
/*
** Root of the compiled expression tree: assigned from parse() in
** pcompile() and evaluated by pmatches().
*/
476 static struct nexus *head;
479 ** prototypes for date routines
481 static struct tws *tws_parse(char *, int);
482 static struct tws *tws_special(char *);
487 static void PRaction(struct nexus *, int);
488 static int gcompile(struct nexus *, char *);
489 static int advance(char *, char *);
490 static int cclass(unsigned char *, int, int);
491 static int tcompile(char *, struct tws *, int);
493 static struct nexus *parse(void);
494 static struct nexus *nexp1(void);
495 static struct nexus *nexp2(void);
496 static struct nexus *nexp3(void);
497 static struct nexus *newnexus(int (*)());
/*
** The action callbacks are declared without parameter lists; they are
** invoked through n_action with the argument list built by args().
*/
499 static int ORaction();
500 static int ANDaction();
501 static int NOTaction();
502 static int GREPaction();
503 static int TWSaction();
/*
** Build the expression tree for the search and store its root in head.
**  vec  - argument vector holding the search switches (NOTE(review):
**         the line that points argp at it is outside this excerpt)
**  date - initial value for datesw; the handling of a NULL value is
**         also outside this excerpt
** When parse() yields no tree the result is 1 if nothing has been
** reported through padvise (talked is still zero) and 0 otherwise.
** NOTE(review): the final padvise apparently reports a leftover
** argument; its guarding condition is not visible.
*/
507 pcompile(char **vec, char *date)
511 if ((cp = getenv("MHPDEBUG")) && *cp)
515 if ((datesw = date) == NULL)
519 if ((head = parse()) == NULL)
520 return (talked ? 0 : 1);
523 padvise(NULL, "%s unexpected", *argp);
/*
** NOTE(review): by its position and its use of ORaction this appears to
** be parse(); the line carrying the function name is outside this
** excerpt.
** It reads one operand through nexp1() and returns early when that
** fails or when no further argument exists.  The next argument is
** resolved against parswit.  In the visible OR case a node is created
** with newnexus(ORaction); its right child comes from a recursive
** parse(), and a NULL result there is reported as a missing
** disjunctive.
*/
531 static struct nexus *
537 if ((n = nexp1()) == NULL || (cp = nxtarg()) == NULL)
541 padvise(NULL, "%s unexpected", cp);
547 switch (smatch(cp, parswit)) {
549 ambigsw(cp, parswit);
553 fprintf(stderr, "-%s unknown\n", cp);
558 o = newnexus(ORaction);
560 if ((o->n_R_child = parse()))
562 padvise(NULL, "missing disjunctive");
/*
** NOTE(review): by its position and its use of ANDaction this appears
** to be nexp1(); the line carrying the function name is outside this
** excerpt.
** It reads one operand through nexp2() and returns early when that
** fails or when no further argument exists.  In the visible AND case a
** node is created with newnexus(ANDaction); its right child comes from
** a recursive nexp1(), and a NULL result there is reported as a
** missing conjunctive.
*/
572 static struct nexus *
578 if ((n = nexp2()) == NULL || (cp = nxtarg()) == NULL)
582 padvise(NULL, "%s unexpected", cp);
588 switch (smatch(cp, parswit)) {
590 ambigsw(cp, parswit);
594 fprintf(stderr, "-%s unknown\n", cp);
599 o = newnexus(ANDaction);
601 if ((o->n_R_child = nexp1()))
603 padvise(NULL, "missing conjunctive");
/*
** NOTE(review): by its position and its use of NOTaction this appears
** to be nexp2(); the line carrying the function name is outside this
** excerpt.
** It fetches the next argument and resolves it against parswit.  In the
** visible NOT case a node is created with newnexus(NOTaction); its only
** (left) child comes from nexp3(), and a NULL result there is reported
** as a missing negation.
*/
614 static struct nexus *
620 if ((cp = nxtarg()) == NULL)
630 switch (smatch(cp, parswit)) {
632 ambigsw(cp, parswit);
636 fprintf(stderr, "-%s unknown\n", cp);
641 n = newnexus(NOTaction);
642 if ((n->n_L_child = nexp3()))
644 padvise(NULL, "missing negation");
/*
** NOTE(review): appears to be nexp3(), the innermost level of the
** expression parser; the function name line and all case labels are
** outside this excerpt.  Visible cases:
**  - a group: parse() is called for the contents, and the following
**    argument must be a switch that resolves to PRRBR, otherwise it is
**    reported as unexpected;
**  - header switches: the first word of the matching parswit entry is
**    extracted with brkstring() and combined with the pattern argument
**    into an expression anchored at the start of the line;
**  - patterns are compiled with gcompile() and the source text is kept
**    in n_patbuf;
**  - a switch whose argument is stored in datesw (it must be present
**    and must not start with a dash);
**  - date comparisons compiled with tcompile(); n_after is set when the
**    switch index equals PRAFTR.
*/
654 static struct nexus *
659 char buffer[BUFSIZ], temp[64];
662 if ((cp = nxtarg()) == NULL)
666 padvise(NULL, "%s unexpected", cp);
674 switch (i = smatch(cp, parswit)) {
676 ambigsw(cp, parswit);
680 fprintf(stderr, "-%s unknown\n", cp);
685 if ((n = parse()) == NULL) {
686 padvise(NULL, "missing group");
689 if ((cp = nxtarg()) == NULL) {
690 padvise(NULL, "missing -rbrace");
693 if (*cp++ == '-' && smatch(cp, parswit) == PRRBR)
695 padvise(NULL, "%s unexpected", --cp);
707 strncpy(temp, parswit[i].sw, sizeof(temp));
708 temp[sizeof(temp) - 1] = '\0';
709 dp = *brkstring(temp, " ", NULL);
711 if (!(cp = nxtarg())) { /* allow -xyz arguments */
712 padvise(NULL, "missing argument to %s", argp[-2]);
715 n = newnexus(GREPaction);
717 snprintf(buffer, sizeof(buffer), "^%s[ \t]*:.*%s", dp, cp);
722 n = newnexus(GREPaction);
724 if (!(cp = nxtarg())) { /* allow -xyz arguments */
725 padvise(NULL, "missing argument to %s", argp[-2]);
730 if (!gcompile(n, dp)) {
731 padvise(NULL, "pattern error in %s %s", argp[-2], cp);
734 n->n_patbuf = getcpy(dp);
738 padvise(NULL, "internal error!");
742 if (!(datesw = nxtarg()) || *datesw == '-') {
743 padvise(NULL, "missing argument to %s",
751 if (!(cp = nxtarg())) { /* allow -xyz arguments */
752 padvise(NULL, "missing argument to %s", argp[-2]);
755 n = newnexus(TWSaction);
757 if (!tcompile(cp, &n->n_tws, n->n_after = i == PRAFTR)) {
758 padvise(NULL, "unable to parse %s %s", argp[-2], cp);
/*
** Allocate one nexus through mh_xcalloc() and record action as its
** n_action callback.  NOTE(review): the return statement is outside
** this excerpt; callers use the result as the new node.
*/
766 static struct nexus *
767 newnexus(int (*action)())
771 p = (struct nexus *) mh_xcalloc((size_t) 1, sizeof *p);
773 p->n_action = action;
/*
** args(a) expands to the full argument list handed to every action
** callback: the node a followed by fp, msgnum, start and stop.
** params is the same list with the node named n.
*/
778 #define args(a) a, fp, msgnum, start, stop
779 #define params args(n)
/*
** Evaluate the compiled expression against one message.
**  fp          - stream on the message (main() passes its fopen() result)
**  msgnum      - message number
**  start, stop - forwarded unchanged to the action callbacks
** Returns whatever the action of the root node returns.
** On the first call only (talked was zero) and with pdebug set, an
** extra step runs whose statement is outside this excerpt.
*/
788 pmatches(FILE *fp, int msgnum, long start, long stop)
793 if (!talked++ && pdebug)
796 return (*head->n_action) (args(head));
/*
** Debug dump of the expression tree to stderr.
**  n     - node to print
**  level - nesting depth; one bar-and-space prefix is written per level
** OR and AND nodes print their name and recurse into both children one
** level deeper; NOT nodes recurse into the left child only; GREP nodes
** print header or body and the saved pattern text; TWS nodes print
** after or before, the date field name and the formatted time.  The
** last fprintf prints UNKNOWN with the callback address.
*/
801 PRaction(struct nexus *n, int level)
805 for (i = 0; i < level; i++)
806 fprintf(stderr, "| ");
808 if (n->n_action == ORaction) {
809 fprintf(stderr, "OR\n");
810 PRaction(n->n_L_child, level + 1);
811 PRaction(n->n_R_child, level + 1);
814 if (n->n_action == ANDaction) {
815 fprintf(stderr, "AND\n");
816 PRaction(n->n_L_child, level + 1);
817 PRaction(n->n_R_child, level + 1);
820 if (n->n_action == NOTaction) {
821 fprintf(stderr, "NOT\n");
822 PRaction(n->n_L_child, level + 1);
825 if (n->n_action == GREPaction) {
826 fprintf(stderr, "PATTERN(%s) %s\n",
827 n->n_header ? "header" : "body", n->n_patbuf);
830 if (n->n_action == TWSaction) {
831 fprintf(stderr, "TEMPORAL(%s) %s: %s\n",
832 n->n_after ? "after" : "before", n->n_datef,
833 dasctime(&n->n_tws));
836 fprintf(stderr, "UNKNOWN(0x%x)\n",
837 (unsigned int)(unsigned long) (*n->n_action));
/*
** NOTE(review): these five lines appear to be the bodies of ORaction(),
** ANDaction() and NOTaction(); their headers and the statements guarded
** by the two if tests are outside this excerpt.
*/
/* first fragment: the left child is evaluated first; the final return hands back the right child result */
845 if ((*n->n_L_child->n_action) (args(n->n_L_child)))
847 return (*n->n_R_child->n_action) (args(n->n_R_child));
/* second fragment: a failing left child is tested first; the final return hands back the right child result */
855 if (!(*n->n_L_child->n_action) (args(n->n_L_child)))
857 return (*n->n_R_child->n_action) (args(n->n_R_child));
/* third fragment: logical negation of the left child result */
865 return (!(*n->n_L_child->n_action) (args(n->n_L_child)));
/*
** Compile the pattern astr into the expression buffer of node n.
** The caller treats a zero return as a pattern error.
** NOTE(review): most of the body is outside this excerpt; the comments
** below cover only the visible statements.
*/
870 gcompile(struct nexus *n, char *astr)
874 unsigned char *ep, *dp, *sp, *lastep = 0;
/* ep starts at the beginning of the output buffer, dp points one past its end */
876 dp = (ep = n->n_expbuf) + sizeof n->n_expbuf;
887 if ((c = *sp++) != '*')
914 if ((c = *sp++) == '^') {
/* a dash that is followed by neither the end of the pattern nor a closing bracket: c walks from just above the previously stored byte up to the byte after the dash */
924 if (c == '-' && *sp != '\0' && *sp != ']') {
925 for (c = ep[-1]+1; c < *sp; c++) {
/* both tests below check for end of pattern or a full output buffer */
928 if (c == '\0' || ep >= dp)
934 if (c == '\0' || ep >= dp)
937 } while ((c = *sp++) != ']');
944 if ((c = *sp++) == '\0')
/*
** NOTE(review): appears to be the body of GREPaction(), the pattern
** action; its header is outside this excerpt.  Visible behaviour:
**  - seek to start and read the message with fgets();
**  - stop at end of file or once pos has reached a nonzero stop;
**  - pos advances by the length of every chunk read;
**  - bytes are stored in linebuf only while room remains (LBSIZE);
**  - when body is 0 or lf is positive the line is passed through
**    decode_rfc2047() and, on success, the decoded copy is matched;
**  - matching is done by advance(); one path first compares the
**    current byte and its cc[] entry with c.
*/
964 char *p1, *p2, *ebp, *cbp;
967 fseek(fp, start, SEEK_SET);
971 if (body && n->n_header)
978 if (fgets(ibuf, sizeof ibuf, fp) == NULL
979 || (stop && pos >= stop)) {
984 pos += (long) strlen(ibuf);
986 ebp = ibuf + strlen(ibuf);
989 if (lf && c != '\n') {
990 if (c != ' ' && c != '\t') {
1009 if (c && p1 < &linebuf[LBSIZE - 1])
1019 ** Attempt to decode as a MIME header. If it's the
1020 ** last header, body will be 1 and lf will be at least 1.
1022 if ((body == 0 || lf > 0) && decode_rfc2047(linebuf,
1023 decoded_linebuf, sizeof decoded_linebuf)) {
1024 p1 = decoded_linebuf;
1028 if (advance(p1, p2))
1036 if (*p1 == c || cc[(unsigned char)*p1] == c)
1037 if (advance(p1, p2))
1044 if (advance(p1, p2))
/*
** Try to match the compiled expression aep against the text at alp.
** Both arguments are handled as unsigned bytes internally.
** NOTE(review): the opcode dispatch and the return statements are
** outside this excerpt.  Visible pieces: a literal byte matches either
** exactly or through its cc[] entry; bracket classes go through
** cclass() with a final argument of 1 or 0; repeated items first
** consume as much as possible and then retry advance() on the rest in
** a loop that runs while lp is above curlp; the admonish() call reports
** an internal inconsistency.
*/
1052 advance(char *alp, char *aep)
1054 unsigned char *lp, *ep, *curlp;
1056 lp = (unsigned char *)alp;
1057 ep = (unsigned char *)aep;
1061 if (*ep++ == *lp++ || ep[-1] == cc[lp[-1]])
1079 if (cclass(ep, *lp++, 1)) {
1086 if (cclass(ep, *lp++, 0)) {
1100 while (*lp++ == *ep || cc[lp[-1]] == *ep)
1108 while (cclass(ep, *lp++, ep[-1] == (CCL | STAR)))
1116 if (advance(lp, ep))
1118 } while (lp > curlp);
1122 admonish(NULL, "advance() botch -- you lose big");
/*
** Test byte ac against the compiled class aset.
** NOTE(review): only the membership comparison is visible; it accepts a
** set byte that equals c or equals the cc[] entry for c.  The role of
** af (advance() passes 1 in one place and 0 in another) is presumably
** the value returned on a hit -- confirm against the hidden lines.
*/
1129 cclass(unsigned char *aset, int ac, int af)
1132 unsigned char c, *set;
1140 if (*set++ == c || set[-1] == cc[c])
/*
** Parse the date string ap through tws_parse(), forwarding isafter.
** NOTE(review): the use of tb and the return values are outside this
** excerpt; the caller treats a zero return as a parse failure.
*/
1148 tcompile(char *ap, struct tws *tb, int isafter)
1152 if ((tw = tws_parse(ap, isafter)) == NULL)
/*
** Turn a user-supplied date string into a struct tws.
** Attempts, in the order visible here:
**  1. tws_special(); on success the time of day is forced to 23:59:59
**     when isafter is set and to 00:00:00 otherwise;
**  2. dparsetime() on ap as given;
**  3. ap followed by the zone string of the current local time;
**  4. ap followed by the current local hour, minute, second and zone;
**  5. the current day, month name and year followed by ap;
**  6. the same with one further trailing field whose argument is
**     outside this excerpt.
** NOTE(review): the return statements are outside this excerpt.
*/
1161 tws_parse(char *ap, int isafter)
1163 char buffer[BUFSIZ];
1164 struct tws *tw, *ts;
1166 if ((tw = tws_special(ap)) != NULL) {
1167 tw->tw_sec = tw->tw_min = isafter ? 59 : 0;
1168 tw->tw_hour = isafter ? 23 : 0;
1171 if ((tw = dparsetime(ap)) != NULL)
1174 if ((ts = dlocaltimenow()) == NULL)
1177 snprintf(buffer, sizeof(buffer), "%s %s", ap, dtwszone(ts));
1178 if ((tw = dparsetime(buffer)) != NULL)
1181 snprintf(buffer, sizeof(buffer), "%s %02d:%02d:%02d %s", ap,
1182 ts->tw_hour, ts->tw_min, ts->tw_sec, dtwszone(ts));
1183 if ((tw = dparsetime(buffer)) != NULL)
1186 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s",
1187 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year, ap);
1188 if ((tw = dparsetime(buffer)) != NULL)
1191 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s %s",
1192 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year,
1194 if ((tw = dparsetime(buffer)) != NULL)
/*
** Recognise relative date words in ap.
**  - today, yesterday, tomorrow (compared case-insensitively): clock,
**    shifted back or forward by one day where needed, is converted
**    with dlocaltime();
**  - the entries of tw_ldotw[] are compared with ap the same way, and
**    an offset relative to the current tw_wday is computed;
**  - in the else branch ap is converted with atoi() and used as a count
**    of days added to clock.
** NOTE(review): the initialisation of clock and several branches are
** outside this excerpt.  atoi() cannot report malformed input, as the
** existing remark on that line already admits.
*/
1202 tws_special(char *ap)
1209 if (!mh_strcasecmp(ap, "today"))
1210 return dlocaltime(&clock);
1211 if (!mh_strcasecmp(ap, "yesterday")) {
1212 clock -= (long) (60 * 60 * 24);
1213 return dlocaltime(&clock);
1215 if (!mh_strcasecmp(ap, "tomorrow")) {
1216 clock += (long) (60 * 60 * 24);
1217 return dlocaltime(&clock);
1220 for (i = 0; tw_ldotw[i]; i++)
1221 if (!mh_strcasecmp(ap, tw_ldotw[i]))
1224 if ((tw = dlocaltime(&clock)) == NULL)
1226 if ((i -= tw->tw_wday) > 0)
1232 else /* -ddd days ago */
1233 i = atoi(ap); /* we should error check this */
1235 clock += (long) ((60 * 60 * 24) * i);
1236 return dlocaltime(&clock);
/*
** NOTE(review): appears to be the body of TWSaction(), the date
** action; its header is outside this excerpt.  Visible behaviour:
**  - seek to start and read fields in a loop with m_getfld2();
**  - the field value is copied with getcpy() and the field name is
**    compared case-insensitively with n_datef;
**  - a format error is reported with the message number;
**  - a value that dparsetime() cannot parse produces an advisory and
**    sets state to 1;
**  - otherwise state becomes twsort() > 0 when n_after is set and
**    twsort() < 0 when it is not.
*/
1245 struct field f = free_field;
1249 fseek(fp, start, SEEK_SET);
1250 for (state = FLD2, bp = NULL;;) {
1251 switch (state = m_getfld2(state, &f, fp)) {
1257 bp = getcpy(f.value);
1258 if (mh_strcasecmp(f.name, n->n_datef)==0) {
1266 advise(NULL, "format error in message %d", msgnum);
1277 adios(EX_SOFTWARE, NULL, "internal error -- you lose");
1282 if ((tw = dparsetime(bp)) == NULL)
1283 advise(NULL, "unable to parse %s field in message %d, matching...",
1284 n->n_datef, msgnum), state = 1;
1286 state = n->n_after ? (twsort(tw, &n->n_tws) > 0)
1287 : (twsort(tw, &n->n_tws) < 0);