2 ** pick.c -- search for messages by content
4 ** This code is Copyright (c) 2002, 2008, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
16 #ifdef HAVE_SYS_TIME_H
17 # include <sys/time.h>
/*
** Fragment of the switches[] table.  main() resolves every command-line
** switch against it with smatch(), reports ambiguity with ambigsw() and
** prints it through print_help().  Each visible entry pairs the switch
** text (name plus argument placeholder) with a number; judging by the
** note on the "datefield" entry this number is a minimum abbreviation
** length -- TODO confirm against the definition of struct swit.
*/
21 static struct swit switches[] = {
35 { "date pattern", 0 },
37 { "from pattern", 0 },
39 { "search pattern", 0 },
41 { "subject pattern", 0 },
45 { "-othercomponent pattern", 0 },
51 { "datefield field", 5 }, /* 5 chars required to differ from -date */
53 { "sequence name", 0 },
/*
** Interface to the expression engine as used by main():
** pcompile() is called once with the vec[] array, and pmatches() is
** called once per opened message with that message's stream and number.
**
** listsw starts at -1; main() forces it to 0 (the "HACK" assignments)
** on several exit paths, and it is later tested together with
** isatty(fileno(stdout)).
*/
76 static int pcompile(char **, char *);
77 static int pmatches(FILE *, int, long, long);
80 static int listsw = -1;
/*
** main -- entry point of pick.
**
** Flow, as far as it is visible in this listing:
**  - register putzero_done() with atexit() (failure is fatal), set the
**    locale and fetch the argument list with getarguments();
**  - walk the arguments: switches are resolved with smatch() against
**    switches[]; an argument starting with '+' or '@' names the folder
**    (only one is accepted); other arguments are queued as message
**    specifications with app_msgarg();
**  - with no message specification, seq_all is queued instead; with no
**    folder, getcurfol() supplies it;
**  - chdir() into the folder, read it with folder_read(), convert the
**    queued specifications with m_convert();
**  - compile the search expression with pcompile(vec, NULL);
**  - for every selected message in lowsel..hghsel, open it and test it
**    with pmatches(); a message that cannot be opened only draws an
**    admonish() warning and is not passed to pmatches();
**  - add the survivors to each requested sequence with seq_addsel(),
**    optionally print the "N hit(s)" summary, then save context and
**    sequences and free the folder structure.
**
** Fatal errors go through adios() with an EX_* exit code.  At most
** NUMATTRS sequence names are accepted.
*/
85 main(int argc, char **argv)
87 int publicsw = -1, zerosw = 1, vecp = 0;
88 unsigned int seqp = 0;
90 char *maildir, *folder = NULL, buf[100];
91 char *cp, **argp, **arguments;
92 char *seqs[NUMATTRS + 1], *vec[MAXARGS];
93 struct msgs_array msgs = { 0, 0, NULL };
97 if (atexit(putzero_done) != 0) {
98 adios(EX_OSERR, NULL, "atexit failed");
101 setlocale(LC_ALL, "");
102 invo_name = mhbasename(argv[0]);
104 /* read user profile/context */
107 arguments = getarguments(invo_name, argc, argv, 1);
110 while ((cp = *argp++)) {
116 switch (smatch(cp, switches)) {
118 ambigsw(cp, switches);
119 listsw = 0; /* HACK */
122 adios(EX_USAGE, NULL, "-%s unknown", cp);
125 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
126 print_help(buf, switches, 1);
127 listsw = 0; /* HACK */
128 exit(argc == 2 ? EX_OK : EX_USAGE);
130 print_version(invo_name);
131 listsw = 0; /* HACK */
132 exit(argc == 2 ? EX_OK : EX_USAGE);
145 if (!(cp = *argp++)) /* allow -xyz arguments */
146 adios(EX_USAGE, NULL, "missing argument to %s",
151 adios(EX_SOFTWARE, NULL, "internal error!");
162 if (!(cp = *argp++) || *cp == '-')
163 adios(EX_USAGE, NULL, "missing argument to %s",
166 /* check if too many sequences specified */
167 if (seqp >= NUMATTRS)
168 adios(EX_USAGE, NULL, "too many sequences (more than %d) specified", NUMATTRS);
196 if (*cp == '+' || *cp == '@') {
198 adios(EX_USAGE, NULL, "only one folder at a time!");
200 folder = getcpy(expandfol(cp));
202 app_msgarg(&msgs, cp);
207 ** If we didn't specify which messages to search,
208 ** then search the whole folder.
211 app_msgarg(&msgs, seq_all);
214 folder = getcurfol();
215 maildir = toabsdir(folder);
217 if (chdir(maildir) == NOTOK)
218 adios(EX_OSERR, maildir, "unable to change directory to");
220 /* read folder and create message structure */
221 if (!(mp = folder_read(folder)))
222 adios(EX_IOERR, NULL, "unable to read folder %s", folder);
224 /* check for empty folder */
226 adios(EX_DATAERR, NULL, "no messages in %s", folder);
228 /* parse all the message ranges/sequences and set SELECTED */
229 for (msgnum = 0; msgnum < msgs.size; msgnum++)
230 if (!m_convert(mp, msgs.msgs[msgnum]))
232 seq_setprev(mp); /* set the previous-sequence */
235 ** If we aren't saving the results to a sequence,
236 ** we default to list the results.
241 if (publicsw == 1 && is_readonly(mp))
242 adios(EX_NOPERM, NULL, "folder %s is read-only, so -public not allowed",
245 if (!pcompile(vec, NULL))
252 ** If printing message numbers to standard out,
253 ** force line buffering on.
256 setvbuf(stdout, NULL, _IOLBF, 0);
259 ** Scan through all the SELECTED messages and check for a
260 ** match. If the message does not match, then unselect it.
262 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
263 if (is_selected(mp, msgnum)) {
264 if ((fp = fopen(cp = m_name(msgnum), "r")) == NULL)
265 admonish(cp, "unable to read message");
266 if (fp && pmatches(fp, msgnum, 0L, 0L)) {
273 printf("%s\n", m_name(msgnum));
275 /* if it doesn't match, then unselect it */
276 unset_selected(mp, msgnum);
288 adios(EX_DATAERR, NULL, "no messages match specification");
293 ** Add the matching messages to sequences
295 for (seqp = 0; seqs[seqp]; seqp++)
296 if (!seq_addsel(mp, seqs[seqp], publicsw, zerosw))
300 ** Print total matched if not printing each matched message number.
303 printf("%d hit%s\n", mp->numsel, mp->numsel == 1 ? "" : "s");
306 context_replace(curfolder, folder); /* update current folder */
307 seq_save(mp); /* synchronize message sequences */
308 context_save(); /* save the context file */
309 folder_free(mp); /* free folder/message structure */
310 listsw = 0; /* HACK */
318 if (listsw && !isatty(fileno(stdout)))
/*
** Fragment of the parswit[] table.  The expression parser routines
** look up every "-switch" token in it with smatch() and use the result
** to pick a branch; one routine also reads parswit[i].sw to recover the
** header name of the matched switch.  Note that the "-othercomponent"
** entry carries 15 here, whereas the same entry in switches[] carries 0.
*/
323 static struct swit parswit[] = {
337 { "date pattern", 0 },
339 { "from pattern", 0 },
341 { "search pattern", 0 },
343 { "subject pattern", 0 },
347 { "-othercomponent pattern", 15 },
351 { "before date", 0 },
353 { "datefield field", 5 },
357 /* DEFINITIONS FOR PATTERN MATCHING */
360 ** We really should be using re_comp() and re_exec() here. Unfortunately,
361 ** pick advertises that lowercase characters match characters of both
362 ** cases. Since re_exec() doesn't exhibit this behavior, we are stuck
363 ** with this version. Furthermore, we need to be able to save and restore
364 ** the state of the pattern matcher in order to do things "efficiently".
366 ** This algorithm isn't as powerful as the re_xxx()
367 ** routines (no \(xxx\) and \n constructs). Such is life.
/*
** Line buffers of the pattern matcher, each LBSIZE + 1 bytes.
** linebuf receives the text being assembled for matching (writes are
** bounded by &linebuf[LBSIZE - 1]); decoded_linebuf receives the output
** of decode_rfc2047() when a header line is decoded before matching.
*/
383 static char linebuf[LBSIZE + 1];
384 static char decoded_linebuf[LBSIZE + 1];
/*
** Case-folding table indexed by byte value (octal 0000..0377).  Every
** entry equals its own index except 0101..0132 ('A'..'Z'), which hold
** 0141..0172 ('a'..'z').  The matcher accepts a subject character when
** it equals the pattern character directly or when its cc[] entry does,
** which is how a lowercase pattern letter matches both cases.
*/
386 /* the magic array for case-independence */
388 0000,0001,0002,0003,0004,0005,0006,0007,
389 0010,0011,0012,0013,0014,0015,0016,0017,
390 0020,0021,0022,0023,0024,0025,0026,0027,
391 0030,0031,0032,0033,0034,0035,0036,0037,
392 0040,0041,0042,0043,0044,0045,0046,0047,
393 0050,0051,0052,0053,0054,0055,0056,0057,
394 0060,0061,0062,0063,0064,0065,0066,0067,
395 0070,0071,0072,0073,0074,0075,0076,0077,
396 0100,0141,0142,0143,0144,0145,0146,0147,
397 0150,0151,0152,0153,0154,0155,0156,0157,
398 0160,0161,0162,0163,0164,0165,0166,0167,
399 0170,0171,0172,0133,0134,0135,0136,0137,
400 0140,0141,0142,0143,0144,0145,0146,0147,
401 0150,0151,0152,0153,0154,0155,0156,0157,
402 0160,0161,0162,0163,0164,0165,0166,0167,
403 0170,0171,0172,0173,0174,0175,0176,0177,
405 0200,0201,0202,0203,0204,0205,0206,0207,
406 0210,0211,0212,0213,0214,0215,0216,0217,
407 0220,0221,0222,0223,0224,0225,0226,0227,
408 0230,0231,0232,0233,0234,0235,0236,0237,
409 0240,0241,0242,0243,0244,0245,0246,0247,
410 0250,0251,0252,0253,0254,0255,0256,0257,
411 0260,0261,0262,0263,0264,0265,0266,0267,
412 0270,0271,0272,0273,0274,0275,0276,0277,
413 0300,0301,0302,0303,0304,0305,0306,0307,
414 0310,0311,0312,0313,0314,0315,0316,0317,
415 0320,0321,0322,0323,0324,0325,0326,0327,
416 0330,0331,0332,0333,0334,0335,0336,0337,
417 0340,0341,0342,0343,0344,0345,0346,0347,
418 0350,0351,0352,0353,0354,0355,0356,0357,
419 0360,0361,0362,0363,0364,0365,0366,0367,
420 0370,0371,0372,0373,0374,0375,0376,0377,
424 ** DEFINITIONS FOR NEXUS
/*
** Argument-cursor helpers for the expression parser; all three rely on
** file-level state (argp, talked) rather than taking parameters.
**
** nxtarg()  yields the argument argp points at and advances argp, or
**           yields NULL without advancing once the terminator is reached.
** prvarg()  steps argp back by one, i.e. un-reads one argument.
** padvise   calls advise() only while `talked` is still zero and bumps
**           `talked` on every use, so only the first diagnostic is
**           printed.  It expands to a bare `if`, so a following `else`
**           would bind to that hidden `if`.
*/
427 #define nxtarg() (*argp ? *argp++ : NULL)
428 #define prvarg() argp--
430 #define padvise if (!talked++) advise
/*
** Fragment of the expression-node layout plus its accessor macros.
** The macros reach into a union `un` with three members:
**   st1 -- left/right child pointers, used by the OR/AND/NOT nodes;
**   st2 -- header flag, circumflex flag, compiled expression buffer
**          (ESIZE bytes) and pattern text, used by the grep nodes;
**   st3 -- date field name, after/before flag and parsed time, used by
**          the date-comparison nodes.
** Only the fields named here are visible in this listing.
*/
436 /* for {OR,AND,NOT}action */
438 struct nexus *un_L_child;
439 struct nexus *un_R_child;
446 char un_expbuf[ESIZE];
459 #define n_L_child un.st1.un_L_child
460 #define n_R_child un.st1.un_R_child
462 #define n_header un.st2.un_header
463 #define n_circf un.st2.un_circf
464 #define n_expbuf un.st2.un_expbuf
465 #define n_patbuf un.st2.un_patbuf
467 #define n_datef un.st3.un_datef
468 #define n_after un.st3.un_after
469 #define n_tws un.st3.un_tws
/*
** File-level state and forward declarations of the expression engine.
**
** pdebug -- debug flag, tested on the first pmatches() call; presumably
**           set by pcompile() when MHPDEBUG is non-empty (the
**           assignment itself is not visible here -- TODO confirm).
** head   -- root of the expression tree: assigned from parse() in
**           pcompile() and evaluated by pmatches().
**
** The *action prototypes are declared without parameter lists; they are
** the evaluators stored in a node by newnexus() and invoked with
** (node, fp, msgnum, start, stop).
*/
472 static int pdebug = 0;
477 static struct nexus *head;
480 ** prototypes for date routines
482 static struct tws *tws_parse(char *, int);
483 static struct tws *tws_special(char *);
488 static void PRaction(struct nexus *, int);
489 static int gcompile(struct nexus *, char *);
490 static int advance(char *, char *);
491 static int cclass(unsigned char *, int, int);
492 static int tcompile(char *, struct tws *, int);
494 static struct nexus *parse(void);
495 static struct nexus *nexp1(void);
496 static struct nexus *nexp2(void);
497 static struct nexus *nexp3(void);
498 static struct nexus *newnexus(int (*)());
500 static int ORaction();
501 static int ANDaction();
502 static int NOTaction();
503 static int GREPaction();
504 static int TWSaction();
/*
** pcompile -- build the search expression tree.
**
** vec:  argument vector holding the expression switches (main() passes
**       its vec[] array).
** date: initial value for datesw; when NULL a fallback is installed
**       (the fallback value is not visible in this listing).
**
** Checks the MHPDEBUG environment variable, then stores the result of
** parse() in `head`.  When parse() yields NULL the result is 0 if a
** diagnostic has already been issued (`talked` non-zero) and 1
** otherwise.  A leftover argument is reported as "unexpected".
** main() treats a zero return as failure.
*/
508 pcompile(char **vec, char *date)
512 if ((cp = getenv("MHPDEBUG")) && *cp)
516 if ((datesw = date) == NULL)
520 if ((head = parse()) == NULL)
521 return (talked ? 0 : 1);
524 padvise(NULL, "%s unexpected", *argp);
/*
** Disjunction level of the recursive-descent parser -- presumably
** parse(); the name line is not visible in this listing.
**
** Reads one operand with nexp1() and then looks at the next argument.
** If that argument is looked up in parswit[] and selects the OR branch,
** an ORaction node is created and its right child is obtained by
** calling parse() again; a missing right operand is reported as
** "missing disjunctive".  Unknown switches are reported on stderr.
*/
532 static struct nexus *
538 if ((n = nexp1()) == NULL || (cp = nxtarg()) == NULL)
542 padvise(NULL, "%s unexpected", cp);
548 switch (smatch(cp, parswit)) {
550 ambigsw(cp, parswit);
554 fprintf(stderr, "-%s unknown\n", cp);
559 o = newnexus(ORaction);
561 if ((o->n_R_child = parse()))
563 padvise(NULL, "missing disjunctive");
/*
** Conjunction level of the recursive-descent parser -- presumably
** nexp1(); the name line is not visible in this listing.
**
** Reads one operand with nexp2() and then looks at the next argument.
** For the AND branch an ANDaction node is created and its right child
** is obtained by calling nexp1() again; a missing right operand is
** reported as "missing conjunctive".  Unknown switches are reported on
** stderr.
*/
573 static struct nexus *
579 if ((n = nexp2()) == NULL || (cp = nxtarg()) == NULL)
583 padvise(NULL, "%s unexpected", cp);
589 switch (smatch(cp, parswit)) {
591 ambigsw(cp, parswit);
595 fprintf(stderr, "-%s unknown\n", cp);
600 o = newnexus(ANDaction);
602 if ((o->n_R_child = nexp1()))
604 padvise(NULL, "missing conjunctive");
/*
** Negation level of the recursive-descent parser -- presumably nexp2();
** the name line is not visible in this listing.
**
** Looks at the next argument.  For the NOT branch a NOTaction node is
** created whose left child comes from nexp3(); a missing operand is
** reported as "missing negation".  Unknown switches are reported on
** stderr.
*/
615 static struct nexus *
621 if ((cp = nxtarg()) == NULL)
631 switch (smatch(cp, parswit)) {
633 ambigsw(cp, parswit);
637 fprintf(stderr, "-%s unknown\n", cp);
642 n = newnexus(NOTaction);
643 if ((n->n_L_child = nexp3()))
645 padvise(NULL, "missing negation");
/*
** Primary level of the recursive-descent parser -- presumably nexp3();
** the name line is not visible in this listing.
**
** Visible branches, selected by smatch() on parswit[]:
**  - group: parses a nested expression with parse() and then requires
**    the next argument to be the closing "-rbrace" switch (PRRBR);
**  - header-name switches: the first word of the matched parswit entry
**    is taken as the header name and combined with the user pattern
**    into "^name[ \t]*:.*pattern", which becomes a GREPaction node;
**  - plain pattern: the argument is compiled with gcompile() into a
**    GREPaction node and a copy of the pattern text is kept in n_patbuf;
**  - datefield: stores the next argument in datesw; it must be present
**    and must not start with '-';
**  - date comparison: the argument is parsed with tcompile() into a
**    TWSaction node, with n_after set when the switch is PRAFTR.
** Every branch reports a missing or malformed argument through padvise.
*/
655 static struct nexus *
660 char buffer[BUFSIZ], temp[64];
663 if ((cp = nxtarg()) == NULL)
667 padvise(NULL, "%s unexpected", cp);
675 switch (i = smatch(cp, parswit)) {
677 ambigsw(cp, parswit);
681 fprintf(stderr, "-%s unknown\n", cp);
686 if ((n = parse()) == NULL) {
687 padvise(NULL, "missing group");
690 if ((cp = nxtarg()) == NULL) {
691 padvise(NULL, "missing -rbrace");
694 if (*cp++ == '-' && smatch(cp, parswit) == PRRBR)
696 padvise(NULL, "%s unexpected", --cp);
708 strncpy(temp, parswit[i].sw, sizeof(temp));
709 temp[sizeof(temp) - 1] = '\0';
710 dp = *brkstring(temp, " ", NULL);
712 if (!(cp = nxtarg())) { /* allow -xyz arguments */
713 padvise(NULL, "missing argument to %s", argp[-2]);
716 n = newnexus(GREPaction);
718 snprintf(buffer, sizeof(buffer), "^%s[ \t]*:.*%s", dp, cp);
723 n = newnexus(GREPaction);
725 if (!(cp = nxtarg())) { /* allow -xyz arguments */
726 padvise(NULL, "missing argument to %s", argp[-2]);
731 if (!gcompile(n, dp)) {
732 padvise(NULL, "pattern error in %s %s", argp[-2], cp);
735 n->n_patbuf = getcpy(dp);
739 padvise(NULL, "internal error!");
743 if (!(datesw = nxtarg()) || *datesw == '-') {
744 padvise(NULL, "missing argument to %s",
752 if (!(cp = nxtarg())) { /* allow -xyz arguments */
753 padvise(NULL, "missing argument to %s", argp[-2]);
756 n = newnexus(TWSaction);
758 if (!tcompile(cp, &n->n_tws, n->n_after = i == PRAFTR)) {
759 padvise(NULL, "unable to parse %s %s", argp[-2], cp);
/*
** newnexus -- allocate one expression node.
**
** action: evaluator to store in the node's n_action slot.
**
** The node is obtained from mh_xcalloc() for a single element of
** sizeof *p bytes; a NULL result terminates the program through
** adios() with EX_OSERR.  Presumably the new node is returned (the
** return statement is not visible in this listing).
*/
767 static struct nexus *
768 newnexus(int (*action)())
772 if ((p = (struct nexus *) mh_xcalloc((size_t) 1, sizeof *p)) == NULL)
773 adios(EX_OSERR, NULL, "unable to allocate component storage");
775 p->n_action = action;
/*
** Argument-list shorthands for the evaluator functions.
** args(a) expands to the full call list: node `a` followed by the
** names fp, msgnum, start and stop, which must therefore be in scope at
** every use.  params is args(n), i.e. the same list with the node
** named n.
*/
780 #define args(a) a, fp, msgnum, start, stop
781 #define params args(n)
/*
** pmatches -- test one message against the compiled expression.
**
** fp:     open stream on the message.
** msgnum: message number (passed through to the evaluators).
** start, stop: offsets handed on to the evaluators; main() passes
**         0L for both.
**
** On the first call only (`talked` still zero) and when pdebug is set,
** a debug step runs -- presumably a dump of the tree via PRaction();
** TODO confirm.  The result is whatever the root node's n_action
** returns for (head, fp, msgnum, start, stop); main() treats non-zero
** as a match.
*/
790 pmatches(FILE *fp, int msgnum, long start, long stop)
795 if (!talked++ && pdebug)
798 return (*head->n_action) (args(head));
/*
** PRaction -- print the expression tree rooted at n to stderr.
**
** n:     node to print.
** level: nesting depth; one "| " prefix is written per level.
**
** The node type is recognised by comparing n_action with the known
** evaluators:
**   ORaction / ANDaction -- prints the operator, then both children at
**                           level + 1;
**   NOTaction            -- prints the operator, then the left child;
**   GREPaction           -- prints "header" or "body" (from n_header)
**                           and the pattern text n_patbuf;
**   TWSaction            -- prints "after" or "before" (from n_after),
**                           the date field name and the parsed time
**                           formatted by dasctime();
**   anything else        -- prints the evaluator address as UNKNOWN.
*/
803 PRaction(struct nexus *n, int level)
807 for (i = 0; i < level; i++)
808 fprintf(stderr, "| ");
810 if (n->n_action == ORaction) {
811 fprintf(stderr, "OR\n");
812 PRaction(n->n_L_child, level + 1);
813 PRaction(n->n_R_child, level + 1);
816 if (n->n_action == ANDaction) {
817 fprintf(stderr, "AND\n");
818 PRaction(n->n_L_child, level + 1);
819 PRaction(n->n_R_child, level + 1);
822 if (n->n_action == NOTaction) {
823 fprintf(stderr, "NOT\n");
824 PRaction(n->n_L_child, level + 1);
827 if (n->n_action == GREPaction) {
828 fprintf(stderr, "PATTERN(%s) %s\n",
829 n->n_header ? "header" : "body", n->n_patbuf);
832 if (n->n_action == TWSaction) {
833 fprintf(stderr, "TEMPORAL(%s) %s: %s\n",
834 n->n_after ? "after" : "before", n->n_datef,
835 dasctime(&n->n_tws));
838 fprintf(stderr, "UNKNOWN(0x%x)\n",
839 (unsigned int)(unsigned long) (*n->n_action));
847 if ((*n->n_L_child->n_action) (args(n->n_L_child)))
849 return (*n->n_R_child->n_action) (args(n->n_R_child));
857 if (!(*n->n_L_child->n_action) (args(n->n_L_child)))
859 return (*n->n_R_child->n_action) (args(n->n_R_child));
867 return (!(*n->n_L_child->n_action) (args(n->n_L_child)));
/*
** gcompile -- compile a pattern string into a node's expression buffer.
**
** n:    node whose n_expbuf receives the compiled form.
** astr: pattern text.
**
** ep is the write cursor into n->n_expbuf and dp marks one past the
** end of that buffer; the visible bounds checks (ep >= dp) guard
** against overrunning it.  The visible fragments deal with '*',
** with a bracket class that may start with '^', and with "x-y" ranges
** inside a class, which are expanded character by character starting
** after the previously stored character.  An unterminated class
** (NUL before ']') is detected.  The caller treats a zero return as
** a pattern error.
*/
872 gcompile(struct nexus *n, char *astr)
876 unsigned char *ep, *dp, *sp, *lastep = 0;
878 dp = (ep = n->n_expbuf) + sizeof n->n_expbuf;
889 if ((c = *sp++) != '*')
916 if ((c = *sp++) == '^') {
926 if (c == '-' && *sp != '\0' && *sp != ']') {
927 for (c = ep[-1]+1; c < *sp; c++) {
930 if (c == '\0' || ep >= dp)
936 if (c == '\0' || ep >= dp)
939 } while ((c = *sp++) != ']');
946 if ((c = *sp++) == '\0')
966 char *p1, *p2, *ebp, *cbp;
969 fseek(fp, start, SEEK_SET);
973 if (body && n->n_header)
980 if (fgets(ibuf, sizeof ibuf, fp) == NULL
981 || (stop && pos >= stop)) {
986 pos += (long) strlen(ibuf);
988 ebp = ibuf + strlen(ibuf);
991 if (lf && c != '\n') {
992 if (c != ' ' && c != '\t') {
1011 if (c && p1 < &linebuf[LBSIZE - 1])
1021 ** Attempt to decode as a MIME header. If it's the
1022 ** last header, body will be 1 and lf will be at least 1.
1024 if ((body == 0 || lf > 0) && decode_rfc2047(linebuf,
1025 decoded_linebuf, sizeof decoded_linebuf)) {
1026 p1 = decoded_linebuf;
1030 if (advance(p1, p2))
1038 if (*p1 == c || cc[(unsigned char)*p1] == c)
1039 if (advance(p1, p2))
1046 if (advance(p1, p2))
/*
** advance -- try to match a compiled expression at a fixed position.
**
** alp: subject text, matched from its first character.
** aep: compiled expression (as produced by gcompile()).
**
** Both arguments are walked as unsigned char.  A literal matches when
** the subject byte equals the expression byte or when its cc[] entry
** does (case folding).  Character classes are tested with cclass().
** For the starred forms the subject is first consumed greedily and
** then advance() is retried recursively while backing off (the loop
** runs while lp > curlp).  An unrecognised opcode is reported with
** "advance() botch".  Callers treat a non-zero return as a match.
*/
1054 advance(char *alp, char *aep)
1056 unsigned char *lp, *ep, *curlp;
1058 lp = (unsigned char *)alp;
1059 ep = (unsigned char *)aep;
1063 if (*ep++ == *lp++ || ep[-1] == cc[lp[-1]])
1081 if (cclass(ep, *lp++, 1)) {
1088 if (cclass(ep, *lp++, 0)) {
1102 while (*lp++ == *ep || cc[lp[-1]] == *ep)
1110 while (cclass(ep, *lp++, ep[-1] == (CCL | STAR)))
1118 if (advance(lp, ep))
1120 } while (lp > curlp);
1124 admonish(NULL, "advance() botch -- you lose big");
/*
** cclass -- test a character against a compiled character class.
**
** aset: compiled class inside the expression buffer.
** ac:   subject character.
** af:   flag supplied by advance(): 1 or 0 at its two direct call
**       sites, and (opcode == (CCL | STAR)) in the starred case.  It
**       presumably is the value reported when the character is a
**       member -- TODO confirm, the return statements are not visible.
**
** A set element matches when it equals the character or equals the
** character's cc[] entry (case folding).
*/
1131 cclass(unsigned char *aset, int ac, int af)
1134 unsigned char c, *set;
1142 if (*set++ == c || set[-1] == cc[c])
/*
** tcompile -- parse a date argument for a date-comparison node.
**
** ap:      date text from the command line.
** tb:      destination time structure (the caller passes &n->n_tws).
** isafter: non-zero for an "after" comparison; forwarded to tws_parse().
**
** Delegates to tws_parse(); a NULL result there is the failure case.
** The caller treats a zero return as "unable to parse".
*/
1150 tcompile(char *ap, struct tws *tb, int isafter)
1154 if ((tw = tws_parse(ap, isafter)) == NULL)
/*
** tws_parse -- turn a user-supplied date string into a struct tws.
**
** ap:      date text.
** isafter: non-zero for an "after" comparison.
**
** Attempts, in order:
**  1. tws_special(ap) for the symbolic forms; on success the time of
**     day is forced to 23:59:59 when isafter is set and to 00:00:00
**     otherwise;
**  2. dparsetime(ap) as given;
**  3. ap with the local time zone name appended;
**  4. ap with the current local time of day and zone appended;
**  5. ap prefixed with the current local day, month and year;
**  6. the same prefix plus further trailing text (the remaining
**     arguments of that snprintf() are not visible in this listing).
** The current local time comes from dlocaltimenow(), whose failure is
** checked before steps 3-6.
*/
1163 tws_parse(char *ap, int isafter)
1165 char buffer[BUFSIZ];
1166 struct tws *tw, *ts;
1168 if ((tw = tws_special(ap)) != NULL) {
1169 tw->tw_sec = tw->tw_min = isafter ? 59 : 0;
1170 tw->tw_hour = isafter ? 23 : 0;
1173 if ((tw = dparsetime(ap)) != NULL)
1176 if ((ts = dlocaltimenow()) == NULL)
1179 snprintf(buffer, sizeof(buffer), "%s %s", ap, dtwszone(ts));
1180 if ((tw = dparsetime(buffer)) != NULL)
1183 snprintf(buffer, sizeof(buffer), "%s %02d:%02d:%02d %s", ap,
1184 ts->tw_hour, ts->tw_min, ts->tw_sec, dtwszone(ts));
1185 if ((tw = dparsetime(buffer)) != NULL)
1188 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s",
1189 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year, ap);
1190 if ((tw = dparsetime(buffer)) != NULL)
1193 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s %s",
1194 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year,
1196 if ((tw = dparsetime(buffer)) != NULL)
/*
** tws_special -- recognise symbolic and relative date arguments.
**
** ap: date text, compared case-insensitively with mh_strcasecmp().
**
** Handles "today", "yesterday" (clock minus 24 hours) and "tomorrow"
** (clock plus 24 hours), the weekday names listed in tw_ldotw[], and a
** numeric "-ddd" form meaning a number of days ago.  In the last two
** cases a day offset i is derived and clock is shifted by i * 24 hours
** before conversion with dlocaltime().  `clock` is presumably
** initialised to the current time before these tests -- TODO confirm,
** that line is not visible in this listing.
**
** NOTE(review): the numeric form is converted with atoi(), so
** malformed digits are not diagnosed (the existing inline remark
** acknowledges this); strtol() with end-pointer checking would allow
** an error to be reported.
*/
1204 tws_special(char *ap)
1211 if (!mh_strcasecmp(ap, "today"))
1212 return dlocaltime(&clock);
1213 if (!mh_strcasecmp(ap, "yesterday")) {
1214 clock -= (long) (60 * 60 * 24);
1215 return dlocaltime(&clock);
1217 if (!mh_strcasecmp(ap, "tomorrow")) {
1218 clock += (long) (60 * 60 * 24);
1219 return dlocaltime(&clock);
1222 for (i = 0; tw_ldotw[i]; i++)
1223 if (!mh_strcasecmp(ap, tw_ldotw[i]))
1226 if ((tw = dlocaltime(&clock)) == NULL)
1228 if ((i -= tw->tw_wday) > 0)
1234 else /* -ddd days ago */
1235 i = atoi(ap); /* we should error check this */
1237 clock += (long) ((60 * 60 * 24) * i);
1238 return dlocaltime(&clock);
1248 char buf[BUFSIZ], name[NAMESZ];
1251 fseek(fp, start, SEEK_SET);
1252 for (state = FLD, bp = NULL;;) {
1253 switch (state = m_getfld(state, name, buf, sizeof buf, fp)) {
1262 while (state == FLDPLUS) {
1263 state = m_getfld(state, name, buf,
1267 if (!mh_strcasecmp(name, n->n_datef))
1269 if (state != FLDEOF)
1277 if (state == LENERR || state == FMTERR)
1278 advise(NULL, "format error in message %d", msgnum);
1284 adios(EX_SOFTWARE, NULL, "internal error -- you lose");
1289 if ((tw = dparsetime(bp)) == NULL)
1290 advise(NULL, "unable to parse %s field in message %d, matching...",
1291 n->n_datef, msgnum), state = 1;
1293 state = n->n_after ? (twsort(tw, &n->n_tws) > 0)
1294 : (twsort(tw, &n->n_tws) < 0);