2 ** pick.c -- search for messages by content
4 ** This code is Copyright (c) 2002, 2008, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
16 #ifdef HAVE_SYS_TIME_H
17 # include <sys/time.h>
21 static struct swit switches[] = {
35 { "date pattern", 0 },
37 { "from pattern", 0 },
39 { "search pattern", 0 },
41 { "subject pattern", 0 },
45 { "-othercomponent pattern", 0 },
51 { "datefield field", 5 }, /* 5 chars required to differ from -date */
53 { "sequence name", 0 },
73 char *version=VERSION;
78 static int pcompile(char **, char *);
79 static int pmatches(FILE *, int, long, long);
82 static int listsw = -1;
/*
** pick entry point.  Flow, as far as the lines shown here establish:
**  - register putzero_done() with atexit(), set the locale and derive
**    invo_name from argv[0];
**  - walk the arguments returned by getarguments(), matching switches
**    with smatch(); an argument starting with '+' or '@' names the
**    (single) folder, and arguments are appended to msgs with
**    app_msgarg();
**  - seq_all is appended to msgs as the "whole folder" default and the
**    folder can come from getcurfol() (the guarding conditions are on
**    lines not shown);
**  - chdir() into the folder directory, read it with folder_read() and
**    let m_convert() process each entry of msgs;
**  - compile the search expression with pcompile(), then call
**    pmatches() on every selected message and unselect the ones that
**    do not match;
**  - add the remaining selection to each sequence named in seqs[],
**    print the hit count, and save context and sequences.
** Failures are reported through adios() with an EX_* code; presumably
** adios() terminates the program -- confirm in its definition.
*/
87 main(int argc, char **argv)
89 int publicsw = -1, zerosw = 1, vecp = 0;
90 unsigned int seqp = 0;
92 char *maildir, *folder = NULL, buf[100];
93 char *cp, **argp, **arguments;
94 char *seqs[NUMATTRS + 1], *vec[MAXARGS];
95 struct msgs_array msgs = { 0, 0, NULL };
99 if (atexit(putzero_done) != 0) {
100 adios(EX_OSERR, NULL, "atexit failed");
103 setlocale(LC_ALL, "");
104 invo_name = mhbasename(argv[0]);
106 /* read user profile/context */
109 arguments = getarguments(invo_name, argc, argv, 1);
112 while ((cp = *argp++)) {
118 switch (smatch(cp, switches)) {
120 ambigsw(cp, switches);
/*
** NOTE(review): listsw is cleared before each early way out of main().
** The atexit handler elsewhere in this file tests listsw, so this
** presumably keeps it from emitting its output on these paths --
** confirm against putzero_done().
*/
121 listsw = 0; /* HACK */
124 adios(EX_USAGE, NULL, "-%s unknown", cp);
127 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
128 print_help(buf, switches, 1);
129 listsw = 0; /* HACK */
/* exit status is EX_OK only when argc == 2, EX_USAGE otherwise */
130 exit(argc == 2 ? EX_OK : EX_USAGE);
132 print_version(invo_name);
133 listsw = 0; /* HACK */
134 exit(argc == 2 ? EX_OK : EX_USAGE);
147 if (!(cp = *argp++)) /* allow -xyz arguments */
148 adios(EX_USAGE, NULL, "missing argument to %s",
153 adios(EX_SOFTWARE, NULL, "internal error!");
/* unlike the check above, an argument starting with '-' is rejected here */
164 if (!(cp = *argp++) || *cp == '-')
165 adios(EX_USAGE, NULL, "missing argument to %s",
168 /* check if too many sequences specified */
169 if (seqp >= NUMATTRS)
170 adios(EX_USAGE, NULL, "too many sequences (more than %d) specified", NUMATTRS);
/* a leading '+' or '@' marks a folder argument */
198 if (*cp == '+' || *cp == '@') {
200 adios(EX_USAGE, NULL, "only one folder at a time!");
202 folder = mh_xstrdup(expandfol(cp));
204 app_msgarg(&msgs, cp);
209 ** If we didn't specify which messages to search,
210 ** then search the whole folder.
213 app_msgarg(&msgs, seq_all);
216 folder = getcurfol();
217 maildir = toabsdir(folder);
219 if (chdir(maildir) == NOTOK)
220 adios(EX_OSERR, maildir, "unable to change directory to");
222 /* read folder and create message structure */
223 if (!(mp = folder_read(folder)))
224 adios(EX_IOERR, NULL, "unable to read folder %s", folder);
226 /* check for empty folder */
228 adios(EX_DATAERR, NULL, "no messages in %s", folder);
230 /* parse all the message ranges/sequences and set SELECTED */
231 for (msgnum = 0; msgnum < msgs.size; msgnum++)
232 if (!m_convert(mp, msgs.msgs[msgnum]))
234 seq_setprev(mp); /* set the previous-sequence */
237 ** If we aren't saving the results to a sequence,
238 ** we default to list the results.
243 if (publicsw == 1 && is_readonly(mp))
244 adios(EX_NOPERM, NULL, "folder %s is read-only, so -public not allowed",
/* the search expression is compiled once, before any message is opened */
247 if (!pcompile(vec, NULL))
254 ** If printing message numbers to standard out,
255 ** force line buffering on.
258 setvbuf(stdout, NULL, _IOLBF, 0);
261 ** Scan through all the SELECTED messages and check for a
262 ** match. If the message does not match, then unselect it.
264 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
265 if (is_selected(mp, msgnum)) {
/*
** A message that cannot be opened is reported with admonish() but is
** not fatal: fp is NULL, so the pmatches() call below is skipped.
*/
266 if ((fp = fopen(cp = m_name(msgnum), "r")) == NULL)
267 admonish(cp, "unable to read message");
268 if (fp && pmatches(fp, msgnum, 0L, 0L)) {
275 printf("%s\n", m_name(msgnum));
277 /* if it doesn't match, then unselect it */
278 unset_selected(mp, msgnum);
289 adios(EX_DATAERR, NULL, "no messages match specification");
294 ** Add the matching messages to sequences
/* the loop stops at the first NULL entry of seqs[] */
296 for (seqp = 0; seqs[seqp]; seqp++)
297 if (!seq_addsel(mp, seqs[seqp], publicsw, zerosw))
301 ** Print total matched if not printing each matched message number.
304 printf("%d hit%s\n", mp->numsel, mp->numsel == 1 ? "" : "s");
307 context_replace(curfolder, folder); /* update current folder */
308 seq_save(mp); /* synchronize message sequences */
309 context_save(); /* save the context file */
310 folder_free(mp); /* free folder/message structure */
311 listsw = 0; /* HACK */
319 if (listsw && !isatty(fileno(stdout)))
324 static struct swit parswit[] = {
338 { "date pattern", 0 },
340 { "from pattern", 0 },
342 { "search pattern", 0 },
344 { "subject pattern", 0 },
348 { "-othercomponent pattern", 15 },
352 { "before date", 0 },
354 { "datefield field", 5 },
358 /* DEFINITIONS FOR PATTERN MATCHING */
361 ** We really should be using re_comp() and re_exec() here. Unfortunately,
362 ** pick advertises that lowercase characters match characters of both
363 ** cases. Since re_exec() doesn't exhibit this behavior, we are stuck
364 ** with this version. Furthermore, we need to be able to save and restore
365 ** the state of the pattern matcher in order to do things "efficiently".
367 ** This matching algorithm isn't as powerful as the re_xxx()
368 ** routines (no \(xxx\) and \n constructs). Such is life.
384 static char linebuf[LBSIZE + 1];
385 static char decoded_linebuf[LBSIZE + 1];
387 /* the magic array for case-independence */
389 0000,0001,0002,0003,0004,0005,0006,0007,
390 0010,0011,0012,0013,0014,0015,0016,0017,
391 0020,0021,0022,0023,0024,0025,0026,0027,
392 0030,0031,0032,0033,0034,0035,0036,0037,
393 0040,0041,0042,0043,0044,0045,0046,0047,
394 0050,0051,0052,0053,0054,0055,0056,0057,
395 0060,0061,0062,0063,0064,0065,0066,0067,
396 0070,0071,0072,0073,0074,0075,0076,0077,
397 0100,0141,0142,0143,0144,0145,0146,0147,
398 0150,0151,0152,0153,0154,0155,0156,0157,
399 0160,0161,0162,0163,0164,0165,0166,0167,
400 0170,0171,0172,0133,0134,0135,0136,0137,
401 0140,0141,0142,0143,0144,0145,0146,0147,
402 0150,0151,0152,0153,0154,0155,0156,0157,
403 0160,0161,0162,0163,0164,0165,0166,0167,
404 0170,0171,0172,0173,0174,0175,0176,0177,
406 0200,0201,0202,0203,0204,0205,0206,0207,
407 0210,0211,0212,0213,0214,0215,0216,0217,
408 0220,0221,0222,0223,0224,0225,0226,0227,
409 0230,0231,0232,0233,0234,0235,0236,0237,
410 0240,0241,0242,0243,0244,0245,0246,0247,
411 0250,0251,0252,0253,0254,0255,0256,0257,
412 0260,0261,0262,0263,0264,0265,0266,0267,
413 0270,0271,0272,0273,0274,0275,0276,0277,
414 0300,0301,0302,0303,0304,0305,0306,0307,
415 0310,0311,0312,0313,0314,0315,0316,0317,
416 0320,0321,0322,0323,0324,0325,0326,0327,
417 0330,0331,0332,0333,0334,0335,0336,0337,
418 0340,0341,0342,0343,0344,0345,0346,0347,
419 0350,0351,0352,0353,0354,0355,0356,0357,
420 0360,0361,0362,0363,0364,0365,0366,0367,
421 0370,0371,0372,0373,0374,0375,0376,0377,
425 ** DEFINITIONS FOR NEXUS
/*
** nxtarg(): yield the current argument and advance argp; at the NULL
** terminator yield NULL and leave argp where it is.
*/
428 #define nxtarg() (*argp ? *argp++ : NULL)
/* prvarg(): step argp back by one argument */
429 #define prvarg() argp--
/*
** padvise(...): call advise() only while talked is still 0; talked is
** incremented on every use, so only the first diagnostic is printed.
** NOTE(review): this expands to an unbraced `if`, so an `else` that
** follows a padvise(...) statement would bind to the macro's `if`.
*/
431 #define padvise if (!talked++) advise
437 /* for {OR,AND,NOT}action */
439 struct nexus *un_L_child;
440 struct nexus *un_R_child;
447 char un_expbuf[ESIZE];
460 #define n_L_child un.st1.un_L_child
461 #define n_R_child un.st1.un_R_child
463 #define n_header un.st2.un_header
464 #define n_circf un.st2.un_circf
465 #define n_expbuf un.st2.un_expbuf
466 #define n_patbuf un.st2.un_patbuf
468 #define n_datef un.st3.un_datef
469 #define n_after un.st3.un_after
470 #define n_tws un.st3.un_tws
473 static int pdebug = 0;
478 static struct nexus *head;
481 ** prototypes for date routines
483 static struct tws *tws_parse(char *, int);
484 static struct tws *tws_special(char *);
489 static void PRaction(struct nexus *, int);
490 static int gcompile(struct nexus *, char *);
491 static int advance(char *, char *);
492 static int cclass(unsigned char *, int, int);
493 static int tcompile(char *, struct tws *, int);
495 static struct nexus *parse(void);
496 static struct nexus *nexp1(void);
497 static struct nexus *nexp2(void);
498 static struct nexus *nexp3(void);
499 static struct nexus *newnexus(int (*)());
/*
** Node evaluators that are passed to newnexus() and compared against
** n_action in PRaction().  They are declared with empty parentheses
** (no parameter information), which matches the int (*)() type that
** newnexus() accepts.
*/
501 static int ORaction();
502 static int ANDaction();
503 static int NOTaction();
504 static int GREPaction();
505 static int TWSaction();
/*
** Compile the search expression into the nexus tree rooted at `head`.
** `date` is stored in datesw; what happens when it is NULL, and how
** `vec` is connected to argp, is on lines not shown here.
** When parse() yields no tree the result is 0 if a diagnostic has
** already been issued (talked != 0) and 1 otherwise.  main() tests
** the result for zero.
*/
509 pcompile(char **vec, char *date)
/* a non-empty MHPDEBUG environment variable is tested here --
** presumably it enables pdebug; confirm */
513 if ((cp = getenv("MHPDEBUG")) && *cp)
517 if ((datesw = date) == NULL)
521 if ((head = parse()) == NULL)
522 return (talked ? 0 : 1);
/* reports the argument argp currently points at as unexpected */
525 padvise(NULL, "%s unexpected", *argp);
/*
** Top level of the expression parser (the OR level): read one operand
** with nexp1(), then look at the next argument.  For the OR operator
** an ORaction node is created whose right child comes from a
** recursive parse(); a missing right operand is reported as
** "missing disjunctive".  (The case labels and return statements are
** on lines not shown here.)
*/
533 static struct nexus *
/* nothing more to do when there is no operand or no further argument */
539 if ((n = nexp1()) == NULL || (cp = nxtarg()) == NULL)
543 padvise(NULL, "%s unexpected", cp);
549 switch (smatch(cp, parswit)) {
551 ambigsw(cp, parswit);
555 fprintf(stderr, "-%s unknown\n", cp);
560 o = newnexus(ORaction);
562 if ((o->n_R_child = parse()))
564 padvise(NULL, "missing disjunctive");
/*
** AND level of the expression parser: read one operand with nexp2(),
** then look at the next argument.  For the AND operator an ANDaction
** node is created whose right child comes from a recursive nexp1();
** a missing right operand is reported as "missing conjunctive".
** (The case labels and return statements are on lines not shown
** here.)
*/
574 static struct nexus *
/* nothing more to do when there is no operand or no further argument */
580 if ((n = nexp2()) == NULL || (cp = nxtarg()) == NULL)
584 padvise(NULL, "%s unexpected", cp);
590 switch (smatch(cp, parswit)) {
592 ambigsw(cp, parswit);
596 fprintf(stderr, "-%s unknown\n", cp);
601 o = newnexus(ANDaction);
603 if ((o->n_R_child = nexp1()))
605 padvise(NULL, "missing conjunctive");
/*
** NOT level of the expression parser: for the negation operator a
** NOTaction node is created whose only (left) child comes from
** nexp3(); a missing operand is reported as "missing negation".
** (The case labels and return statements are on lines not shown
** here.)
*/
616 static struct nexus *
622 if ((cp = nxtarg()) == NULL)
632 switch (smatch(cp, parswit)) {
634 ambigsw(cp, parswit);
638 fprintf(stderr, "-%s unknown\n", cp);
643 n = newnexus(NOTaction);
/* NOT has a single operand, stored in the left-child slot */
644 if ((n->n_L_child = nexp3()))
646 padvise(NULL, "missing negation");
/*
** Primary level of the expression parser.  The visible cases build:
**  - a parenthesised group: a nested parse() that must be followed by
**    the switch matching PRRBR ("missing -rbrace" otherwise);
**  - a header pattern: a GREPaction node for a regular expression of
**    the form "^<name>[ \t]*:.*<pattern>";
**  - a general pattern: a GREPaction node compiled by gcompile(), with
**    a copy of the pattern text kept in n_patbuf;
**  - a date-field selection, which stores the next argument in datesw;
**  - a date comparison: a TWSaction node filled in by tcompile().
** (The case labels and return statements are on lines not shown
** here.)
*/
656 static struct nexus *
661 char buffer[BUFSIZ], temp[64];
664 if ((cp = nxtarg()) == NULL)
668 padvise(NULL, "%s unexpected", cp);
676 switch (i = smatch(cp, parswit)) {
678 ambigsw(cp, parswit);
682 fprintf(stderr, "-%s unknown\n", cp);
687 if ((n = parse()) == NULL) {
688 padvise(NULL, "missing group");
691 if ((cp = nxtarg()) == NULL) {
692 padvise(NULL, "missing -rbrace");
/* skip the leading '-' before matching the switch name against parswit */
695 if (*cp++ == '-' && smatch(cp, parswit) == PRRBR)
/* --cp restores the '-' skipped above so the message shows the full argument */
697 padvise(NULL, "%s unexpected", --cp);
/*
** Copy the matched switch string (e.g. "subject pattern") into temp,
** terminate it explicitly because strncpy() may not, and take its
** first space-separated word as dp.
*/
709 strncpy(temp, parswit[i].sw, sizeof(temp));
710 temp[sizeof(temp) - 1] = '\0';
711 dp = *brkstring(temp, " ", NULL);
713 if (!(cp = nxtarg())) { /* allow -xyz arguments */
714 padvise(NULL, "missing argument to %s", argp[-2]);
717 n = newnexus(GREPaction);
/* anchor at line start: name, optional blanks, ':', then the pattern anywhere after */
719 snprintf(buffer, sizeof(buffer), "^%s[ \t]*:.*%s", dp, cp);
724 n = newnexus(GREPaction);
726 if (!(cp = nxtarg())) { /* allow -xyz arguments */
727 padvise(NULL, "missing argument to %s", argp[-2]);
732 if (!gcompile(n, dp)) {
733 padvise(NULL, "pattern error in %s %s", argp[-2], cp);
736 n->n_patbuf = mh_xstrdup(dp);
740 padvise(NULL, "internal error!");
/* here an argument that starts with '-' is not accepted */
744 if (!(datesw = nxtarg()) || *datesw == '-') {
745 padvise(NULL, "missing argument to %s",
753 if (!(cp = nxtarg())) { /* allow -xyz arguments */
754 padvise(NULL, "missing argument to %s", argp[-2]);
757 n = newnexus(TWSaction);
/* n_after is set to (i == PRAFTR) and that same value is passed as isafter */
759 if (!tcompile(cp, &n->n_tws, n->n_after = i == PRAFTR)) {
760 padvise(NULL, "unable to parse %s %s", argp[-2], cp);
/*
** Allocate one nexus node with mh_xcalloc() (presumably zero-filled,
** calloc-style -- confirm) and store `action` as its n_action
** evaluator.
*/
768 static struct nexus *
769 newnexus(int (*action)())
773 p = mh_xcalloc(1, sizeof *p);
775 p->n_action = action;
/*
** args(a) expands to the argument list handed to a node evaluator:
** the node `a` followed by fp, msgnum, start and stop, which must be
** in scope at the point of use.  params is the same list with `n` as
** the node.
*/
780 #define args(a) a, fp, msgnum, start, stop
781 #define params args(n)
/*
** Evaluate the compiled expression against one message by invoking the
** root node's n_action on `head`; its result is returned.
** fp/msgnum identify the message; start and stop are passed through
** to the node evaluators (main() passes 0L for both).
*/
790 pmatches(FILE *fp, int msgnum, long start, long stop)
/* true only on the first use of talked here, and only with pdebug set --
** presumably this dumps the tree via PRaction(); confirm */
795 if (!talked++ && pdebug)
798 return (*head->n_action) (args(head));
/*
** Debug helper: print the expression tree rooted at n to stderr.
** Each line is prefixed with one "| " per nesting level, followed by
** the node kind, which is identified by comparing n_action against
** the known evaluators.  OR and AND recurse into both children, NOT
** into its left child only.
*/
803 PRaction(struct nexus *n, int level)
807 for (i = 0; i < level; i++)
808 fprintf(stderr, "| ");
810 if (n->n_action == ORaction) {
811 fprintf(stderr, "OR\n");
812 PRaction(n->n_L_child, level + 1);
813 PRaction(n->n_R_child, level + 1);
816 if (n->n_action == ANDaction) {
817 fprintf(stderr, "AND\n");
818 PRaction(n->n_L_child, level + 1);
819 PRaction(n->n_R_child, level + 1);
822 if (n->n_action == NOTaction) {
823 fprintf(stderr, "NOT\n");
824 PRaction(n->n_L_child, level + 1);
827 if (n->n_action == GREPaction) {
/* n_header selects the "header"/"body" label; n_patbuf is the pattern text */
828 fprintf(stderr, "PATTERN(%s) %s\n",
829 n->n_header ? "header" : "body", n->n_patbuf);
832 if (n->n_action == TWSaction) {
833 fprintf(stderr, "TEMPORAL(%s) %s: %s\n",
834 n->n_after ? "after" : "before", n->n_datef,
835 dasctime(&n->n_tws));
/* unrecognised evaluator: its address is printed after a cast to unsigned int */
838 fprintf(stderr, "UNKNOWN(0x%x)\n",
839 (unsigned int)(unsigned long) (*n->n_action));
847 if ((*n->n_L_child->n_action) (args(n->n_L_child)))
849 return (*n->n_R_child->n_action) (args(n->n_R_child));
857 if (!(*n->n_L_child->n_action) (args(n->n_L_child)))
859 return (*n->n_R_child->n_action) (args(n->n_R_child));
867 return (!(*n->n_L_child->n_action) (args(n->n_L_child)));
/*
** Compile the pattern string astr into the node's n_expbuf.
** ep is the write position in n_expbuf and dp points one past its
** end, so `ep >= dp` means the compiled form does not fit.
** The caller in this file treats a zero return as a pattern error.
*/
872 gcompile(struct nexus *n, char *astr)
876 unsigned char *ep, *dp, *sp, *lastep = 0;
878 dp = (ep = n->n_expbuf) + sizeof n->n_expbuf;
889 if ((c = *sp++) != '*')
/* NOTE(review): presumably a leading '^' marks a negated character
** class -- the handling is on lines not shown; confirm */
916 if ((c = *sp++) == '^') {
/*
** A '-' that is followed by something other than ']' or the end of
** the pattern denotes a range: iterate from the character after the
** one stored last (ep[-1]) up to, but not including, *sp.
*/
926 if (c == '-' && *sp != '\0' && *sp != ']') {
927 for (c = ep[-1]+1; c < *sp; c++) {
930 if (c == '\0' || ep >= dp)
936 if (c == '\0' || ep >= dp)
/* the class body ends at the closing ']' */
939 } while ((c = *sp++) != ']');
946 if ((c = *sp++) == '\0')
966 char *p1, *p2, *ebp, *cbp;
969 fseek(fp, start, SEEK_SET);
973 if (body && n->n_header)
980 if (fgets(ibuf, sizeof ibuf, fp) == NULL
981 || (stop && pos >= stop)) {
986 pos += (long) strlen(ibuf);
988 ebp = ibuf + strlen(ibuf);
991 if (lf && c != '\n') {
992 if (c != ' ' && c != '\t') {
1011 if (c && p1 < &linebuf[LBSIZE - 1])
1021 ** Attempt to decode as a MIME header. If it's the
1022 ** last header, body will be 1 and lf will be at least 1.
1024 if ((body == 0 || lf > 0) && decode_rfc2047(linebuf,
1025 decoded_linebuf, sizeof decoded_linebuf)) {
1026 p1 = decoded_linebuf;
1030 if (advance(p1, p2))
1038 if (*p1 == c || cc[(unsigned char)*p1] == c)
1039 if (advance(p1, p2))
1046 if (advance(p1, p2))
/*
** Try to match the compiled expression aep against the text at alp.
** Both are walked through unsigned char pointers, so text bytes can
** index the cc[] case-folding table directly.  A text character
** matches a pattern character when they are equal or when the pattern
** character equals cc[text character].
** Callers in this file test the result for non-zero.
*/
1054 advance(char *alp, char *aep)
1056 unsigned char *lp, *ep, *curlp;
1058 lp = (unsigned char *)alp;
1059 ep = (unsigned char *)aep;
1063 if (*ep++ == *lp++ || ep[-1] == cc[lp[-1]])
/* the third cclass() argument is 1 here and 0 in the next test */
1081 if (cclass(ep, *lp++, 1)) {
1088 if (cclass(ep, *lp++, 0)) {
/* consume as many text characters as match the single pattern character *ep */
1102 while (*lp++ == *ep || cc[lp[-1]] == *ep)
1110 while (cclass(ep, *lp++, ep[-1] == (CCL | STAR)))
/* NOTE(review): presumably backtracking -- retry the rest of the
** pattern while lp is still beyond curlp; confirm */
1118 if (advance(lp, ep))
1120 } while (lp > curlp);
1124 admonish(NULL, "advance() botch -- you lose big");
/*
** Character-class test used by advance(): compare the character ac
** against the entries of the compiled set aset, treating an entry as
** matching when it equals the character or cc[character].
** NOTE(review): af looks like the value to return on a hit (advance()
** passes 1, 0, or a CCL|STAR test) -- the return statements are not
** shown; confirm.
*/
1131 cclass(unsigned char *aset, int ac, int af)
1134 unsigned char c, *set;
1142 if (*set++ == c || set[-1] == cc[c])
/*
** Parse the date string ap with tws_parse(), passing isafter through.
** tb is the caller's struct tws (presumably filled with the parsed
** value -- the copy is on lines not shown).  The caller in this file
** treats a zero return as "unable to parse".
*/
1150 tcompile(char *ap, struct tws *tb, int isafter)
1154 if ((tw = tws_parse(ap, isafter)) == NULL)
/*
** Turn the date string ap into a struct tws by trying several forms
** in turn (presumably returning at the first one that parses; the
** return statements are on lines not shown):
**  1. a keyword or day offset understood by tws_special(); the time
**     of day is then set to 23:59:59 when isafter is non-zero and to
**     00:00:00 otherwise;
**  2. ap exactly as given;
**  3. ap followed by the zone string of the current local time;
**  4. ap followed by the current local time of day and that zone;
**  5. the current day, month and year followed by ap;
**  6. the current day, month and year, ap, and one further string
**     whose argument line is not shown (presumably the zone).
*/
1163 tws_parse(char *ap, int isafter)
1165 char buffer[BUFSIZ];
1166 struct tws *tw, *ts;
1168 if ((tw = tws_special(ap)) != NULL) {
1169 tw->tw_sec = tw->tw_min = isafter ? 59 : 0;
1170 tw->tw_hour = isafter ? 23 : 0;
1173 if ((tw = dparsetime(ap)) != NULL)
/* ts (the current local time) supplies the missing parts in the attempts below */
1176 if ((ts = dlocaltimenow()) == NULL)
1179 snprintf(buffer, sizeof(buffer), "%s %s", ap, dtwszone(ts));
1180 if ((tw = dparsetime(buffer)) != NULL)
1183 snprintf(buffer, sizeof(buffer), "%s %02d:%02d:%02d %s", ap,
1184 ts->tw_hour, ts->tw_min, ts->tw_sec, dtwszone(ts));
1185 if ((tw = dparsetime(buffer)) != NULL)
1188 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s",
1189 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year, ap);
1190 if ((tw = dparsetime(buffer)) != NULL)
1193 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s %s",
1194 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year,
1196 if ((tw = dparsetime(buffer)) != NULL)
/*
** Recognise relative date words, compared case-insensitively with
** mh_strcasecmp(): "today", "yesterday" (one day subtracted from
** clock), "tomorrow" (one day added), the names listed in tw_ldotw[],
** and, per the comment below, a "-ddd" day count.  The result is
** dlocaltime() of the adjusted clock.
** The initialisation of clock is on a line not shown (presumably the
** current time -- confirm).
*/
1204 tws_special(char *ap)
1211 if (!mh_strcasecmp(ap, "today"))
1212 return dlocaltime(&clock);
1213 if (!mh_strcasecmp(ap, "yesterday")) {
1214 clock -= (long) (60 * 60 * 24);
1215 return dlocaltime(&clock);
1217 if (!mh_strcasecmp(ap, "tomorrow")) {
1218 clock += (long) (60 * 60 * 24);
1219 return dlocaltime(&clock);
/* look ap up in the NULL-terminated tw_ldotw[] table; i is the index reached */
1222 for (i = 0; tw_ldotw[i]; i++)
1223 if (!mh_strcasecmp(ap, tw_ldotw[i]))
1226 if ((tw = dlocaltime(&clock)) == NULL)
/* i becomes the offset from today's weekday (tw_wday); the handling of
** a positive offset is on lines not shown */
1228 if ((i -= tw->tw_wday) > 0)
1234 else /* -ddd days ago */
/* atoi() cannot report a malformed number; it simply yields 0 when no
** digits can be converted */
1235 i = atoi(ap); /* we should error check this */
/* shift clock by i days (i * 86400 seconds) */
1237 clock += (long) ((60 * 60 * 24) * i);
1238 return dlocaltime(&clock);
1247 struct field f = {{0}};
1251 fseek(fp, start, SEEK_SET);
1252 for (state = FLD2, bp = NULL;;) {
1253 switch (state = m_getfld2(state, &f, fp)) {
1262 bp = mh_xstrdup(f.value);
1263 if (mh_strcasecmp(f.name, n->n_datef)==0) {
1269 advise(NULL, "format error in message %d", msgnum);
1274 adios(EX_IOERR, "m_getfld2", "io error on message %d", msgnum);
1283 adios(EX_SOFTWARE, NULL, "internal error -- you lose");
1288 if ((tw = dparsetime(bp)) == NULL)
1289 advise(NULL, "unable to parse %s field in message %d, matching...",
1290 n->n_datef, msgnum), state = 1;
1292 state = n->n_after ? (twsort(tw, &n->n_tws) > 0)
1293 : (twsort(tw, &n->n_tws) < 0);