2 ** pick.c -- search for messages by content
4 ** This code is Copyright (c) 2002, 2008, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
12 #include <h/scansbr.h>
13 #include <h/fmt_scan.h>
21 #ifdef HAVE_SYS_TIME_H
22 # include <sys/time.h>
26 static struct swit switches[] = {
40 { "date pattern", 0 },
42 { "from pattern", 0 },
44 { "search pattern", 0 },
46 { "subject pattern", 0 },
50 { "-othercomponent pattern", 0 },
56 { "datefield field", 5 }, /* 5 chars required to differ from -date */
58 { "sequence name", 0 },
72 { "format format", 0 },
74 { "width columns", 0 },
86 char *version=VERSION;
124 static struct nexus *head;
125 static boolean body = FALSE;
130 static int pcompile(char **, char *);
131 static int pmatches(FILE *, int);
132 static boolean nexus_match(struct field *, int, struct nexus *);
133 static void nexus_free(struct nexus **);
134 static void nexus_clear(struct nexus *);
135 static void nexus_debug(struct nexus *, size_t);
136 static void nexus_debug_grep(struct grep_data *);
137 static void print_debug_level(size_t);
138 static struct nexus * createonethread(char *);
139 static struct nexus * createpickthread(char *);
140 static void scan_mbox(char *, char *, int);
143 static int listsw = -1;
147 static void printmsg(FILE *, struct msgs *, int, char *, int);
150 main(int argc, char **argv)
152 int publicsw = -1, zerosw = 1, vecp = 0, width = 0;
153 unsigned int seqp = 0;
155 char *maildir, *folder = NULL, buf[100];
156 char *cp, **argp, **arguments;
157 char *seqs[NUMATTRS + 1], *vec[MAXARGS];
158 struct msgs_array msgs = { 0, 0, NULL };
165 if (atexit(putzero_done) != 0) {
166 adios(EX_OSERR, NULL, "atexit failed");
169 setlocale(LC_ALL, "");
170 invo_name = mhbasename(argv[0]);
172 /* read user profile/context */
175 arguments = getarguments(invo_name, argc, argv, 1);
178 if (strcmp(invo_name, "scan")==0) {
182 while ((cp = *argp++)) {
188 switch (smatch(cp, switches)) {
190 ambigsw(cp, switches);
191 listsw = 0; /* HACK */
194 adios(EX_USAGE, NULL, "-%s unknown", cp);
197 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
198 print_help(buf, switches, 1);
199 listsw = 0; /* HACK */
200 exit(argc == 2 ? EX_OK : EX_USAGE);
202 print_version(invo_name);
203 listsw = 0; /* HACK */
204 exit(argc == 2 ? EX_OK : EX_USAGE);
218 if (!(cp = *argp++)) /* allow -xyz arguments */
219 adios(EX_USAGE, NULL, "missing argument to %s",
224 adios(EX_SOFTWARE, NULL, "internal error!");
235 if (!(cp = *argp++) || *cp == '-')
236 adios(EX_USAGE, NULL, "missing argument to %s",
239 /* check if too many sequences specified */
240 if (seqp >= NUMATTRS)
241 adios(EX_USAGE, NULL, "too many sequences (more than %d) specified", NUMATTRS);
268 if (!(form = *argp++) || *form == '-') {
269 adios(EX_USAGE, NULL, "missing argument to %s", argp[-2]);
273 if (!(cp = *argp++) || *cp == '-') {
274 adios(EX_USAGE, NULL, "missing argument to %s",
280 if (!(cp = *argp++) || (cp[0] == '-' && cp[1])) {
281 adios(EX_USAGE, NULL, "missing argument to %s",
284 if (strcmp(file = cp, "-")!=0) {
285 file = mh_xstrdup(expanddir(cp));
290 if (*cp == '+' || *cp == '@') {
292 adios(EX_USAGE, NULL, "only one folder at a time!");
294 folder = mh_xstrdup(expandfol(cp));
296 app_msgarg(&msgs, cp);
300 fmtstr = new_fs(form, "pick.default");
304 adios(EX_USAGE, NULL, "\"+folder\" not allowed with -file");
307 adios(EX_USAGE, NULL, "\"msgs\" not allowed with -file");
310 adios(EX_USAGE, NULL, "section arguments not allowed with -file");
313 scan_mbox(file, fmtstr, width);
318 ** If we didn't specify which messages to search,
319 ** then search the whole folder.
322 app_msgarg(&msgs, seq_all);
325 folder = getcurfol();
326 maildir = toabsdir(folder);
328 if (chdir(maildir) == NOTOK)
329 adios(EX_OSERR, maildir, "unable to change directory to");
331 /* read folder and create message structure */
332 if (!(mp = folder_read(folder)))
333 adios(EX_IOERR, NULL, "unable to read folder %s", folder);
335 /* check for empty folder */
337 adios(EX_DATAERR, NULL, "no messages in %s", folder);
339 /* parse all the message ranges/sequences and set SELECTED */
340 for (msgnum = 0; msgnum < msgs.size; msgnum++)
341 if (!m_convert(mp, msgs.msgs[msgnum]))
343 seq_setprev(mp); /* set the previous-sequence */
346 ** If we aren't saving the results to a sequence,
347 ** we default to list the results.
352 if (publicsw == 1 && is_readonly(mp))
353 adios(EX_NOPERM, NULL, "folder %s is read-only, so -public not allowed",
356 if (!pcompile(vec, NULL))
363 ** If printing message numbers to standard out,
364 ** force line buffering on.
367 setvbuf(stdout, NULL, _IOLBF, 0);
370 ** Scan through all the SELECTED messages and check for a
371 ** match. If the message does not match, then unselect it.
373 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
374 if (is_selected(mp, msgnum)) {
375 if ((fp = fopen(cp = m_name(msgnum), "r")) == NULL)
376 admonish(cp, "unable to read message");
377 if (fp && pmatches(fp, msgnum)) {
384 printmsg(fp, mp, msgnum, fmtstr, width);
387 /* if it doesn't match, then unselect it */
388 unset_selected(mp, msgnum);
401 adios(EX_DATAERR, NULL, "no messages match specification");
406 ** Add the matching messages to sequences
408 for (seqp = 0; seqs[seqp]; seqp++)
409 if (!seq_addsel(mp, seqs[seqp], publicsw, zerosw))
413 ** Print total matched if not printing each matched message number.
416 printf("%d hit%s\n", mp->numsel, mp->numsel == 1 ? "" : "s");
419 context_replace(curfolder, folder); /* update current folder */
420 seq_save(mp); /* synchronize message sequences */
421 context_save(); /* save the context file */
422 folder_free(mp); /* free folder/message structure */
423 listsw = 0; /* HACK */
428 scan_mbox(char *file, char *fmtstr, int width)
434 if (strcmp(file, "-") == 0) {
437 } else if (!(in = fopen(file, "r"))) {
438 adios(EX_IOERR, file, "unable to open");
441 for (msgnum = 1; ;msgnum++) {
442 state = scan(in, msgnum, SCN_MBOX, fmtstr, width, 0, 0);
443 if (state != SCNMSG) {
453 if (listsw && !isatty(fileno(stdout)))
458 printmsg(FILE *f, struct msgs *mp, int msgnum, char *fmtstr, int width)
462 boolean unseen = FALSE;
464 fseek(f, 0L, SEEK_SET);
466 seqnum = seq_getnum(mp, seq_unseen);
467 unseen = in_sequence(mp, seqnum, msgnum);
469 switch (state = scan(f, msgnum, SCN_FOLD, fmtstr,
470 width, msgnum==mp->curmsg, unseen)) {
475 advise(NULL, "message %d: empty", msgnum);
478 adios(EX_SOFTWARE, NULL, "scan() botch(%d)", state);
482 static struct swit parswit[] = {
496 { "date pattern", 0 },
498 { "from pattern", 0 },
500 { "search pattern", 0 },
502 { "subject pattern", 0 },
506 { "-othercomponent pattern", 15 },
510 { "before date", 0 },
512 { "datefield field", 5 },
518 /* DEFINITIONS FOR PATTERN MATCHING */
521 ** We really should be using re_comp() and re_exec() here. Unfortunately,
522 ** pick advertises that lowercase characters match characters of both
523 ** cases. Since re_exec() doesn't exhibit this behavior, we are stuck
524 ** with this version. Furthermore, we need to be able to save and restore
525 ** the state of the pattern matcher in order to do things "efficiently".
527 ** This algorithm's matching isn't as powerful as the re_xxx()
528 ** routines (no \(xxx\) and \n constructs). Such is life.
544 ** DEFINITIONS FOR NEXUS
547 #define nxtarg() (*argp ? *argp++ : NULL)
548 #define prvarg() argp--
550 #define padvise if (!talked++) advise
553 static int pdebug = 0;
559 ** prototypes for date routines
561 static struct tws *tws_parse(char *, int);
562 static struct tws *tws_special(char *);
567 static int gcompile(struct grep_data *, const char *);
568 static int tcompile(char *, struct tws *, int);
570 static struct nexus *parse(void);
571 static struct nexus *nexp1(void);
572 static struct nexus *nexp2(void);
573 static struct nexus *nexp3(void);
574 static struct nexus *newnexus(enum nexus_type);
577 pcompile(char **vec, char *date)
581 if ((cp = getenv("MHPDEBUG")) && *cp)
585 if ((datesw = date) == NULL)
589 if ((head = parse()) == NULL)
590 return (talked ? 0 : 1);
593 padvise(NULL, "%s unexpected", *argp);
601 static struct nexus *
606 struct bin_data *bin;
608 if ((n = nexp1()) == NULL || (cp = nxtarg()) == NULL)
612 padvise(NULL, "%s unexpected", cp);
618 switch (smatch(cp, parswit)) {
620 ambigsw(cp, parswit);
624 fprintf(stderr, "-%s unknown\n", cp);
632 if ((bin->right = parse()))
634 padvise(NULL, "missing disjunctive");
644 static struct nexus *
649 struct bin_data *bin;
651 if ((n = nexp2()) == NULL || (cp = nxtarg()) == NULL)
655 padvise(NULL, "%s unexpected", cp);
661 switch (smatch(cp, parswit)) {
663 ambigsw(cp, parswit);
667 fprintf(stderr, "-%s unknown\n", cp);
675 if ((bin->right = nexp1()))
677 padvise(NULL, "missing conjunctive");
688 static struct nexus *
693 struct bin_data *bin;
695 if ((cp = nxtarg()) == NULL)
705 switch (smatch(cp, parswit)) {
707 ambigsw(cp, parswit);
711 fprintf(stderr, "-%s unknown\n", cp);
718 if ((bin->left = nexp3()))
720 padvise(NULL, "missing negation");
730 static struct nexus *
735 char buffer[BUFSIZ], temp[64];
737 struct grep_data *gdata;
738 struct date_data *twsd;
740 if ((cp = nxtarg()) == NULL)
744 padvise(NULL, "%s unexpected", cp);
752 switch (i = smatch(cp, parswit)) {
754 ambigsw(cp, parswit);
758 fprintf(stderr, "-%s unknown\n", cp);
763 if ((n = parse()) == NULL) {
764 padvise(NULL, "missing group");
767 if ((cp = nxtarg()) == NULL) {
768 padvise(NULL, "missing -rbrace");
771 if (*cp++ == '-' && smatch(cp, parswit) == PRRBR)
773 padvise(NULL, "%s unexpected", --cp);
781 if (!(cp = nxtarg())) { /* allow -xyz arguments */
782 padvise(NULL, "missing argument to %s", argp[-2]);
784 return createpickthread(cp);
790 strncpy(temp, parswit[i].sw, sizeof(temp));
791 temp[sizeof(temp) - 1] = '\0';
792 dp = *brkstring(temp, " ", NULL);
794 if (!(cp = nxtarg())) { /* allow -xyz arguments */
795 padvise(NULL, "missing argument to %s", argp[-2]);
798 n = newnexus(grep_t);
800 gdata->header = mh_xstrdup(dp);
801 snprintf(buffer, sizeof(buffer), "%s", cp);
806 n = newnexus(grep_t);
808 gdata->header = NULL;
810 if (!(cp = nxtarg())) { /* allow -xyz arguments */
811 padvise(NULL, "missing argument to %s", argp[-2]);
816 if (!gcompile(gdata, dp)) {
817 padvise("regcomp", "pattern error in %s %s", argp[-2], cp);
823 padvise(NULL, "internal error!");
827 if (!(datesw = nxtarg()) || *datesw == '-') {
828 padvise(NULL, "missing argument to %s",
836 if (!(cp = nxtarg())) { /* allow -xyz arguments */
837 padvise(NULL, "missing argument to %s", argp[-2]);
840 n = newnexus(date_t);
842 twsd->datef = datesw;
843 if (!tcompile(cp, &twsd->tws, twsd->after = i == PRAFTR)) {
844 padvise(NULL, "unable to parse %s %s", argp[-2], cp);
852 static struct nexus *
853 newnexus(enum nexus_type t)
855 struct nexus *p = NULL;
856 p = mh_xcalloc(1, sizeof(struct nexus));
862 static void nexus_clear(struct nexus *n)
868 nexus_clear(n->data.b.right);
871 nexus_clear(n->data.b.left);
879 pmatches(FILE *fp, int msgnum)
881 struct field f = {{0}};
889 if (!talked++ && pdebug) {
890 nexus_debug(head, 0);
893 while (s == FLD2 || s == BODY2) {
894 switch (s = m_getfld2(s, &f, fp)) {
899 nexus_match(&f, msgnum, head);
905 nexus_match(&f, msgnum, head);
908 advise(NULL, "IOERR in message %d\n", msgnum);
913 adios(EX_SOFTWARE, "m_getfld2", "returned unknown state %d at message %d", s, msgnum);
920 match_grep(struct field *f, struct grep_data *g)
925 if (!g->header && *f->name) {
930 ret = regexec(g->preg, f->value, 0, NULL, 0);
934 /* check for the right field */
935 if (!(g->header && *g->header && mh_strcasecmp(g->header, f->name)==0)) {
939 if (decode_rfc2047(f->value, buf, sizeof(buf))) {
940 ret = regexec(g->preg, buf, 0, NULL, 0);
942 ret = regexec(g->preg, f->value, 0, NULL, 0);
951 regerror(ret, g->preg, buf, sizeof(buf));
952 fprintf(stderr, "%s\n", buf);
958 match_date(struct field *f, int msgnum, struct date_data *dd)
964 if (mh_strcasecmp(f->name, dd->datef)!=0) {
967 bp = mh_xstrdup(f->value);
968 if ((tw = dparsetime(bp)) == NULL) {
969 advise(NULL, "unable to parse %s field in message %d, not matching...", dd->datef, msgnum);
970 } else if (dd->after) {
971 ret = twsort(tw, &dd->tws) > 0;
973 ret = twsort(tw, &dd->tws) < 0;
981 nexus_match(struct field *f, int msgnum, struct nexus *n)
985 n->match = nexus_match(f, msgnum, n->data.b.left);
986 n->match = nexus_match(f, msgnum, n->data.b.right) && n->match;
989 n->match = nexus_match(f, msgnum, n->data.b.left);
990 n->match = nexus_match(f, msgnum, n->data.b.right) || n->match;
993 n->match = !nexus_match(f, msgnum, n->data.b.left);
999 n->match = match_date(f, msgnum, &n->data.d);
1005 n->match = match_grep(f, &n->data.g);
1008 adios(EX_SOFTWARE, NULL, "nexus tree contains a unknown nexus_type (%d)", n->t);
1014 nexus_debug(struct nexus *n, size_t level)
1016 struct date_data *dd;
1017 print_debug_level(level);
1020 fputs("AND\n", stderr);
1021 nexus_debug(n->data.b.left, level+1);
1022 nexus_debug(n->data.b.right, level+1);
1025 fputs("OR\n", stderr);
1026 nexus_debug(n->data.b.left, level+1);
1027 nexus_debug(n->data.b.right, level+1);
1030 fputs("NOT\n", stderr);
1031 nexus_debug(n->data.b.left, level+1);
1034 nexus_debug_grep(&n->data.g);
1038 fprintf(stderr, "TEMPORAL(%s) %s: %s\n",dd->after ? "after" : "before", dd->datef, dasctime(&dd->tws));
1041 adios(EX_SOFTWARE, NULL, "nexus tree contains a unknown nexus_type (%d)", n->t);
1046 nexus_debug_grep(struct grep_data *gd)
1048 char *buf, *buf2, *pbuf, *pbuf2;
1050 pbuf = pbuf2 = mh_xstrdup(gd->pattern);
1052 for (;*pbuf2; pbuf2++) {
1053 *pbuf2 = tolower(*pbuf2);
1057 buf = buf2 = mh_xstrdup(gd->header);
1058 for (;*buf2; buf2++) {
1059 *buf2 = tolower(*buf2);
1061 fprintf(stderr, "PETTERN(%s) %s\n", buf, pbuf);
1063 fprintf(stderr, "PETTERN(BODY) %s\n", pbuf);
1070 nexus_free(struct nexus **n)
1078 nexus_free(&(*n)->data.b.right);
1081 nexus_free(&(*n)->data.b.left);
1084 mh_free0(&(*n)->data.g.header);
1085 mh_free0(&(*n)->data.g.pattern);
1086 regfree((*n)->data.g.preg);
1090 advise(NULL, "Unknown nexus_type (%d) to free", (*n)->t);
1096 print_debug_level(size_t level)
1100 for (i = 0; i < level; i++) {
1101 fputs("| ", stderr);
1106 gcompile(struct grep_data *g, const char *astr)
1108 regex_t *preg = mh_xcalloc(1, sizeof(regex_t));
1113 g->pattern = mh_xstrdup(astr);
1114 ret = regcomp(preg, astr, REG_ICASE | REG_NOSUB);
1116 buf = mh_xcalloc(BUFSIZ, sizeof(char));
1117 regerror(ret, g->preg, buf, BUFSIZ*sizeof(char));
1118 fprintf(stderr, "%s\n", buf);
1126 tcompile(char *ap, struct tws *tb, int isafter)
1130 if ((tw = tws_parse(ap, isafter)) == NULL)
1139 tws_parse(char *ap, int isafter)
1141 char buffer[BUFSIZ];
1142 struct tws *tw, *ts;
1144 if ((tw = tws_special(ap)) != NULL) {
1145 tw->tw_sec = tw->tw_min = isafter ? 59 : 0;
1146 tw->tw_hour = isafter ? 23 : 0;
1149 if ((tw = dparsetime(ap)) != NULL)
1152 if ((ts = dlocaltimenow()) == NULL)
1155 snprintf(buffer, sizeof(buffer), "%s %s", ap, dtwszone(ts));
1156 if ((tw = dparsetime(buffer)) != NULL)
1159 snprintf(buffer, sizeof(buffer), "%s %02d:%02d:%02d %s", ap,
1160 ts->tw_hour, ts->tw_min, ts->tw_sec, dtwszone(ts));
1161 if ((tw = dparsetime(buffer)) != NULL)
1164 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s",
1165 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year, ap);
1166 if ((tw = dparsetime(buffer)) != NULL)
1169 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s %s",
1170 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year,
1172 if ((tw = dparsetime(buffer)) != NULL)
1180 tws_special(char *ap)
1187 if (!mh_strcasecmp(ap, "today"))
1188 return dlocaltime(&clock);
1189 if (!mh_strcasecmp(ap, "yesterday")) {
1190 clock -= (long) (60 * 60 * 24);
1191 return dlocaltime(&clock);
1193 if (!mh_strcasecmp(ap, "tomorrow")) {
1194 clock += (long) (60 * 60 * 24);
1195 return dlocaltime(&clock);
1198 for (i = 0; tw_ldotw[i]; i++)
1199 if (!mh_strcasecmp(ap, tw_ldotw[i]))
1202 if ((tw = dlocaltime(&clock)) == NULL)
1204 if ((i -= tw->tw_wday) > 0)
1210 else /* -ddd days ago */
1211 i = atoi(ap); /* we should error check this */
1213 clock += (long) ((60 * 60 * 24) * i);
1214 return dlocaltime(&clock);
1218 static struct nexus *
1219 createpickthread(char *msgs)
1221 char *folder = NULL;
1222 struct msgs_array msgarray = {0};
1223 struct msgs_array files = {0};
1224 struct nexus *ret = NULL;
1227 struct bin_data *bd;
1229 char **cp = brkstring(msgs, " \t", NULL);
1232 for (; cp && *cp; cp++) {
1240 folder = mh_xstrdup(*cp);
1243 app_msgarg(&msgarray, mh_xstrdup(*cp));
1247 parse_msgs(&msgarray, folder, &files);
1249 for (i = 0; i < files.size; i++) {
1250 buf = getthreadid(files.msgs[i]);
1252 adios(EX_DATAERR, NULL, "message %s is not part of a thread", basename(files.msgs[i]));
1256 c = createonethread(buf);
1264 or = newnexus(or_t);
1271 mh_free0(&(files.msgs));
1272 mh_free0(&(msgarray.msgs));
1277 static struct nexus *
1278 createonethread(char *c)
1280 struct nexus *ret = newnexus(or_t);
1281 struct nexus *left = newnexus(grep_t);
1282 struct nexus *right = newnexus(grep_t);
1285 ret->data.b.left = left;
1286 ret->data.b.right = right;
1287 left->data.g.header = mh_xstrdup("message-id");
1290 snprintf(buf, sizeof(buf), "^[ \t]*<%s>", c);
1291 if(!gcompile(&left->data.g, buf)) {
1292 padvise(NULL, "pattern error %s", c);
1296 right->data.g.header = mh_xstrdup("references");
1298 snprintf(buf, sizeof(buf), "^[ \t]*<%s>", c);
1299 if(!gcompile(&right->data.g, buf)) {
1300 padvise(NULL, "pattern error in %s", c);