2 ** pick.c -- search for messages by content
4 ** This code is Copyright (c) 2002, 2008, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
16 #ifdef HAVE_SYS_TIME_H
17 # include <sys/time.h>
21 static struct swit switches[] = {
35 { "date pattern", 0 },
37 { "from pattern", 0 },
39 { "search pattern", 0 },
41 { "subject pattern", 0 },
45 { "-othercomponent pattern", 0 },
51 { "datefield field", 5 }, /* 5 chars required to differ from -date */
53 { "sequence name", 0 },
76 static int pcompile(char **, char *);
77 static int pmatches(FILE *, int, long, long);
80 static int listsw = -1;
85 main(int argc, char **argv)
87 int publicsw = -1, zerosw = 1, vecp = 0;
88 unsigned int seqp = 0;
90 char *maildir, *folder = NULL, buf[100];
91 char *cp, **argp, **arguments;
92 char *seqs[NUMATTRS + 1], *vec[MAXARGS];
93 struct msgs_array msgs = { 0, 0, NULL };
97 if (atexit(putzero_done) != 0) {
98 adios(EX_OSERR, NULL, "atexit failed");
101 setlocale(LC_ALL, "");
102 invo_name = mhbasename(argv[0]);
104 /* read user profile/context */
107 arguments = getarguments(invo_name, argc, argv, 1);
110 while ((cp = *argp++)) {
116 switch (smatch(cp, switches)) {
118 ambigsw(cp, switches);
119 listsw = 0; /* HACK */
122 adios(EX_USAGE, NULL, "-%s unknown", cp);
125 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
126 print_help(buf, switches, 1);
127 listsw = 0; /* HACK */
128 exit(argc == 2 ? EX_OK : EX_USAGE);
130 print_version(invo_name);
131 listsw = 0; /* HACK */
132 exit(argc == 2 ? EX_OK : EX_USAGE);
145 if (!(cp = *argp++)) /* allow -xyz arguments */
146 adios(EX_USAGE, NULL, "missing argument to %s",
151 adios(EX_SOFTWARE, NULL, "internal error!");
162 if (!(cp = *argp++) || *cp == '-')
163 adios(EX_USAGE, NULL, "missing argument to %s",
166 /* check if too many sequences specified */
167 if (seqp >= NUMATTRS)
168 adios(EX_USAGE, NULL, "too many sequences (more than %d) specified", NUMATTRS);
196 if (*cp == '+' || *cp == '@') {
198 adios(EX_USAGE, NULL, "only one folder at a time!");
200 folder = getcpy(expandfol(cp));
202 app_msgarg(&msgs, cp);
207 ** If we didn't specify which messages to search,
208 ** then search the whole folder.
211 app_msgarg(&msgs, seq_all);
214 folder = getcurfol();
215 maildir = toabsdir(folder);
217 if (chdir(maildir) == NOTOK)
218 adios(EX_OSERR, maildir, "unable to change directory to");
220 /* read folder and create message structure */
221 if (!(mp = folder_read(folder)))
222 adios(EX_IOERR, NULL, "unable to read folder %s", folder);
224 /* check for empty folder */
226 adios(EX_DATAERR, NULL, "no messages in %s", folder);
228 /* parse all the message ranges/sequences and set SELECTED */
229 for (msgnum = 0; msgnum < msgs.size; msgnum++)
230 if (!m_convert(mp, msgs.msgs[msgnum]))
232 seq_setprev(mp); /* set the previous-sequence */
235 ** If we aren't saving the results to a sequence,
236 ** we default to list the results.
241 if (publicsw == 1 && is_readonly(mp))
242 adios(EX_NOPERM, NULL, "folder %s is read-only, so -public not allowed",
245 if (!pcompile(vec, NULL))
252 ** If printing message numbers to standard out,
253 ** force line buffering on.
256 setvbuf(stdout, NULL, _IOLBF, 0);
259 ** Scan through all the SELECTED messages and check for a
260 ** match. If the message does not match, then unselect it.
262 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
263 if (is_selected(mp, msgnum)) {
264 if ((fp = fopen(cp = m_name(msgnum), "r")) == NULL)
265 admonish(cp, "unable to read message");
266 if (fp && pmatches(fp, msgnum, 0L, 0L)) {
273 printf("%s\n", m_name(msgnum));
275 /* if it doesn't match, then unselect it */
276 unset_selected(mp, msgnum);
288 adios(EX_DATAERR, NULL, "no messages match specification");
293 ** Add the matching messages to sequences
295 for (seqp = 0; seqs[seqp]; seqp++)
296 if (!seq_addsel(mp, seqs[seqp], publicsw, zerosw))
300 ** Print total matched if not printing each matched message number.
303 printf("%d hit%s\n", mp->numsel, mp->numsel == 1 ? "" : "s");
306 context_replace(curfolder, folder); /* update current folder */
307 seq_save(mp); /* synchronize message sequences */
308 context_save(); /* save the context file */
309 folder_free(mp); /* free folder/message structure */
310 listsw = 0; /* HACK */
318 if (listsw && !isatty(fileno(stdout)))
323 static struct swit parswit[] = {
337 { "date pattern", 0 },
339 { "from pattern", 0 },
341 { "search pattern", 0 },
343 { "subject pattern", 0 },
347 { "-othercomponent pattern", 15 },
351 { "before date", 0 },
353 { "datefield field", 5 },
357 /* DEFINITIONS FOR PATTERN MATCHING */
360 ** We really should be using re_comp() and re_exec() here. Unfortunately,
361 ** pick advertises that lowercase characters match characters of both
362 ** cases. Since re_exec() doesn't exhibit this behavior, we are stuck
363 ** with this version. Furthermore, we need to be able to save and restore
364 ** the state of the pattern matcher in order to do things "efficiently".
366 ** This algorithm isn't as powerful as the re_xxx() routines
367 ** (no \(xxx\) and \n constructs). Such is life.
383 static char linebuf[LBSIZE + 1];
385 /* the magic array for case-independence */
387 0000,0001,0002,0003,0004,0005,0006,0007,
388 0010,0011,0012,0013,0014,0015,0016,0017,
389 0020,0021,0022,0023,0024,0025,0026,0027,
390 0030,0031,0032,0033,0034,0035,0036,0037,
391 0040,0041,0042,0043,0044,0045,0046,0047,
392 0050,0051,0052,0053,0054,0055,0056,0057,
393 0060,0061,0062,0063,0064,0065,0066,0067,
394 0070,0071,0072,0073,0074,0075,0076,0077,
395 0100,0141,0142,0143,0144,0145,0146,0147,
396 0150,0151,0152,0153,0154,0155,0156,0157,
397 0160,0161,0162,0163,0164,0165,0166,0167,
398 0170,0171,0172,0133,0134,0135,0136,0137,
399 0140,0141,0142,0143,0144,0145,0146,0147,
400 0150,0151,0152,0153,0154,0155,0156,0157,
401 0160,0161,0162,0163,0164,0165,0166,0167,
402 0170,0171,0172,0173,0174,0175,0176,0177,
404 0200,0201,0202,0203,0204,0205,0206,0207,
405 0210,0211,0212,0213,0214,0215,0216,0217,
406 0220,0221,0222,0223,0224,0225,0226,0227,
407 0230,0231,0232,0233,0234,0235,0236,0237,
408 0240,0241,0242,0243,0244,0245,0246,0247,
409 0250,0251,0252,0253,0254,0255,0256,0257,
410 0260,0261,0262,0263,0264,0265,0266,0267,
411 0270,0271,0272,0273,0274,0275,0276,0277,
412 0300,0301,0302,0303,0304,0305,0306,0307,
413 0310,0311,0312,0313,0314,0315,0316,0317,
414 0320,0321,0322,0323,0324,0325,0326,0327,
415 0330,0331,0332,0333,0334,0335,0336,0337,
416 0340,0341,0342,0343,0344,0345,0346,0347,
417 0350,0351,0352,0353,0354,0355,0356,0357,
418 0360,0361,0362,0363,0364,0365,0366,0367,
419 0370,0371,0372,0373,0374,0375,0376,0377,
423 ** DEFINITIONS FOR NEXUS
426 #define nxtarg() (*argp ? *argp++ : NULL) /* yield the current argument and advance argp; yield NULL (without advancing) when *argp is NULL */
427 #define prvarg() argp-- /* step argp back by one argument; expansion is unparenthesized, so use it as a statement */
429 #define padvise if (!talked++) advise /* call advise() only when talked was zero, bumping talked on every use; expands to a bare `if`, so a following `else` would bind to it */
435 /* for {OR,AND,NOT}action */
437 struct nexus *un_L_child;
438 struct nexus *un_R_child;
445 char un_expbuf[ESIZE];
458 #define n_L_child un.st1.un_L_child /* st1 members: child links, followed by the OR/AND/NOT actions */
459 #define n_R_child un.st1.un_R_child
461 #define n_header un.st2.un_header /* st2 members: pattern-matching fields, reported for GREPaction nodes */
462 #define n_circf un.st2.un_circf
463 #define n_expbuf un.st2.un_expbuf
464 #define n_patbuf un.st2.un_patbuf
466 #define n_datef un.st3.un_datef /* st3 members: date-comparison fields, reported for TWSaction nodes */
467 #define n_after un.st3.un_after
468 #define n_tws un.st3.un_tws
471 static int pdebug = 0;
476 static struct nexus *head;
479 ** prototypes for date routines
481 static struct tws *tws_parse(char *, int);
482 static struct tws *tws_special(char *);
487 static void PRaction(struct nexus *, int);
488 static int gcompile(struct nexus *, char *);
489 static int advance(char *, char *);
490 static int cclass(unsigned char *, int, int);
491 static int tcompile(char *, struct tws *, int);
493 static struct nexus *parse(void);
494 static struct nexus *nexp1(void);
495 static struct nexus *nexp2(void);
496 static struct nexus *nexp3(void);
497 static struct nexus *newnexus(int (*)());
499 static int ORaction(); /* node actions stored in n_action; declared with empty parameter lists, so calls are not argument-checked */
500 static int ANDaction();
501 static int NOTaction();
502 static int GREPaction();
503 static int TWSaction();
507 pcompile(char **vec, char *date)
511 if ((cp = getenv("MHPDEBUG")) && *cp)
515 if ((datesw = date) == NULL)
519 if ((head = parse()) == NULL)
520 return (talked ? 0 : 1);
523 padvise(NULL, "%s unexpected", *argp);
531 static struct nexus *
537 if ((n = nexp1()) == NULL || (cp = nxtarg()) == NULL)
541 padvise(NULL, "%s unexpected", cp);
547 switch (smatch(cp, parswit)) {
549 ambigsw(cp, parswit);
553 fprintf(stderr, "-%s unknown\n", cp);
558 o = newnexus(ORaction);
560 if ((o->n_R_child = parse()))
562 padvise(NULL, "missing disjunctive");
572 static struct nexus *
578 if ((n = nexp2()) == NULL || (cp = nxtarg()) == NULL)
582 padvise(NULL, "%s unexpected", cp);
588 switch (smatch(cp, parswit)) {
590 ambigsw(cp, parswit);
594 fprintf(stderr, "-%s unknown\n", cp);
599 o = newnexus(ANDaction);
601 if ((o->n_R_child = nexp1()))
603 padvise(NULL, "missing conjunctive");
614 static struct nexus *
620 if ((cp = nxtarg()) == NULL)
630 switch (smatch(cp, parswit)) {
632 ambigsw(cp, parswit);
636 fprintf(stderr, "-%s unknown\n", cp);
641 n = newnexus(NOTaction);
642 if ((n->n_L_child = nexp3()))
644 padvise(NULL, "missing negation");
654 static struct nexus *
659 char buffer[BUFSIZ], temp[64];
662 if ((cp = nxtarg()) == NULL)
666 padvise(NULL, "%s unexpected", cp);
674 switch (i = smatch(cp, parswit)) {
676 ambigsw(cp, parswit);
680 fprintf(stderr, "-%s unknown\n", cp);
685 if ((n = parse()) == NULL) {
686 padvise(NULL, "missing group");
689 if ((cp = nxtarg()) == NULL) {
690 padvise(NULL, "missing -rbrace");
693 if (*cp++ == '-' && smatch(cp, parswit) == PRRBR)
695 padvise(NULL, "%s unexpected", --cp);
707 strncpy(temp, parswit[i].sw, sizeof(temp));
708 temp[sizeof(temp) - 1] = '\0';
709 dp = *brkstring(temp, " ", NULL);
711 if (!(cp = nxtarg())) { /* allow -xyz arguments */
712 padvise(NULL, "missing argument to %s", argp[-2]);
715 n = newnexus(GREPaction);
717 snprintf(buffer, sizeof(buffer), "^%s[ \t]*:.*%s", dp, cp);
722 n = newnexus(GREPaction);
724 if (!(cp = nxtarg())) { /* allow -xyz arguments */
725 padvise(NULL, "missing argument to %s", argp[-2]);
730 if (!gcompile(n, dp)) {
731 padvise(NULL, "pattern error in %s %s", argp[-2], cp);
734 n->n_patbuf = getcpy(dp);
738 padvise(NULL, "internal error!");
742 if (!(datesw = nxtarg()) || *datesw == '-') {
743 padvise(NULL, "missing argument to %s",
751 if (!(cp = nxtarg())) { /* allow -xyz arguments */
752 padvise(NULL, "missing argument to %s", argp[-2]);
755 n = newnexus(TWSaction);
757 if (!tcompile(cp, &n->n_tws, n->n_after = i == PRAFTR)) {
758 padvise(NULL, "unable to parse %s %s", argp[-2], cp);
766 static struct nexus *
767 newnexus(int (*action)())
771 if ((p = (struct nexus *) mh_xcalloc((size_t) 1, sizeof *p)) == NULL)
772 adios(EX_OSERR, NULL, "unable to allocate component storage");
774 p->n_action = action;
779 #define args(a) a, fp, msgnum, start, stop /* argument list handed to a node's n_action: the node `a` plus the caller's fp, msgnum, start and stop */
780 #define params args(n) /* the same list with the node named n */
789 pmatches(FILE *fp, int msgnum, long start, long stop)
794 if (!talked++ && pdebug)
797 return (*head->n_action) (args(head));
802 PRaction(struct nexus *n, int level)
806 for (i = 0; i < level; i++)
807 fprintf(stderr, "| ");
809 if (n->n_action == ORaction) {
810 fprintf(stderr, "OR\n");
811 PRaction(n->n_L_child, level + 1);
812 PRaction(n->n_R_child, level + 1);
815 if (n->n_action == ANDaction) {
816 fprintf(stderr, "AND\n");
817 PRaction(n->n_L_child, level + 1);
818 PRaction(n->n_R_child, level + 1);
821 if (n->n_action == NOTaction) {
822 fprintf(stderr, "NOT\n");
823 PRaction(n->n_L_child, level + 1);
826 if (n->n_action == GREPaction) {
827 fprintf(stderr, "PATTERN(%s) %s\n",
828 n->n_header ? "header" : "body", n->n_patbuf);
831 if (n->n_action == TWSaction) {
832 fprintf(stderr, "TEMPORAL(%s) %s: %s\n",
833 n->n_after ? "after" : "before", n->n_datef,
834 dasctime(&n->n_tws));
837 fprintf(stderr, "UNKNOWN(0x%x)\n",
838 (unsigned int)(unsigned long) (*n->n_action));
846 if ((*n->n_L_child->n_action) (args(n->n_L_child)))
848 return (*n->n_R_child->n_action) (args(n->n_R_child));
856 if (!(*n->n_L_child->n_action) (args(n->n_L_child)))
858 return (*n->n_R_child->n_action) (args(n->n_R_child));
866 return (!(*n->n_L_child->n_action) (args(n->n_L_child)));
871 gcompile(struct nexus *n, char *astr)
875 unsigned char *ep, *dp, *sp, *lastep = 0;
877 dp = (ep = n->n_expbuf) + sizeof n->n_expbuf;
888 if ((c = *sp++) != '*')
915 if ((c = *sp++) == '^') {
925 if (c == '-' && *sp != '\0' && *sp != ']') {
926 for (c = ep[-1]+1; c < *sp; c++) {
929 if (c == '\0' || ep >= dp)
935 if (c == '\0' || ep >= dp)
938 } while ((c = *sp++) != ']');
945 if ((c = *sp++) == '\0')
965 char *p1, *p2, *ebp, *cbp;
968 fseek(fp, start, SEEK_SET);
972 if (body && n->n_header)
979 if (fgets(ibuf, sizeof ibuf, fp) == NULL
980 || (stop && pos >= stop)) {
985 pos += (long) strlen(ibuf);
987 ebp = ibuf + strlen(ibuf);
990 if (lf && c != '\n') {
991 if (c != ' ' && c != '\t') {
1010 if (c && p1 < &linebuf[LBSIZE - 1])
1020 if (advance(p1, p2))
1028 if (*p1 == c || cc[(unsigned char)*p1] == c)
1029 if (advance(p1, p2))
1036 if (advance(p1, p2))
1044 advance(char *alp, char *aep)
1046 unsigned char *lp, *ep, *curlp;
1048 lp = (unsigned char *)alp;
1049 ep = (unsigned char *)aep;
1053 if (*ep++ == *lp++ || ep[-1] == cc[lp[-1]])
1071 if (cclass(ep, *lp++, 1)) {
1078 if (cclass(ep, *lp++, 0)) {
1092 while (*lp++ == *ep || cc[lp[-1]] == *ep)
1100 while (cclass(ep, *lp++, ep[-1] == (CCL | STAR)))
1108 if (advance(lp, ep))
1110 } while (lp > curlp);
1114 admonish(NULL, "advance() botch -- you lose big");
1121 cclass(unsigned char *aset, int ac, int af)
1124 unsigned char c, *set;
1132 if (*set++ == c || set[-1] == cc[c])
1140 tcompile(char *ap, struct tws *tb, int isafter)
1144 if ((tw = tws_parse(ap, isafter)) == NULL)
1153 tws_parse(char *ap, int isafter)
1155 char buffer[BUFSIZ];
1156 struct tws *tw, *ts;
1158 if ((tw = tws_special(ap)) != NULL) {
1159 tw->tw_sec = tw->tw_min = isafter ? 59 : 0;
1160 tw->tw_hour = isafter ? 23 : 0;
1163 if ((tw = dparsetime(ap)) != NULL)
1166 if ((ts = dlocaltimenow()) == NULL)
1169 snprintf(buffer, sizeof(buffer), "%s %s", ap, dtwszone(ts));
1170 if ((tw = dparsetime(buffer)) != NULL)
1173 snprintf(buffer, sizeof(buffer), "%s %02d:%02d:%02d %s", ap,
1174 ts->tw_hour, ts->tw_min, ts->tw_sec, dtwszone(ts));
1175 if ((tw = dparsetime(buffer)) != NULL)
1178 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s",
1179 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year, ap);
1180 if ((tw = dparsetime(buffer)) != NULL)
1183 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s %s",
1184 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year,
1186 if ((tw = dparsetime(buffer)) != NULL)
1194 tws_special(char *ap)
1201 if (!mh_strcasecmp(ap, "today"))
1202 return dlocaltime(&clock);
1203 if (!mh_strcasecmp(ap, "yesterday")) {
1204 clock -= (long) (60 * 60 * 24);
1205 return dlocaltime(&clock);
1207 if (!mh_strcasecmp(ap, "tomorrow")) {
1208 clock += (long) (60 * 60 * 24);
1209 return dlocaltime(&clock);
1212 for (i = 0; tw_ldotw[i]; i++)
1213 if (!mh_strcasecmp(ap, tw_ldotw[i]))
1216 if ((tw = dlocaltime(&clock)) == NULL)
1218 if ((i -= tw->tw_wday) > 0)
1224 else /* -ddd days ago */
1225 i = atoi(ap); /* we should error check this */
1227 clock += (long) ((60 * 60 * 24) * i);
1228 return dlocaltime(&clock);
1238 char buf[BUFSIZ], name[NAMESZ];
1241 fseek(fp, start, SEEK_SET);
1242 for (state = FLD, bp = NULL;;) {
1243 switch (state = m_getfld(state, name, buf, sizeof buf, fp)) {
1252 while (state == FLDPLUS) {
1253 state = m_getfld(state, name, buf,
1257 if (!mh_strcasecmp(name, n->n_datef))
1259 if (state != FLDEOF)
1267 if (state == LENERR || state == FMTERR)
1268 advise(NULL, "format error in message %d", msgnum);
1274 adios(EX_SOFTWARE, NULL, "internal error -- you lose");
1279 if ((tw = dparsetime(bp)) == NULL)
1280 advise(NULL, "unable to parse %s field in message %d, matching...",
1281 n->n_datef, msgnum), state = 1;
1283 state = n->n_after ? (twsort(tw, &n->n_tws) > 0)
1284 : (twsort(tw, &n->n_tws) < 0);