2 ** pick.c -- search for messages by content
4 ** This code is Copyright (c) 2002, 2008, by the authors of nmh. See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
13 #ifdef HAVE_SYS_TIME_H
14 # include <sys/time.h>
/*
** Switch table for the pick command line.  main() resolves each
** argument against it with smatch() and also hands it to ambigsw()
** and print_help().  Each entry is a switch name (followed by a hint
** for its argument) and a number; going by the datefield entry, the
** number is how many characters must be typed to select the switch.
** NOTE(review): only part of the table is visible in this excerpt.
*/
18 static struct swit switches[] = {
32 { "date pattern", 0 },
34 { "from pattern", 0 },
36 { "search pattern", 0 },
38 { "subject pattern", 0 },
42 { "-othercomponent pattern", 0 },
48 { "datefield field", 5 }, /* 5 chars required to differ from -date */
50 { "sequence name", 0 },
/*
** Forward declarations: pcompile() builds the search expression and
** pmatches() evaluates it against one message; both are defined below.
*/
73 static int pcompile(char **, char *);
74 static int pmatches(FILE *, int, long, long);
/*
** Starts at -1.  main() forces it to 0 on the paths marked HACK, and it
** is tested together with isatty(stdout) further down the file.
** NOTE(review): presumably -1 means "not decided yet" -- confirm.
*/
77 static int listsw = -1;
/*
** Entry point for pick.
** NOTE(review): the return-type line and many short lines (braces,
** break/return statements, some declarations) are missing from this
** excerpt, so only the visible flow is described:
**
**  - register putzero_done with atexit(), set the locale and derive
**    invo_name from argv[0] via mhbasename();
**  - walk the arguments: switches are resolved with smatch() against
**    switches[]; an argument starting with + or @ names the folder
**    (a second one is an error); anything else is appended to msgs;
**  - when no messages were named, search seq_all; then chdir() into the
**    folder directory, read it with folder_read() and mark the requested
**    messages with m_convert();
**  - compile the search expression with pcompile(vec, NULL);
**  - open each selected message and run pmatches() on it, unselecting
**    the messages that do not match;
**  - add the remaining messages to every sequence named in seqs[]
**    (a NULL-terminated list), then save sequences and context.
**
** Error paths are reported through adios().
*/
82 main(int argc, char **argv)
84 int publicsw = -1, zerosw = 1, vecp = 0;
85 unsigned int seqp = 0;
87 char *maildir, *folder = NULL, buf[100];
88 char *cp, **argp, **arguments;
89 char *seqs[NUMATTRS + 1], *vec[MAXARGS];
90 struct msgs_array msgs = { 0, 0, NULL };
94 if (atexit(putzero_done) != 0) {
95 adios(NULL, "atexit failed");
98 setlocale(LC_ALL, "");
99 invo_name = mhbasename(argv[0]);
101 /* read user profile/context */
104 arguments = getarguments(invo_name, argc, argv, 1);
107 while ((cp = *argp++)) {
113 switch (smatch(cp, switches)) {
115 ambigsw(cp, switches);
116 listsw = 0; /* HACK */
119 adios(NULL, "-%s unknown", cp);
122 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
123 print_help(buf, switches, 1);
124 listsw = 0; /* HACK */
127 print_version(invo_name);
128 listsw = 0; /* HACK */
142 if (!(cp = *argp++)) /* allow -xyz arguments */
143 adios(NULL, "missing argument to %s",
148 adios(NULL, "internal error!");
159 if (!(cp = *argp++) || *cp == '-')
160 adios(NULL, "missing argument to %s",
163 /* check if too many sequences specified */
164 if (seqp >= NUMATTRS)
165 adios(NULL, "too many sequences (more than %d) specified", NUMATTRS);
193 if (*cp == '+' || *cp == '@') {
195 adios(NULL, "only one folder at a time!");
197 folder = getcpy(expandfol(cp));
199 app_msgarg(&msgs, cp);
204 ** If we didn't specify which messages to search,
205 ** then search the whole folder.
208 app_msgarg(&msgs, seq_all);
211 folder = getcurfol();
212 maildir = toabsdir(folder);
214 if (chdir(maildir) == NOTOK)
215 adios(maildir, "unable to change directory to");
217 /* read folder and create message structure */
218 if (!(mp = folder_read(folder)))
219 adios(NULL, "unable to read folder %s", folder);
221 /* check for empty folder */
223 adios(NULL, "no messages in %s", folder);
225 /* parse all the message ranges/sequences and set SELECTED */
226 for (msgnum = 0; msgnum < msgs.size; msgnum++)
227 if (!m_convert(mp, msgs.msgs[msgnum]))
229 seq_setprev(mp); /* set the previous-sequence */
232 ** If we aren't saving the results to a sequence,
233 ** we default to list the results.
238 if (publicsw == 1 && is_readonly(mp))
239 adios(NULL, "folder %s is read-only, so -public not allowed",
242 if (!pcompile(vec, NULL))
249 ** If printing message numbers to standard out,
250 ** force line buffering on.
253 setvbuf(stdout, NULL, _IOLBF, 0);
256 ** Scan through all the SELECTED messages and check for a
257 ** match. If the message does not match, then unselect it.
259 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
260 if (is_selected(mp, msgnum)) {
261 if ((fp = fopen(cp = m_name(msgnum), "r")) == NULL)
262 admonish(cp, "unable to read message");
263 if (fp && pmatches(fp, msgnum, 0L, 0L)) {
270 printf("%s\n", m_name(msgnum));
272 /* if it doesn't match, then unselect it */
273 unset_selected(mp, msgnum);
285 adios(NULL, "no messages match specification");
290 ** Add the matching messages to sequences
292 for (seqp = 0; seqs[seqp]; seqp++)
293 if (!seq_addsel(mp, seqs[seqp], publicsw, zerosw))
297 ** Print total matched if not printing each matched message number.
300 printf("%d hit%s\n", mp->numsel, mp->numsel == 1 ? "" : "s");
303 context_replace(curfolder, folder); /* update current folder */
304 seq_save(mp); /* synchronize message sequences */
305 context_save(); /* save the context file */
306 folder_free(mp); /* free folder/message structure */
307 listsw = 0; /* HACK */
/*
** NOTE(review): the header of the definition containing this line is not
** visible in this excerpt; presumably it is putzero_done(), the handler
** main() registers with atexit() -- confirm.  The condition holds when
** listsw is nonzero and stdout is not a terminal; the statement it
** guards is not visible either.
*/
315 if (listsw && !isatty(fileno(stdout)))
/*
** Switch table for the search-expression parser.  parse(), nexp1(),
** nexp2() and nexp3() resolve each expression argument against it with
** smatch(), and nexp3() reads the component name back out of the sw
** field of the matched entry.  The layout matches switches[], but the
** numbers differ (for example -othercomponent is 15 here).
** NOTE(review): only part of the table is visible in this excerpt.
*/
320 static struct swit parswit[] = {
334 { "date pattern", 0 },
336 { "from pattern", 0 },
338 { "search pattern", 0 },
340 { "subject pattern", 0 },
344 { "-othercomponent pattern", 15 },
348 { "before date", 0 },
350 { "datefield field", 5 },
354 /* DEFINITIONS FOR PATTERN MATCHING */
357 ** We really should be using re_comp() and re_exec() here. Unfortunately,
358 ** pick advertises that lowercase characters match characters of both
359 ** cases. Since re_exec() doesn't exhibit this behavior, we are stuck
360 ** with this version. Furthermore, we need to be able to save and restore
361 ** the state of the pattern matcher in order to do things "efficiently".
363 ** This algorithm's matching isn't as powerful as the re_xxx()
364 ** routines (no \(xxx\) and \n constructs). Such is life.
/*
** Line buffer of LBSIZE + 1 bytes; the matcher bounds its writes with
** &linebuf[LBSIZE - 1].
*/
380 static char linebuf[LBSIZE + 1];
382 /* the magic array for case-independence */
/*
** 256-entry translation table, written in octal, eight entries per row.
** Entries 0101-0132 (ASCII A-Z) hold 0141-0172 (a-z); every other entry
** holds its own index, so indexing with a byte yields that byte folded
** to lower case.
** NOTE(review): the declaration line is missing from this excerpt; the
** name cc is taken from the uses in advance() and cclass().
*/
384 0000,0001,0002,0003,0004,0005,0006,0007,
385 0010,0011,0012,0013,0014,0015,0016,0017,
386 0020,0021,0022,0023,0024,0025,0026,0027,
387 0030,0031,0032,0033,0034,0035,0036,0037,
388 0040,0041,0042,0043,0044,0045,0046,0047,
389 0050,0051,0052,0053,0054,0055,0056,0057,
390 0060,0061,0062,0063,0064,0065,0066,0067,
391 0070,0071,0072,0073,0074,0075,0076,0077,
392 0100,0141,0142,0143,0144,0145,0146,0147,
393 0150,0151,0152,0153,0154,0155,0156,0157,
394 0160,0161,0162,0163,0164,0165,0166,0167,
395 0170,0171,0172,0133,0134,0135,0136,0137,
396 0140,0141,0142,0143,0144,0145,0146,0147,
397 0150,0151,0152,0153,0154,0155,0156,0157,
398 0160,0161,0162,0163,0164,0165,0166,0167,
399 0170,0171,0172,0173,0174,0175,0176,0177,
401 0200,0201,0202,0203,0204,0205,0206,0207,
402 0210,0211,0212,0213,0214,0215,0216,0217,
403 0220,0221,0222,0223,0224,0225,0226,0227,
404 0230,0231,0232,0233,0234,0235,0236,0237,
405 0240,0241,0242,0243,0244,0245,0246,0247,
406 0250,0251,0252,0253,0254,0255,0256,0257,
407 0260,0261,0262,0263,0264,0265,0266,0267,
408 0270,0271,0272,0273,0274,0275,0276,0277,
409 0300,0301,0302,0303,0304,0305,0306,0307,
410 0310,0311,0312,0313,0314,0315,0316,0317,
411 0320,0321,0322,0323,0324,0325,0326,0327,
412 0330,0331,0332,0333,0334,0335,0336,0337,
413 0340,0341,0342,0343,0344,0345,0346,0347,
414 0350,0351,0352,0353,0354,0355,0356,0357,
415 0360,0361,0362,0363,0364,0365,0366,0367,
416 0370,0371,0372,0373,0374,0375,0376,0377,
420 ** DEFINITIONS FOR NEXUS
/*
** nxtarg(): yield the argument argp points at and advance argp; at the
** end of the vector yield NULL and leave argp where it is.
*/
423 #define nxtarg() (*argp ? *argp++ : NULL)
/* prvarg(): step argp back by one argument. */
424 #define prvarg() argp--
/*
** padvise: call advise() only while talked is still zero, and count the
** attempt in talked either way, so just the first diagnostic is printed.
** NOTE(review): this expands to a bare if statement, so an else placed
** directly after a padvise(...) call would bind to the hidden if.
*/
426 #define padvise if (!talked++) advise
/*
** Fragments of struct nexus, the node type of the compiled search
** expression, followed by its accessor macros.  As the macros show, the
** per-kind data sits in a member named un with three variants:
**   st1 -- left and right child links (OR, AND and NOT nodes);
**   st2 -- pattern data: header flag, circf, compiled expression buffer
**          of ESIZE bytes, and the pattern text;
**   st3 -- date data: field name, after flag and the parsed time.
** NOTE(review): most of the struct declaration itself is missing from
** this excerpt.
*/
432 /* for {OR,AND,NOT}action */
434 struct nexus *un_L_child;
435 struct nexus *un_R_child;
442 char un_expbuf[ESIZE];
455 #define n_L_child un.st1.un_L_child
456 #define n_R_child un.st1.un_R_child
458 #define n_header un.st2.un_header
459 #define n_circf un.st2.un_circf
460 #define n_expbuf un.st2.un_expbuf
461 #define n_patbuf un.st2.un_patbuf
463 #define n_datef un.st3.un_datef
464 #define n_after un.st3.un_after
465 #define n_tws un.st3.un_tws
/*
** Debug flag, tested once by pmatches() before the first evaluation.
** NOTE(review): presumably switched on in pcompile() when the MHPDEBUG
** environment variable is non-empty; the assignment is not visible.
*/
468 static int pdebug = 0;
/*
** Root of the expression tree: assigned from parse() in pcompile() and
** evaluated by pmatches().
*/
473 static struct nexus *head;
476 ** prototypes for date routines
/*
** Forward declarations for the date helpers, the debug printer, the
** pattern compiler and matcher, the recursive-descent parser levels and
** the node action functions.
** NOTE(review): newnexus() and the five action functions are declared
** with empty parentheses, i.e. without a prototype, so calls to them are
** not argument-checked; this declarator form is obsolescent in C11.
*/
478 static struct tws *tws_parse(char *, int);
479 static struct tws *tws_special(char *);
484 static void PRaction(struct nexus *, int);
485 static int gcompile(struct nexus *, char *);
486 static int advance(char *, char *);
487 static int cclass(unsigned char *, int, int);
488 static int tcompile(char *, struct tws *, int);
490 static struct nexus *parse(void);
491 static struct nexus *nexp1(void);
492 static struct nexus *nexp2(void);
493 static struct nexus *nexp3(void);
494 static struct nexus *newnexus(int (*)());
496 static int ORaction();
497 static int ANDaction();
498 static int NOTaction();
499 static int GREPaction();
500 static int TWSaction();
/*
** pcompile -- build the search-expression tree.
**
** vec:  argument vector holding the expression.
**       NOTE(review): the line that stores it for nxtarg() is not
**       visible in this excerpt.
** date: date field name to use; it is copied into datesw, and a NULL
**       value triggers a fallback whose line is not visible.
**
** The tree returned by parse() is stored in head.  When parse() yields
** NULL the result is 0 if a diagnostic was already issued (talked is
** nonzero) and 1 otherwise.  An argument reported through *argp is
** flagged as unexpected; the condition guarding that report is not
** visible.  main() treats a zero result as failure.
*/
504 pcompile(char **vec, char *date)
508 if ((cp = getenv("MHPDEBUG")) && *cp)
512 if ((datesw = date) == NULL)
516 if ((head = parse()) == NULL)
517 return (talked ? 0 : 1);
520 padvise(NULL, "%s unexpected", *argp);
/*
** Lowest-precedence level of the expression parser (the OR level).
** NOTE(review): the function-name line is missing from this excerpt;
** the name parse is inferred from the prototype list and from the
** recursive call below.
**
** The left operand comes from nexp1().  If that fails or no argument
** follows, the function is done (the return line is not visible).
** Otherwise the next argument is resolved against parswit: for an OR a
** new ORaction node is built whose right child comes from a recursive
** parse() call, and a missing right operand is reported as a missing
** disjunctive.  Ambiguous and unknown switches are reported too.
*/
528 static struct nexus *
532 register struct nexus *n, *o;
534 if ((n = nexp1()) == NULL || (cp = nxtarg()) == NULL)
538 padvise(NULL, "%s unexpected", cp);
544 switch (smatch(cp, parswit)) {
546 ambigsw(cp, parswit);
550 fprintf(stderr, "-%s unknown\n", cp);
555 o = newnexus(ORaction);
557 if ((o->n_R_child = parse()))
559 padvise(NULL, "missing disjunctive");
/*
** AND level of the expression parser.
** NOTE(review): the function-name line is missing from this excerpt;
** the name nexp1 is inferred from the prototype order and from the
** recursive call below.
**
** The left operand comes from nexp2().  When an AND switch follows, a
** new ANDaction node is built and its right child comes from a recursive
** nexp1() call, so a chain of ANDs nests to the right.  A missing right
** operand is reported as a missing conjunctive.
*/
569 static struct nexus *
573 register struct nexus *n, *o;
575 if ((n = nexp2()) == NULL || (cp = nxtarg()) == NULL)
579 padvise(NULL, "%s unexpected", cp);
585 switch (smatch(cp, parswit)) {
587 ambigsw(cp, parswit);
591 fprintf(stderr, "-%s unknown\n", cp);
596 o = newnexus(ANDaction);
598 if ((o->n_R_child = nexp1()))
600 padvise(NULL, "missing conjunctive");
/*
** NOT level of the expression parser.
** NOTE(review): the function-name line is missing from this excerpt;
** the name nexp2 is inferred from the prototype order.
**
** Reads one argument and resolves it against parswit.  For a NOT switch
** a NOTaction node is built whose single (left) child comes from
** nexp3(); a missing operand is reported as a missing negation.  What
** happens for other arguments is not visible here.
*/
611 static struct nexus *
615 register struct nexus *n;
617 if ((cp = nxtarg()) == NULL)
627 switch (smatch(cp, parswit)) {
629 ambigsw(cp, parswit);
633 fprintf(stderr, "-%s unknown\n", cp);
638 n = newnexus(NOTaction);
639 if ((n->n_L_child = nexp3()))
641 padvise(NULL, "missing negation");
/*
** Primary level of the expression parser: groups, pattern tests and
** date tests.
** NOTE(review): the function-name line and the case labels are missing
** from this excerpt; the name nexp3 is inferred from the prototype order.
**
** Visible behaviour:
**  - a group is parsed with parse() and must be closed by the -rbrace
**    switch (PRRBR), otherwise an error is reported;
**  - for component switches the component name is the first word of the
**    matched parswit entry; the pattern handed on is
**    ^name[ \t]*:.*pattern, built in buffer;
**  - both pattern forms create a GREPaction node, compile the pattern
**    with gcompile() and keep a copy of the pattern text in n_patbuf;
**  - the datefield switch stores its argument in datesw;
**  - the after and before switches create a TWSaction node and parse the
**    date with tcompile().
** Diagnostics go through padvise(), so only the first one is printed.
*/
651 static struct nexus *
655 register char *cp, *dp;
656 char buffer[BUFSIZ], temp[64];
657 register struct nexus *n;
659 if ((cp = nxtarg()) == NULL)
663 padvise(NULL, "%s unexpected", cp);
671 switch (i = smatch(cp, parswit)) {
673 ambigsw(cp, parswit);
677 fprintf(stderr, "-%s unknown\n", cp);
682 if ((n = parse()) == NULL) {
683 padvise(NULL, "missing group");
686 if ((cp = nxtarg()) == NULL) {
687 padvise(NULL, "missing -rbrace");
/* cp is stepped past the leading dash for smatch(); --cp below restores it for the message */
690 if (*cp++ == '-' && smatch(cp, parswit) == PRRBR)
692 padvise(NULL, "%s unexpected", --cp);
/* copy the switch text so brkstring() can split off its first word (the component name) */
704 strncpy(temp, parswit[i].sw, sizeof(temp));
705 temp[sizeof(temp) - 1] = '\0';
706 dp = *brkstring(temp, " ", NULL);
708 if (!(cp = nxtarg())) { /* allow -xyz arguments */
709 padvise(NULL, "missing argument to %s", argp[-2]);
712 n = newnexus(GREPaction);
/* anchor at line start: component name, optional blanks, a colon, then the user pattern */
714 snprintf(buffer, sizeof(buffer), "^%s[ \t]*:.*%s", dp, cp);
719 n = newnexus(GREPaction);
721 if (!(cp = nxtarg())) { /* allow -xyz arguments */
722 padvise(NULL, "missing argument to %s", argp[-2]);
727 if (!gcompile(n, dp)) {
728 padvise(NULL, "pattern error in %s %s", argp[-2], cp);
731 n->n_patbuf = getcpy(dp);
735 padvise(NULL, "internal error!");
739 if (!(datesw = nxtarg()) || *datesw == '-') {
740 padvise(NULL, "missing argument to %s",
748 if (!(cp = nxtarg())) { /* allow -xyz arguments */
749 padvise(NULL, "missing argument to %s", argp[-2]);
752 n = newnexus(TWSaction);
/* n_after is set to 1 when the switch matched PRAFTR and to 0 otherwise; the same value is passed on as isafter */
754 if (!tcompile(cp, &n->n_tws, n->n_after = i == PRAFTR)) {
755 padvise(NULL, "unable to parse %s %s", argp[-2], cp);
/*
** newnexus -- allocate one expression node.
**
** action: the function stored in the n_action member of the node.
**
** The node comes from calloc(), so every other member starts out
** zero-filled; allocation failure is reported through adios().
** NOTE(review): the return statement is not visible in this excerpt;
** callers use the result as the new node.
*/
763 static struct nexus *
764 newnexus(int (*action)())
766 register struct nexus *p;
768 if ((p = (struct nexus *) calloc((size_t) 1, sizeof *p)) == NULL)
769 adios(NULL, "unable to allocate component storage");
771 p->n_action = action;
/*
** args(a): the argument list every action function is called with --
** node a, then the fp, msgnum, start and stop values of the caller.
*/
776 #define args(a) a, fp, msgnum, start, stop
/* params: the same list with n as the node. */
777 #define params args(n)
/*
** NOTE(review): the line below is a continuation line of a macro whose
** define line is missing from this excerpt; presumably it belongs to
** the shared parameter declarations of the action functions -- confirm.
*/
779 register struct nexus *n; \
/*
** pmatches -- evaluate the compiled expression against one message.
**
** fp:     open stream on the message.
** msgnum: message number, passed through to the action functions.
** start:  passed through to the action functions, which seek to it.
** stop:   passed through to the action functions.
**
** Returns whatever the action function of the root node (head) returns;
** main() treats a nonzero result as a match.  On the call that finds
** talked still zero, with pdebug set, one extra step runs first.
** NOTE(review): that step is not visible here; presumably it dumps the
** tree with PRaction() -- confirm.
*/
786 pmatches(FILE *fp, int msgnum, long start, long stop)
791 if (!talked++ && pdebug)
794 return (*head->n_action) (args(head));
/*
** PRaction -- print the expression tree rooted at n on stderr.
**
** n:     node to print.
** level: nesting depth; one bar-and-space prefix is written per level.
**
** The node kind is recognised by comparing n_action with the known
** action functions.  OR and AND nodes print both children one level
** deeper, NOT nodes print their single child, pattern nodes print
** whether they are header or body patterns plus the pattern text, and
** date nodes print the direction, the field name and the time.
** Anything else is printed as UNKNOWN with the action address.
*/
799 PRaction(struct nexus *n, int level)
803 for (i = 0; i < level; i++)
804 fprintf(stderr, "| ");
806 if (n->n_action == ORaction) {
807 fprintf(stderr, "OR\n");
808 PRaction(n->n_L_child, level + 1);
809 PRaction(n->n_R_child, level + 1);
812 if (n->n_action == ANDaction) {
813 fprintf(stderr, "AND\n");
814 PRaction(n->n_L_child, level + 1);
815 PRaction(n->n_R_child, level + 1);
818 if (n->n_action == NOTaction) {
819 fprintf(stderr, "NOT\n");
820 PRaction(n->n_L_child, level + 1);
823 if (n->n_action == GREPaction) {
824 fprintf(stderr, "PATTERN(%s) %s\n",
825 n->n_header ? "header" : "body", n->n_patbuf);
828 if (n->n_action == TWSaction) {
829 fprintf(stderr, "TEMPORAL(%s) %s: %s\n",
830 n->n_after ? "after" : "before", n->n_datef,
831 dasctime(&n->n_tws));
/* NOTE(review): the function pointer is converted through unsigned long to unsigned int for %x, so the printed address can be truncated where pointers are wider than int */
834 fprintf(stderr, "UNKNOWN(0x%x)\n",
835 (unsigned int)(unsigned long) (*n->n_action));
/*
** NOTE(review): the headers of the three definitions these lines belong
** to are missing from this excerpt; judging by the prototypes and the
** logic they are presumably ORaction, ANDaction and NOTaction, in that
** order -- confirm.
*/
/* first fragment: test the left child; the result of the right child is returned when control reaches the second line */
843 if ((*n->n_L_child->n_action) (args(n->n_L_child)))
845 return (*n->n_R_child->n_action) (args(n->n_R_child));
/* second fragment: test the left child for failure; the result of the right child is returned when control reaches the second line */
853 if (!(*n->n_L_child->n_action) (args(n->n_L_child)))
855 return (*n->n_R_child->n_action) (args(n->n_R_child));
/* third fragment: return the logical negation of the left child result */
863 return (!(*n->n_L_child->n_action) (args(n->n_L_child)));
/*
** gcompile -- compile the pattern text astr into the expression buffer
** of node n.
**
** n:    node whose n_expbuf receives the compiled form.
** astr: pattern text.
**
** ep walks the output buffer and dp marks its end (n_expbuf plus its
** size); the visible overflow checks compare ep against dp.  The
** visible fragments handle the star operator, a caret directly after
** the start of a bracket class, dash ranges inside a class (expanded
** character by character from the previous class member), the closing
** bracket of a class, and a character fetched after another one.
** NOTE(review): the return statements are not visible in this excerpt;
** nexp3() treats a zero result as a pattern error.
*/
868 gcompile(struct nexus *n, char *astr)
872 register unsigned char *ep, *dp, *sp, *lastep = 0;
874 dp = (ep = n->n_expbuf) + sizeof n->n_expbuf;
885 if ((c = *sp++) != '*')
912 if ((c = *sp++) == '^') {
922 if (c == '-' && *sp != '\0' && *sp != ']') {
923 for (c = ep[-1]+1; c < *sp; c++) {
926 if (c == '\0' || ep >= dp)
932 if (c == '\0' || ep >= dp)
935 } while ((c = *sp++) != ']');
942 if ((c = *sp++) == '\0')
/*
** NOTE(review): the header of this definition is missing from this
** excerpt; since it uses n_header, linebuf and advance() it is
** presumably GREPaction, the action function of pattern nodes -- confirm.
**
** Visible behaviour: seek fp to start, then read the message with
** fgets() into ibuf, stopping at end of file or, when stop is nonzero,
** once pos (the running total of line lengths) reaches stop.  A check
** on body together with n_header is made per line, and characters
** following a line feed are tested for blank or tab.  Text is gathered
** in linebuf with a bound of LBSIZE - 1, and advance() is tried against
** it in three places, one of them only at positions whose character
** equals c either directly or after folding through cc.
*/
962 register char *p1, *p2, *ebp, *cbp;
965 fseek(fp, start, SEEK_SET);
969 if (body && n->n_header)
976 if (fgets(ibuf, sizeof ibuf, fp) == NULL
977 || (stop && pos >= stop)) {
982 pos += (long) strlen(ibuf);
984 ebp = ibuf + strlen(ibuf);
987 if (lf && c != '\n') {
988 if (c != ' ' && c != '\t') {
1007 if (c && p1 < &linebuf[LBSIZE - 1])
1017 if (advance(p1, p2))
1025 if (*p1 == c || cc[(unsigned char)*p1] == c)
1026 if (advance(p1, p2))
1033 if (advance(p1, p2))
/*
** advance -- try to match the compiled expression aep at the text
** position alp.
**
** alp: text to match; handled as unsigned char.
** aep: compiled expression; handled as unsigned char.
**
** A literal matches when the expression byte equals the text byte or
** equals the text byte folded through cc, so a lower-case pattern
** letter accepts both cases.  Bracket classes are tested with cclass().
** Starred items first consume as much text as they can and then back
** off one position at a time, calling advance() recursively for the
** rest, until lp is back at curlp.  The botch message is issued through
** admonish() on a path whose guard is not visible here.
** NOTE(review): the return statements are not visible in this excerpt;
** callers treat a nonzero result as a match.
*/
1041 advance(char *alp, char *aep)
1043 register unsigned char *lp, *ep, *curlp;
1045 lp = (unsigned char *)alp;
1046 ep = (unsigned char *)aep;
1050 if (*ep++ == *lp++ || ep[-1] == cc[lp[-1]])
1068 if (cclass(ep, *lp++, 1)) {
1075 if (cclass(ep, *lp++, 0)) {
1089 while (*lp++ == *ep || cc[lp[-1]] == *ep)
1097 while (cclass(ep, *lp++, ep[-1] == (CCL | STAR)))
1105 if (advance(lp, ep))
1107 } while (lp > curlp);
1111 admonish(NULL, "advance() botch -- you lose big");
/*
** cclass -- test a character against a compiled bracket class.
**
** aset: compiled class data.
** ac:   character to test.
** af:   NOTE(review): presumably the value to return when ac is a
**       member; advance() passes 1 in one place and 0 in another --
**       confirm.
**
** A class member matches when it equals the character or equals the
** character folded through cc.
** NOTE(review): the loop header and the return statements are not
** visible in this excerpt.
*/
1118 cclass(unsigned char *aset, int ac, int af)
1120 register unsigned int n;
1121 register unsigned char c, *set;
1129 if (*set++ == c || set[-1] == cc[c])
/*
** tcompile -- parse the date text of a date test.
**
** ap:      date text from the command line.
** tb:      NOTE(review): presumably receives the parsed time; the
**          statement using it is not visible in this excerpt.
** isafter: passed on to tws_parse().
**
** The text is parsed with tws_parse(); a NULL result takes a separate
** path whose statement is not visible.  nexp3() treats a zero return as
** an unparsable date.
*/
1137 tcompile(char *ap, struct tws *tb, int isafter)
1139 register struct tws *tw;
1141 if ((tw = tws_parse(ap, isafter)) == NULL)
/*
** tws_parse -- turn a user-supplied date text into a struct tws.
**
** ap:      date text.
** isafter: nonzero when the date is the bound of an after test.
**
** Attempts, in order:
**  1. tws_special(); on success the time of day is set to 23:59:59 when
**     isafter is nonzero and to 00:00:00 otherwise;
**  2. dparsetime() on the text as given;
**  3. with ts being the current local time from dlocaltimenow():
**     the text followed by the local zone; the text followed by the
**     current time of day and the zone; the current day, month and year
**     followed by the text; and the same with one more trailing field
**     whose argument line is not visible here.
** NOTE(review): the return statements are not visible in this excerpt;
** tcompile() treats a NULL result as failure.
*/
1150 tws_parse(char *ap, int isafter)
1152 char buffer[BUFSIZ];
1153 register struct tws *tw, *ts;
1155 if ((tw = tws_special(ap)) != NULL) {
1156 tw->tw_sec = tw->tw_min = isafter ? 59 : 0;
1157 tw->tw_hour = isafter ? 23 : 0;
1160 if ((tw = dparsetime(ap)) != NULL)
1163 if ((ts = dlocaltimenow()) == NULL)
1166 snprintf(buffer, sizeof(buffer), "%s %s", ap, dtwszone(ts));
1167 if ((tw = dparsetime(buffer)) != NULL)
1170 snprintf(buffer, sizeof(buffer), "%s %02d:%02d:%02d %s", ap,
1171 ts->tw_hour, ts->tw_min, ts->tw_sec, dtwszone(ts));
1172 if ((tw = dparsetime(buffer)) != NULL)
1175 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s",
1176 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year, ap);
1177 if ((tw = dparsetime(buffer)) != NULL)
1180 snprintf(buffer, sizeof(buffer), "%02d %s %04d %s %s",
1181 ts->tw_mday, tw_moty[ts->tw_mon], ts->tw_year,
1183 if ((tw = dparsetime(buffer)) != NULL)
/*
** tws_special -- resolve the symbolic date forms.
**
** ap: date text.
**
** Recognised case-insensitively: today, yesterday (clock minus one day),
** tomorrow (clock plus one day) and the day names listed in tw_ldotw.
** For a day name the offset i is reduced by the current weekday and
** adjusted further when the difference is positive (the adjustment line
** is not visible here).  The remaining visible form is a count of days
** ago read with atoi().  In the last two cases clock is shifted by i
** days and converted with dlocaltime().
** NOTE(review): the line that initialises clock is not visible in this
** excerpt.  The atoi() result is not validated, as the original comment
** admits, so non-numeric text silently counts as zero days; strtol()
** with an end-pointer check would detect that.
*/
1191 tws_special(char *ap)
1195 register struct tws *tw;
1198 if (!mh_strcasecmp(ap, "today"))
1199 return dlocaltime(&clock);
1200 if (!mh_strcasecmp(ap, "yesterday")) {
1201 clock -= (long) (60 * 60 * 24);
1202 return dlocaltime(&clock);
1204 if (!mh_strcasecmp(ap, "tomorrow")) {
1205 clock += (long) (60 * 60 * 24);
1206 return dlocaltime(&clock);
1209 for (i = 0; tw_ldotw[i]; i++)
1210 if (!mh_strcasecmp(ap, tw_ldotw[i]))
1213 if ((tw = dlocaltime(&clock)) == NULL)
1215 if ((i -= tw->tw_wday) > 0)
1221 else /* -ddd days ago */
1222 i = atoi(ap); /* we should error check this */
1224 clock += (long) ((60 * 60 * 24) * i);
1225 return dlocaltime(&clock);
/*
** NOTE(review): the header of this definition is missing from this
** excerpt; since it uses n_datef, n_after and n_tws it is presumably
** TWSaction, the action function of date nodes -- confirm.
**
** Visible behaviour: seek fp to start and read header fields with
** m_getfld(), collecting continuation data while the state is FLDPLUS,
** until a field whose name equals n_datef (compared case-insensitively)
** is found.  LENERR and FMTERR states are reported as a format error in
** the message.  The field text is parsed with dparsetime(): when that
** fails a warning is issued and state is set to 1, i.e. the message
** counts as matching; otherwise state is the twsort() comparison
** against n_tws, greater than zero for an after test and less than
** zero for a before test.
*/
1235 char buf[BUFSIZ], name[NAMESZ];
1236 register struct tws *tw;
1238 fseek(fp, start, SEEK_SET);
1239 for (state = FLD, bp = NULL;;) {
1240 switch (state = m_getfld(state, name, buf, sizeof buf, fp)) {
1249 while (state == FLDPLUS) {
1250 state = m_getfld(state, name, buf,
1254 if (!mh_strcasecmp(name, n->n_datef))
1256 if (state != FLDEOF)
1264 if (state == LENERR || state == FMTERR)
1265 advise(NULL, "format error in message %d", msgnum);
1271 adios(NULL, "internal error -- you lose");
1276 if ((tw = dparsetime(bp)) == NULL)
1277 advise(NULL, "unable to parse %s field in message %d, matching...",
1278 n->n_datef, msgnum), state = 1;
1280 state = n->n_after ? (twsort(tw, &n->n_tws) > 0)
1281 : (twsort(tw, &n->n_tws) < 0);