Require at least 2 chars for `no' variants of switches.
[mmh] / uip / sortm.c
1 /*
2 ** sortm.c -- sort messages in a folder by date/time
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <h/tws.h>
11 #include <h/utils.h>
12
13 static struct swit switches[] = {
14 #define DATESW  0
15          { "datefield field", 0 },
16 #define TEXTSW  1
17          { "textfield field", 0 },
18 #define NSUBJSW  2
19          { "notextfield", 2 },
20 #define LIMSW  3
21          { "limit days", 0 },
22 #define NLIMSW  4
23          { "nolimit", 2 },
24 #define VERBSW  5
25          { "verbose", 0 },
26 #define NVERBSW  6
27          { "noverbose", 2 },
28 #define VERSIONSW  7
29          { "Version", 0 },
30 #define HELPSW  8
31          { "help", 0 },
32          { NULL, 0 }
33 };
34
/*
** One record per message that takes part in the sort.
*/
struct smsg {
	/* message number inside the folder */
	int s_msg;
	/* time the message is sorted by */
	time_t s_clock;
	/* text the message is sorted by; only set when sorting on text */
	char *s_subj;
};
40
/* array of sort records, one per message to be sorted */
static struct smsg *smsgs;
/* number of records in use in smsgs */
int nmsgs;

/* name of the text field to sort on; NULL means sort on date only */
char *subjsort = NULL;
/* largest time gap (in seconds) between grouped messages; 0 is no limit */
time_t datelimit = 0;
/* if true, sort on subject-major */
int submajor = 0;
/* if true, report what is being done on stdout */
int verbose;
48
/*
** Type of the comparison function expected by qsort().  The sort
** functions in this file are cast to it when they are handed to qsort().
*/
typedef int (*qsort_comp) (const void *left, const void *right);
51
/*
** prototypes of the functions private to this file
*/
static int read_hdrs(struct msgs *mp, char *datesw);
static int get_fields(char *datesw, int msg, struct smsg *smsg);
static int dsort(struct smsg **a, struct smsg **b);
static int subsort(struct smsg **a, struct smsg **b);
static int txtsort(struct smsg **a, struct smsg **b);
static void rename_chain(struct msgs *mp, struct smsg **mlist,
		int msg, int endmsg);
static void rename_msgs(struct msgs *mp, struct smsg **mlist);
62
63
64 int
65 main(int argc, char **argv)
66 {
67         int i, msgnum;
68         unsigned char *cp;
69         char *maildir, *datesw = NULL;
70         char *folder = NULL, buf[BUFSIZ], **argp;
71         char **arguments;
72         struct msgs_array msgs = { 0, 0, NULL };
73         struct msgs *mp;
74         struct smsg **dlist;
75
76 #ifdef LOCALE
77         setlocale(LC_ALL, "");
78 #endif
79         invo_name = mhbasename(argv[0]);
80
81         /* read user profile/context */
82         context_read();
83
84         arguments = getarguments(invo_name, argc, argv, 1);
85         argp = arguments;
86
87         /*
88         ** Parse arguments
89         */
90         while ((cp = *argp++)) {
91                 if (*cp == '-') {
92                         switch (smatch(++cp, switches)) {
93                         case AMBIGSW:
94                                 ambigsw(cp, switches);
95                                 done(1);
96                         case UNKWNSW:
97                                 adios(NULL, "-%s unknown", cp);
98
99                         case HELPSW:
100                                 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
101                                 print_help(buf, switches, 1);
102                                 done(1);
103                         case VERSIONSW:
104                                 print_version(invo_name);
105                                 done(1);
106
107                         case DATESW:
108                                 if (datesw)
109                                         adios(NULL, "only one date field at a time");
110                                 if (!(datesw = *argp++) || *datesw == '-')
111                                         adios(NULL, "missing argument to %s",
112                                                         argp[-2]);
113                                 continue;
114
115                         case TEXTSW:
116                                 if (subjsort)
117                                         adios(NULL, "only one text field at a time");
118                                 if (!(subjsort = *argp++) || *subjsort == '-')
119                                         adios(NULL, "missing argument to %s",
120                                                         argp[-2]);
121                                 continue;
122
123                         case NSUBJSW:
124                                 subjsort = NULL;
125                                 continue;
126
127                         case LIMSW:
128                                 if (!(cp = *argp++) || *cp == '-')
129                                                 adios(NULL, "missing argument to %s", argp[-2]);
130                                 while (*cp == '0')
131                                         cp++;  /* skip any leading zeros */
132                                 if (!*cp) {  /* hit end of string */
133                                         submajor++;  /* sort subject-major */
134                                         continue;
135                                 }
136                                 if (!isdigit(*cp) || !(datelimit = atoi(cp)))
137                                         adios(NULL, "impossible limit %s", cp);
138                                 datelimit *= 60*60*24;
139                                 continue;
140                         case NLIMSW:
141                                 submajor = 0;  /* use date-major, but */
142                                 datelimit = 0;  /* use no limit */
143                                 continue;
144
145                         case VERBSW:
146                                 verbose++;
147                                 continue;
148                         case NVERBSW:
149                                 verbose = 0;
150                                 continue;
151                         }
152                 }
153                 if (*cp == '+' || *cp == '@') {
154                         if (folder)
155                                 adios(NULL, "only one folder at a time!");
156                         else
157                                 folder = getcpy(expandfol(cp));
158                 } else
159                         app_msgarg(&msgs, cp);
160         }
161
162         if (!msgs.size)
163                 app_msgarg(&msgs, seq_all);
164         if (!datesw)
165                 datesw = "date";
166         if (!folder)
167                 folder = getcurfol();
168         maildir = toabsdir(folder);
169
170         if (chdir(maildir) == NOTOK)
171                 adios(maildir, "unable to change directory to");
172
173         /* read folder and create message structure */
174         if (!(mp = folder_read(folder)))
175                 adios(NULL, "unable to read folder %s", folder);
176
177         /* check for empty folder */
178         if (mp->nummsg == 0)
179                 adios(NULL, "no messages in %s", folder);
180
181         /* parse all the message ranges/sequences and set SELECTED */
182         for (msgnum = 0; msgnum < msgs.size; msgnum++)
183                 if (!m_convert(mp, msgs.msgs[msgnum]))
184                         done(1);
185         seq_setprev(mp);  /* set the previous sequence */
186
187         if ((nmsgs = read_hdrs(mp, datesw)) <= 0)
188                 adios(NULL, "no messages to sort");
189
190         /*
191         ** sort a list of pointers to our "messages to be sorted".
192         */
193         dlist = (struct smsg **) mh_xmalloc((nmsgs+1) * sizeof(*dlist));
194         for (i = 0; i < nmsgs; i++)
195                 dlist[i] = &smsgs[i];
196         dlist[nmsgs] = 0;
197
198         if (verbose) {  /* announce what we're doing */
199                 if (subjsort)
200                         printf("sorting by %s-major %s-minor\n",
201                                 submajor ? subjsort : datesw,
202                                 submajor ? datesw : subjsort);
203                 else
204                         printf("sorting by datefield %s\n", datesw);
205         }
206
207         /* first sort by date, or by subject-major, date-minor */
208         qsort((char *) dlist, nmsgs, sizeof(*dlist),
209                         (qsort_comp) (submajor && subjsort ? txtsort : dsort));
210
211         /*
212         ** if we're sorting on subject, we need another list
213         ** in subject order, then a merge pass to collate the
214         ** two sorts.
215         */
216         if (!submajor && subjsort) {  /* already date sorted */
217                 struct smsg **slist, **flist;
218                 register struct smsg ***il, **fp, **dp;
219
220                 slist = (struct smsg **)
221                                 mh_xmalloc((nmsgs+1) * sizeof(*slist));
222                 memcpy((char *)slist, (char *)dlist, (nmsgs+1)*sizeof(*slist));
223                 qsort((char *)slist, nmsgs, sizeof(*slist),
224                                 (qsort_comp) subsort);
225
226                 /*
227                 ** make an inversion list so we can quickly find
228                 ** the collection of messages with the same subj
229                 ** given a message number.
230                 */
231                 il = (struct smsg ***) calloc(mp->hghsel+1, sizeof(*il));
232                 if (! il)
233                         adios(NULL, "couldn't allocate msg list");
234                 for (i = 0; i < nmsgs; i++)
235                         il[slist[i]->s_msg] = &slist[i];
236                 /*
237                 ** make up the final list, chronological but with
238                 ** all the same subjects grouped together.
239                 */
240                 flist = (struct smsg **)
241                                 mh_xmalloc((nmsgs+1) * sizeof(*flist));
242                 fp = flist;
243                 for (dp = dlist; *dp;) {
244                         register struct smsg **s = il[(*dp++)->s_msg];
245
246                         /* see if we already did this guy */
247                         if (! s)
248                                 continue;
249
250                         *fp++ = *s++;
251                         /*
252                         ** take the next message(s) if there is one,
253                         ** its subject isn't null and its subject
254                         ** is the same as this one and it's not too
255                         ** far away in time.
256                         */
257                         while (*s && (*s)->s_subj[0] && strcmp((*s)->s_subj, s[-1]->s_subj) == 0 && (datelimit == 0 || (*s)->s_clock - s[-1]->s_clock <= datelimit)) {
258                                 il[(*s)->s_msg] = 0;
259                                 *fp++ = *s++;
260                         }
261                 }
262                 *fp = 0;
263                 free(slist);
264                 free(dlist);
265                 dlist = flist;
266         }
267
268         /*
269         ** At this point, dlist is a sorted array of pointers to smsg
270         ** structures, each of which contains a message number.
271         */
272
273         rename_msgs(mp, dlist);
274
275         context_replace(curfolder, folder);  /* update current folder */
276         seq_save(mp);  /* synchronize message sequences */
277         context_save();  /* save the context file */
278         folder_free(mp);  /* free folder/message structure */
279         done(0);
280         return 1;
281 }
282
283 static int
284 read_hdrs(struct msgs *mp, char *datesw)
285 {
286         int msgnum;
287         struct tws tb;
288         register struct smsg *s;
289
290         twscopy(&tb, dlocaltimenow());
291
292         smsgs = (struct smsg *) calloc((size_t) (mp->hghsel - mp->lowsel + 2),
293                         sizeof(*smsgs));
294         if (smsgs == NULL)
295                 adios(NULL, "unable to allocate sort storage");
296
297         s = smsgs;
298         for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
299                 if (is_selected(mp, msgnum)) {
300                         if (get_fields(datesw, msgnum, s)) {
301                                 s->s_msg = msgnum;
302                                 s++;
303                         }
304                 }
305         }
306         s->s_msg = 0;
307         return(s - smsgs);
308 }
309
310
311 /*
312 ** Parse the message and get the data or subject field,
313 ** if needed.
314 */
315
316 static int
317 get_fields(char *datesw, int msg, struct smsg *smsg)
318 {
319         register int state;
320         int compnum;
321         char *msgnam, buf[BUFSIZ], nam[NAMESZ];
322         register struct tws *tw;
323         register char *datecomp = NULL, *subjcomp = NULL;
324         register FILE *in;
325
326         if ((in = fopen(msgnam = m_name(msg), "r")) == NULL) {
327                 admonish(msgnam, "unable to read message");
328                 return (0);
329         }
330         for (compnum = 1, state = FLD;;) {
331                 switch (state = m_getfld(state, nam, buf, sizeof(buf), in)) {
332                 case FLD:
333                 case FLDEOF:
334                 case FLDPLUS:
335                         compnum++;
336                         if (!mh_strcasecmp(nam, datesw)) {
337                                 datecomp = add(buf, datecomp);
338                                 while (state == FLDPLUS) {
339                                         state = m_getfld(state, nam, buf,
340                                                         sizeof(buf), in);
341                                         datecomp = add(buf, datecomp);
342                                 }
343                                 if (!subjsort || subjcomp)
344                                         break;
345                         } else if (subjsort && !mh_strcasecmp(nam, subjsort)) {
346                                 subjcomp = add(buf, subjcomp);
347                                 while (state == FLDPLUS) {
348                                         state = m_getfld(state, nam, buf,
349                                                         sizeof(buf), in);
350                                         subjcomp = add(buf, subjcomp);
351                                 }
352                                 if (datecomp)
353                                         break;
354                         } else {
355                                 /* just flush this guy */
356                                 while (state == FLDPLUS)
357                                         state = m_getfld(state, nam, buf,
358                                                         sizeof(buf), in);
359                         }
360                         continue;
361
362                 case BODY:
363                 case BODYEOF:
364                 case FILEEOF:
365                         break;
366
367                 case LENERR:
368                 case FMTERR:
369                         if (state == LENERR || state == FMTERR)
370                                 admonish(NULL, "format error in message %d (header #%d)", msg, compnum);
371                         if (datecomp)
372                                 free(datecomp);
373                         if (subjcomp)
374                                 free(subjcomp);
375                         fclose(in);
376                         return (0);
377
378                 default:
379                         adios(NULL, "internal error -- you lose");
380                 }
381                 break;
382         }
383
384         /*
385         ** If no date component, then use the modification
386         ** time of the file as its date
387         */
388         if (!datecomp || (tw = dparsetime(datecomp)) == NULL) {
389                 struct stat st;
390
391                 admonish(NULL, "can't parse %s field in message %d",
392                                 datesw, msg);
393                 fstat(fileno(in), &st);
394                 smsg->s_clock = st.st_mtime;
395         } else {
396                 smsg->s_clock = dmktime(tw);
397         }
398
399         if (subjsort) {
400                 if (subjcomp) {
401                         /*
402                         ** try to make the subject "canonical": delete
403                         ** leading "re:", everything but letters & smash
404                         ** letters to lower case.
405                         */
406                         register char  *cp, *cp2;
407                         register unsigned char c;
408
409                         cp = subjcomp;
410                         cp2 = subjcomp;
411                         if (strcmp(subjsort, "subject") == 0) {
412                                 while ((c = *cp)) {
413                                         if (! isspace(c)) {
414                                                 if(uprf(cp, "re:"))
415                                                         cp += 2;
416                                                 else
417                                                         break;
418                                         }
419                                         cp++;
420                                 }
421                         }
422
423                         while ((c = *cp++)) {
424                                 if (isalnum(c))
425                                         *cp2++ = isupper(c) ? tolower(c) : c;
426                         }
427
428                         *cp2 = '\0';
429                 } else
430                         subjcomp = "";
431
432                 smsg->s_subj = subjcomp;
433         }
434         fclose(in);
435         if (datecomp)
436                 free(datecomp);
437
438         return (1);
439 }
440
441 /*
442 ** sort on dates.
443 */
444 static int
445 dsort(struct smsg **a, struct smsg **b)
446 {
447         if ((*a)->s_clock < (*b)->s_clock)
448                 return (-1);
449         else if ((*a)->s_clock > (*b)->s_clock)
450                 return (1);
451         else if ((*a)->s_msg < (*b)->s_msg)
452                 return (-1);
453         else
454                 return (1);
455 }
456
457 /*
458 ** sort on subjects.
459 */
460 static int
461 subsort(struct smsg **a, struct smsg **b)
462 {
463         register int i;
464
465         if ((i = strcmp((*a)->s_subj, (*b)->s_subj)))
466                 return (i);
467
468         return (dsort(a, b));
469 }
470
471 static int
472 txtsort(struct smsg **a, struct smsg **b)
473 {
474         register int i;
475
476         if ((i = strcmp((*a)->s_subj, (*b)->s_subj)))
477                 return (i);
478         else if ((*a)->s_msg < (*b)->s_msg)
479                 return (-1);
480         else
481                 return (1);
482 }
483
484 static void
485 rename_chain(struct msgs *mp, struct smsg **mlist, int msg, int endmsg)
486 {
487         int nxt, old, new;
488         char *newname, oldname[BUFSIZ];
489         char newbuf[MAXPATHLEN + 1];
490
491         for (;;) {
492                 nxt = mlist[msg] - smsgs;  /* mlist[msg] is a ptr into smsgs */
493                 mlist[msg] = (struct smsg *)0;
494                 old = smsgs[nxt].s_msg;
495                 new = smsgs[msg].s_msg;
496                 strncpy(oldname, m_name(old), sizeof(oldname));
497                 newname = m_name(new);
498                 if (verbose)
499                         printf("message %d becomes message %d\n", old, new);
500
501                 snprintf(oldname, sizeof (oldname), "%s/%d",
502                                 mp->foldpath, old);
503                 snprintf(newbuf, sizeof (newbuf), "%s/%d", mp->foldpath, new);
504                 ext_hook("ref-hook", oldname, newbuf);
505
506                 if (rename(oldname, newname) == NOTOK)
507                         adios(newname, "unable to rename %s to", oldname);
508
509                 copy_msg_flags(mp, new, old);
510                 if (mp->curmsg == old)
511                         seq_setcur(mp, new);
512
513                 if (nxt == endmsg)
514                         break;
515
516                 msg = nxt;
517         }
518 /* if (nxt != endmsg); */
519 /* rename_chain(mp, mlist, nxt, endmsg); */
520 }
521
522 static void
523 rename_msgs(struct msgs *mp, struct smsg **mlist)
524 {
525         int i, j, old, new;
526         seqset_t tmpset;
527         char f1[BUFSIZ], tmpfil[BUFSIZ];
528         char newbuf[MAXPATHLEN + 1];
529         struct smsg *sp;
530
531         strncpy(tmpfil, m_name(mp->hghmsg + 1), sizeof(tmpfil));
532
533         for (i = 0; i < nmsgs; i++) {
534                 if (! (sp = mlist[i]))
535                         continue;   /* did this one */
536
537                 j = sp - smsgs;
538                 if (j == i)
539                         continue;   /* this one doesn't move */
540
541                 /*
542                 ** the guy that was msg j is about to become msg i.
543                 ** rename 'j' to make a hole, then recursively rename
544                 ** guys to fill up the hole.
545                 */
546                 old = smsgs[j].s_msg;
547                 new = smsgs[i].s_msg;
548                 strncpy(f1, m_name(old), sizeof(f1));
549
550                 if (verbose)
551                         printf("renaming message chain from %d to %d\n",
552                                         old, new);
553
554                 /*
555                 ** Run the external hook to refile the old message as the
556                 ** temporary message number that is off of the end of the
557                 ** messages in the folder.
558                 */
559
560                 snprintf(f1, sizeof (f1), "%s/%d", mp->foldpath, old);
561                 snprintf(newbuf, sizeof (newbuf), "%s/%d",
562                                 mp->foldpath, mp->hghmsg + 1);
563                 ext_hook("ref-hook", f1, newbuf);
564
565                 if (rename(f1, tmpfil) == NOTOK)
566                         adios(tmpfil, "unable to rename %s to ", f1);
567
568                 get_msg_flags(mp, &tmpset, old);
569
570                 rename_chain(mp, mlist, j, i);
571
572                 /*
573                 ** Run the external hook to refile the temorary message number
574                 ** to the real place.
575                 */
576
577                 snprintf(f1, sizeof (f1), "%s/%d", mp->foldpath, new);
578                 ext_hook("ref-hook", newbuf, f1);
579
580                 if (rename(tmpfil, m_name(new)) == NOTOK)
581                         adios(m_name(new), "unable to rename %s to", tmpfil);
582
583                 set_msg_flags(mp, &tmpset, new);
584                 mp->msgflags |= SEQMOD;
585         }
586 }