Move #include from h/mh.h to source files
[mmh] / uip / sortm.c
1 /*
2 ** sortm.c -- sort messages in a folder by date/time
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
#include <h/mh.h>
#include <h/tws.h>
#include <h/utils.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

#ifdef HAVE_SYS_PARAM_H
# include <sys/param.h>
#endif
20
21 static struct swit switches[] = {
22 #define DATESW  0
23          { "datefield field", 0 },
24 #define TEXTSW  1
25          { "textfield field", 0 },
26 #define NSUBJSW  2
27          { "notextfield", 2 },
28 #define LIMSW  3
29          { "limit days", 0 },
30 #define NLIMSW  4
31          { "nolimit", 2 },
32 #define VERBSW  5
33          { "verbose", 0 },
34 #define NVERBSW  6
35          { "noverbose", 2 },
36 #define VERSIONSW  7
37          { "Version", 0 },
38 #define HELPSW  8
39          { "help", 0 },
40          { NULL, 0 }
41 };
42
/*
** One message that takes part in the sort.
*/
struct smsg {
	int s_msg;       /* message number in the folder */
	time_t s_clock;  /* time the message is sorted by */
	char *s_subj;    /* canonical text field; only set when sorting on text */
};
48
/* the messages collected by read_hdrs() and how many of them there are */
static struct smsg *smsgs;
int nmsgs;

/* sort settings, filled in from the command line */
char *subjsort = NULL;  /* name of the text field to sort on, NULL if none */
time_t datelimit = 0;   /* -limit in seconds, 0 if there is no limit */
int submajor = 0;       /* non-zero: the text field is the major sort key */
int verbose;            /* non-zero: report what is being done */
56
/*
** Comparison function type taken by qsort(); the comparators below
** are cast to it to keep the compiler quiet.
*/
typedef int (*qsort_comp)(const void *lhs, const void *rhs);
59
/*
** static prototypes
*/

/* reading the messages */
static int read_hdrs(struct msgs *mp, char *datesw);
static int get_fields(char *datesw, int msg, struct smsg *smsg);

/* qsort() comparators */
static int dsort(struct smsg **a, struct smsg **b);
static int subsort(struct smsg **a, struct smsg **b);
static int txtsort(struct smsg **a, struct smsg **b);

/* moving the message files into their new order */
static void rename_chain(struct msgs *mp, struct smsg **mlist, int msg,
		int endmsg);
static void rename_msgs(struct msgs *mp, struct smsg **mlist);
70
71
72 int
73 main(int argc, char **argv)
74 {
75         int i, msgnum;
76         unsigned char *cp;
77         char *maildir, *datesw = NULL;
78         char *folder = NULL, buf[BUFSIZ], **argp;
79         char **arguments;
80         struct msgs_array msgs = { 0, 0, NULL };
81         struct msgs *mp;
82         struct smsg **dlist;
83
84         setlocale(LC_ALL, "");
85         invo_name = mhbasename(argv[0]);
86
87         /* read user profile/context */
88         context_read();
89
90         arguments = getarguments(invo_name, argc, argv, 1);
91         argp = arguments;
92
93         /*
94         ** Parse arguments
95         */
96         while ((cp = *argp++)) {
97                 if (*cp == '-') {
98                         switch (smatch(++cp, switches)) {
99                         case AMBIGSW:
100                                 ambigsw(cp, switches);
101                                 exit(1);
102                         case UNKWNSW:
103                                 adios(NULL, "-%s unknown", cp);
104
105                         case HELPSW:
106                                 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
107                                 print_help(buf, switches, 1);
108                                 exit(0);
109                         case VERSIONSW:
110                                 print_version(invo_name);
111                                 exit(0);
112
113                         case DATESW:
114                                 if (datesw)
115                                         adios(NULL, "only one date field at a time");
116                                 if (!(datesw = *argp++) || *datesw == '-')
117                                         adios(NULL, "missing argument to %s",
118                                                         argp[-2]);
119                                 continue;
120
121                         case TEXTSW:
122                                 if (subjsort)
123                                         adios(NULL, "only one text field at a time");
124                                 if (!(subjsort = *argp++) || *subjsort == '-')
125                                         adios(NULL, "missing argument to %s",
126                                                         argp[-2]);
127                                 continue;
128
129                         case NSUBJSW:
130                                 subjsort = NULL;
131                                 continue;
132
133                         case LIMSW:
134                                 if (!(cp = *argp++) || *cp == '-')
135                                                 adios(NULL, "missing argument to %s", argp[-2]);
136                                 while (*cp == '0')
137                                         cp++;  /* skip any leading zeros */
138                                 if (!*cp) {  /* hit end of string */
139                                         submajor++;  /* sort subject-major */
140                                         continue;
141                                 }
142                                 if (!isdigit(*cp) || !(datelimit = atoi(cp)))
143                                         adios(NULL, "impossible limit %s", cp);
144                                 datelimit *= 60*60*24;
145                                 continue;
146                         case NLIMSW:
147                                 submajor = 0;  /* use date-major, but */
148                                 datelimit = 0;  /* use no limit */
149                                 continue;
150
151                         case VERBSW:
152                                 verbose++;
153                                 continue;
154                         case NVERBSW:
155                                 verbose = 0;
156                                 continue;
157                         }
158                 }
159                 if (*cp == '+' || *cp == '@') {
160                         if (folder)
161                                 adios(NULL, "only one folder at a time!");
162                         else
163                                 folder = getcpy(expandfol(cp));
164                 } else
165                         app_msgarg(&msgs, cp);
166         }
167
168         if (!msgs.size)
169                 app_msgarg(&msgs, seq_all);
170         if (!datesw)
171                 datesw = "date";
172         if (!folder)
173                 folder = getcurfol();
174         maildir = toabsdir(folder);
175
176         if (chdir(maildir) == NOTOK)
177                 adios(maildir, "unable to change directory to");
178
179         /* read folder and create message structure */
180         if (!(mp = folder_read(folder)))
181                 adios(NULL, "unable to read folder %s", folder);
182
183         /* check for empty folder */
184         if (mp->nummsg == 0)
185                 adios(NULL, "no messages in %s", folder);
186
187         /* parse all the message ranges/sequences and set SELECTED */
188         for (msgnum = 0; msgnum < msgs.size; msgnum++)
189                 if (!m_convert(mp, msgs.msgs[msgnum]))
190                         exit(1);
191         seq_setprev(mp);  /* set the previous sequence */
192
193         if ((nmsgs = read_hdrs(mp, datesw)) <= 0)
194                 adios(NULL, "no messages to sort");
195
196         /*
197         ** sort a list of pointers to our "messages to be sorted".
198         */
199         dlist = (struct smsg **) mh_xmalloc((nmsgs+1) * sizeof(*dlist));
200         for (i = 0; i < nmsgs; i++)
201                 dlist[i] = &smsgs[i];
202         dlist[nmsgs] = 0;
203
204         if (verbose) {  /* announce what we're doing */
205                 if (subjsort)
206                         printf("sorting by %s-major %s-minor\n",
207                                 submajor ? subjsort : datesw,
208                                 submajor ? datesw : subjsort);
209                 else
210                         printf("sorting by datefield %s\n", datesw);
211         }
212
213         /* first sort by date, or by subject-major, date-minor */
214         qsort((char *) dlist, nmsgs, sizeof(*dlist),
215                         (qsort_comp) (submajor && subjsort ? txtsort : dsort));
216
217         /*
218         ** if we're sorting on subject, we need another list
219         ** in subject order, then a merge pass to collate the
220         ** two sorts.
221         */
222         if (!submajor && subjsort) {  /* already date sorted */
223                 struct smsg **slist, **flist;
224                 register struct smsg ***il, **fp, **dp;
225
226                 slist = (struct smsg **)
227                                 mh_xmalloc((nmsgs+1) * sizeof(*slist));
228                 memcpy((char *)slist, (char *)dlist, (nmsgs+1)*sizeof(*slist));
229                 qsort((char *)slist, nmsgs, sizeof(*slist),
230                                 (qsort_comp) subsort);
231
232                 /*
233                 ** make an inversion list so we can quickly find
234                 ** the collection of messages with the same subj
235                 ** given a message number.
236                 */
237                 il = (struct smsg ***) calloc(mp->hghsel+1, sizeof(*il));
238                 if (! il)
239                         adios(NULL, "couldn't allocate msg list");
240                 for (i = 0; i < nmsgs; i++)
241                         il[slist[i]->s_msg] = &slist[i];
242                 /*
243                 ** make up the final list, chronological but with
244                 ** all the same subjects grouped together.
245                 */
246                 flist = (struct smsg **)
247                                 mh_xmalloc((nmsgs+1) * sizeof(*flist));
248                 fp = flist;
249                 for (dp = dlist; *dp;) {
250                         register struct smsg **s = il[(*dp++)->s_msg];
251
252                         /* see if we already did this guy */
253                         if (! s)
254                                 continue;
255
256                         *fp++ = *s++;
257                         /*
258                         ** take the next message(s) if there is one,
259                         ** its subject isn't null and its subject
260                         ** is the same as this one and it's not too
261                         ** far away in time.
262                         */
263                         while (*s && (*s)->s_subj[0] && strcmp((*s)->s_subj, s[-1]->s_subj) == 0 && (datelimit == 0 || (*s)->s_clock - s[-1]->s_clock <= datelimit)) {
264                                 il[(*s)->s_msg] = 0;
265                                 *fp++ = *s++;
266                         }
267                 }
268                 *fp = 0;
269                 free(slist);
270                 free(dlist);
271                 dlist = flist;
272         }
273
274         /*
275         ** At this point, dlist is a sorted array of pointers to smsg
276         ** structures, each of which contains a message number.
277         */
278
279         rename_msgs(mp, dlist);
280
281         context_replace(curfolder, folder);  /* update current folder */
282         seq_save(mp);  /* synchronize message sequences */
283         context_save();  /* save the context file */
284         folder_free(mp);  /* free folder/message structure */
285         return 0;
286 }
287
288 static int
289 read_hdrs(struct msgs *mp, char *datesw)
290 {
291         int msgnum;
292         struct tws tb;
293         register struct smsg *s;
294
295         twscopy(&tb, dlocaltimenow());
296
297         smsgs = (struct smsg *) calloc((size_t) (mp->hghsel - mp->lowsel + 2),
298                         sizeof(*smsgs));
299         if (smsgs == NULL)
300                 adios(NULL, "unable to allocate sort storage");
301
302         s = smsgs;
303         for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
304                 if (is_selected(mp, msgnum)) {
305                         if (get_fields(datesw, msgnum, s)) {
306                                 s->s_msg = msgnum;
307                                 s++;
308                         }
309                 }
310         }
311         s->s_msg = 0;
312         return(s - smsgs);
313 }
314
315
316 /*
317 ** Parse the message and get the data or subject field,
318 ** if needed.
319 */
320
321 static int
322 get_fields(char *datesw, int msg, struct smsg *smsg)
323 {
324         register int state;
325         int compnum;
326         char *msgnam, buf[BUFSIZ], nam[NAMESZ];
327         register struct tws *tw;
328         register char *datecomp = NULL, *subjcomp = NULL;
329         register FILE *in;
330
331         if ((in = fopen(msgnam = m_name(msg), "r")) == NULL) {
332                 admonish(msgnam, "unable to read message");
333                 return (0);
334         }
335         for (compnum = 1, state = FLD;;) {
336                 switch (state = m_getfld(state, nam, buf, sizeof(buf), in)) {
337                 case FLD:
338                 case FLDEOF:
339                 case FLDPLUS:
340                         compnum++;
341                         if (!mh_strcasecmp(nam, datesw)) {
342                                 datecomp = add(buf, datecomp);
343                                 while (state == FLDPLUS) {
344                                         state = m_getfld(state, nam, buf,
345                                                         sizeof(buf), in);
346                                         datecomp = add(buf, datecomp);
347                                 }
348                                 if (!subjsort || subjcomp)
349                                         break;
350                         } else if (subjsort && !mh_strcasecmp(nam, subjsort)) {
351                                 subjcomp = add(buf, subjcomp);
352                                 while (state == FLDPLUS) {
353                                         state = m_getfld(state, nam, buf,
354                                                         sizeof(buf), in);
355                                         subjcomp = add(buf, subjcomp);
356                                 }
357                                 if (datecomp)
358                                         break;
359                         } else {
360                                 /* just flush this guy */
361                                 while (state == FLDPLUS)
362                                         state = m_getfld(state, nam, buf,
363                                                         sizeof(buf), in);
364                         }
365                         continue;
366
367                 case BODY:
368                 case BODYEOF:
369                 case FILEEOF:
370                         break;
371
372                 case LENERR:
373                 case FMTERR:
374                         if (state == LENERR || state == FMTERR)
375                                 admonish(NULL, "format error in message %d (header #%d)", msg, compnum);
376                         if (datecomp)
377                                 free(datecomp);
378                         if (subjcomp)
379                                 free(subjcomp);
380                         fclose(in);
381                         return (0);
382
383                 default:
384                         adios(NULL, "internal error -- you lose");
385                 }
386                 break;
387         }
388
389         /*
390         ** If no date component, then use the modification
391         ** time of the file as its date
392         */
393         if (!datecomp || (tw = dparsetime(datecomp)) == NULL) {
394                 struct stat st;
395
396                 admonish(NULL, "can't parse %s field in message %d",
397                                 datesw, msg);
398                 fstat(fileno(in), &st);
399                 smsg->s_clock = st.st_mtime;
400         } else {
401                 smsg->s_clock = dmktime(tw);
402         }
403
404         if (subjsort) {
405                 if (subjcomp) {
406                         /*
407                         ** try to make the subject "canonical": delete
408                         ** leading "re:", everything but letters & smash
409                         ** letters to lower case.
410                         */
411                         register char  *cp, *cp2;
412                         register unsigned char c;
413
414                         cp = subjcomp;
415                         cp2 = subjcomp;
416                         if (strcmp(subjsort, "subject") == 0) {
417                                 while ((c = *cp)) {
418                                         if (! isspace(c)) {
419                                                 if(uprf(cp, "re:"))
420                                                         cp += 2;
421                                                 else
422                                                         break;
423                                         }
424                                         cp++;
425                                 }
426                         }
427
428                         while ((c = *cp++)) {
429                                 if (isalnum(c))
430                                         *cp2++ = isupper(c) ? tolower(c) : c;
431                         }
432
433                         *cp2 = '\0';
434                 } else
435                         subjcomp = "";
436
437                 smsg->s_subj = subjcomp;
438         }
439         fclose(in);
440         if (datecomp)
441                 free(datecomp);
442
443         return (1);
444 }
445
446 /*
447 ** sort on dates.
448 */
449 static int
450 dsort(struct smsg **a, struct smsg **b)
451 {
452         if ((*a)->s_clock < (*b)->s_clock)
453                 return (-1);
454         else if ((*a)->s_clock > (*b)->s_clock)
455                 return (1);
456         else if ((*a)->s_msg < (*b)->s_msg)
457                 return (-1);
458         else
459                 return (1);
460 }
461
462 /*
463 ** sort on subjects.
464 */
465 static int
466 subsort(struct smsg **a, struct smsg **b)
467 {
468         register int i;
469
470         if ((i = strcmp((*a)->s_subj, (*b)->s_subj)))
471                 return (i);
472
473         return (dsort(a, b));
474 }
475
476 static int
477 txtsort(struct smsg **a, struct smsg **b)
478 {
479         register int i;
480
481         if ((i = strcmp((*a)->s_subj, (*b)->s_subj)))
482                 return (i);
483         else if ((*a)->s_msg < (*b)->s_msg)
484                 return (-1);
485         else
486                 return (1);
487 }
488
489 static void
490 rename_chain(struct msgs *mp, struct smsg **mlist, int msg, int endmsg)
491 {
492         int nxt, old, new;
493         char *newname, oldname[BUFSIZ];
494         char newbuf[MAXPATHLEN + 1];
495
496         for (;;) {
497                 nxt = mlist[msg] - smsgs;  /* mlist[msg] is a ptr into smsgs */
498                 mlist[msg] = (struct smsg *)0;
499                 old = smsgs[nxt].s_msg;
500                 new = smsgs[msg].s_msg;
501                 strncpy(oldname, m_name(old), sizeof(oldname));
502                 newname = m_name(new);
503                 if (verbose)
504                         printf("message %d becomes message %d\n", old, new);
505
506                 snprintf(oldname, sizeof (oldname), "%s/%d",
507                                 mp->foldpath, old);
508                 snprintf(newbuf, sizeof (newbuf), "%s/%d", mp->foldpath, new);
509                 ext_hook("ref-hook", oldname, newbuf);
510
511                 if (rename(oldname, newname) == NOTOK)
512                         adios(newname, "unable to rename %s to", oldname);
513
514                 copy_msg_flags(mp, new, old);
515                 if (mp->curmsg == old)
516                         seq_setcur(mp, new);
517
518                 if (nxt == endmsg)
519                         break;
520
521                 msg = nxt;
522         }
523 /* if (nxt != endmsg); */
524 /* rename_chain(mp, mlist, nxt, endmsg); */
525 }
526
527 static void
528 rename_msgs(struct msgs *mp, struct smsg **mlist)
529 {
530         int i, j, old, new;
531         seqset_t tmpset;
532         char f1[BUFSIZ], tmpfil[BUFSIZ];
533         char newbuf[MAXPATHLEN + 1];
534         struct smsg *sp;
535
536         strncpy(tmpfil, m_name(mp->hghmsg + 1), sizeof(tmpfil));
537
538         for (i = 0; i < nmsgs; i++) {
539                 if (! (sp = mlist[i]))
540                         continue;   /* did this one */
541
542                 j = sp - smsgs;
543                 if (j == i)
544                         continue;   /* this one doesn't move */
545
546                 /*
547                 ** the guy that was msg j is about to become msg i.
548                 ** rename 'j' to make a hole, then recursively rename
549                 ** guys to fill up the hole.
550                 */
551                 old = smsgs[j].s_msg;
552                 new = smsgs[i].s_msg;
553                 strncpy(f1, m_name(old), sizeof(f1));
554
555                 if (verbose)
556                         printf("renaming message chain from %d to %d\n",
557                                         old, new);
558
559                 /*
560                 ** Run the external hook to refile the old message as the
561                 ** temporary message number that is off of the end of the
562                 ** messages in the folder.
563                 */
564
565                 snprintf(f1, sizeof (f1), "%s/%d", mp->foldpath, old);
566                 snprintf(newbuf, sizeof (newbuf), "%s/%d",
567                                 mp->foldpath, mp->hghmsg + 1);
568                 ext_hook("ref-hook", f1, newbuf);
569
570                 if (rename(f1, tmpfil) == NOTOK)
571                         adios(tmpfil, "unable to rename %s to ", f1);
572
573                 get_msg_flags(mp, &tmpset, old);
574
575                 rename_chain(mp, mlist, j, i);
576
577                 /*
578                 ** Run the external hook to refile the temorary message number
579                 ** to the real place.
580                 */
581
582                 snprintf(f1, sizeof (f1), "%s/%d", mp->foldpath, new);
583                 ext_hook("ref-hook", newbuf, f1);
584
585                 if (rename(tmpfil, m_name(new)) == NOTOK)
586                         adios(m_name(new), "unable to rename %s to", tmpfil);
587
588                 set_msg_flags(mp, &tmpset, new);
589                 mp->msgflags |= SEQMOD;
590         }
591 }