0230495e316a3037e7da3cc06c844166e7783568
[mmh] / uip / sortm.c
1 /*
2 ** sortm.c -- sort messages in a folder by date/time
3 **
4 ** This code is Copyright (c) 2002, by the authors of nmh.  See the
5 ** COPYRIGHT file in the root directory of the nmh distribution for
6 ** complete copyright information.
7 */
8
#include <h/mh.h>
#include <h/tws.h>
#include <h/utils.h>
#include <limits.h>
12
13 static struct swit switches[] = {
14 #define DATESW  0
15          { "datefield field", 0 },
16 #define TEXTSW  1
17          { "textfield field", 0 },
18 #define NSUBJSW  2
19          { "notextfield", 2 },
20 #define LIMSW  3
21          { "limit days", 0 },
22 #define NLIMSW  4
23          { "nolimit", 2 },
24 #define VERBSW  5
25          { "verbose", 0 },
26 #define NVERBSW  6
27          { "noverbose", 2 },
28 #define VERSIONSW  7
29          { "Version", 0 },
30 #define HELPSW  8
31          { "help", 0 },
32          { NULL, 0 }
33 };
34
/*
** Sort record for one message; filled in by read_hdrs()/get_fields().
*/
struct smsg {
	int s_msg;       /* message number */
	time_t s_clock;  /* date field as a time, or the file's mtime */
	char *s_subj;    /* canonicalized text field ("" if the message
	                 ** has none); only set when subjsort is set */
};
40
/*
** Sort state shared by the functions in this file.
*/
static struct smsg *smsgs;  /* sort records, allocated by read_hdrs() */
int nmsgs;                  /* number of records in use in smsgs */

/* options set from the command line in main() */
char *subjsort = NULL;  /* sort on subject if != 0 */
time_t datelimit = 0;   /* -limit converted to seconds, 0 = no limit */
int submajor = 0;       /* if true, sort on subject-major */
int verbose;            /* if true, report what is being done */

/*
** Comparison function type taken by qsort().  The sort routines in
** this file take struct smsg ** arguments and are cast to this type
** to keep the compiler happy.
*/
typedef int (*qsort_comp)(const void *, const void *);
51
/*
** static prototypes
*/
static int read_hdrs(struct msgs *mp, char *datesw);
static int get_fields(char *datesw, int msg, struct smsg *smsg);
static int dsort(struct smsg **a, struct smsg **b);
static int subsort(struct smsg **a, struct smsg **b);
static int txtsort(struct smsg **a, struct smsg **b);
static void rename_chain(struct msgs *mp, struct smsg **mlist,
		int msg, int endmsg);
static void rename_msgs(struct msgs *mp, struct smsg **mlist);
62
63
64 int
65 main(int argc, char **argv)
66 {
67         int i, msgnum;
68         unsigned char *cp;
69         char *maildir, *datesw = NULL;
70         char *folder = NULL, buf[BUFSIZ], **argp;
71         char **arguments;
72         struct msgs_array msgs = { 0, 0, NULL };
73         struct msgs *mp;
74         struct smsg **dlist;
75
76         setlocale(LC_ALL, "");
77         invo_name = mhbasename(argv[0]);
78
79         /* read user profile/context */
80         context_read();
81
82         arguments = getarguments(invo_name, argc, argv, 1);
83         argp = arguments;
84
85         /*
86         ** Parse arguments
87         */
88         while ((cp = *argp++)) {
89                 if (*cp == '-') {
90                         switch (smatch(++cp, switches)) {
91                         case AMBIGSW:
92                                 ambigsw(cp, switches);
93                                 exit(1);
94                         case UNKWNSW:
95                                 adios(NULL, "-%s unknown", cp);
96
97                         case HELPSW:
98                                 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]", invo_name);
99                                 print_help(buf, switches, 1);
100                                 exit(0);
101                         case VERSIONSW:
102                                 print_version(invo_name);
103                                 exit(0);
104
105                         case DATESW:
106                                 if (datesw)
107                                         adios(NULL, "only one date field at a time");
108                                 if (!(datesw = *argp++) || *datesw == '-')
109                                         adios(NULL, "missing argument to %s",
110                                                         argp[-2]);
111                                 continue;
112
113                         case TEXTSW:
114                                 if (subjsort)
115                                         adios(NULL, "only one text field at a time");
116                                 if (!(subjsort = *argp++) || *subjsort == '-')
117                                         adios(NULL, "missing argument to %s",
118                                                         argp[-2]);
119                                 continue;
120
121                         case NSUBJSW:
122                                 subjsort = NULL;
123                                 continue;
124
125                         case LIMSW:
126                                 if (!(cp = *argp++) || *cp == '-')
127                                                 adios(NULL, "missing argument to %s", argp[-2]);
128                                 while (*cp == '0')
129                                         cp++;  /* skip any leading zeros */
130                                 if (!*cp) {  /* hit end of string */
131                                         submajor++;  /* sort subject-major */
132                                         continue;
133                                 }
134                                 if (!isdigit(*cp) || !(datelimit = atoi(cp)))
135                                         adios(NULL, "impossible limit %s", cp);
136                                 datelimit *= 60*60*24;
137                                 continue;
138                         case NLIMSW:
139                                 submajor = 0;  /* use date-major, but */
140                                 datelimit = 0;  /* use no limit */
141                                 continue;
142
143                         case VERBSW:
144                                 verbose++;
145                                 continue;
146                         case NVERBSW:
147                                 verbose = 0;
148                                 continue;
149                         }
150                 }
151                 if (*cp == '+' || *cp == '@') {
152                         if (folder)
153                                 adios(NULL, "only one folder at a time!");
154                         else
155                                 folder = getcpy(expandfol(cp));
156                 } else
157                         app_msgarg(&msgs, cp);
158         }
159
160         if (!msgs.size)
161                 app_msgarg(&msgs, seq_all);
162         if (!datesw)
163                 datesw = "date";
164         if (!folder)
165                 folder = getcurfol();
166         maildir = toabsdir(folder);
167
168         if (chdir(maildir) == NOTOK)
169                 adios(maildir, "unable to change directory to");
170
171         /* read folder and create message structure */
172         if (!(mp = folder_read(folder)))
173                 adios(NULL, "unable to read folder %s", folder);
174
175         /* check for empty folder */
176         if (mp->nummsg == 0)
177                 adios(NULL, "no messages in %s", folder);
178
179         /* parse all the message ranges/sequences and set SELECTED */
180         for (msgnum = 0; msgnum < msgs.size; msgnum++)
181                 if (!m_convert(mp, msgs.msgs[msgnum]))
182                         exit(1);
183         seq_setprev(mp);  /* set the previous sequence */
184
185         if ((nmsgs = read_hdrs(mp, datesw)) <= 0)
186                 adios(NULL, "no messages to sort");
187
188         /*
189         ** sort a list of pointers to our "messages to be sorted".
190         */
191         dlist = (struct smsg **) mh_xmalloc((nmsgs+1) * sizeof(*dlist));
192         for (i = 0; i < nmsgs; i++)
193                 dlist[i] = &smsgs[i];
194         dlist[nmsgs] = 0;
195
196         if (verbose) {  /* announce what we're doing */
197                 if (subjsort)
198                         printf("sorting by %s-major %s-minor\n",
199                                 submajor ? subjsort : datesw,
200                                 submajor ? datesw : subjsort);
201                 else
202                         printf("sorting by datefield %s\n", datesw);
203         }
204
205         /* first sort by date, or by subject-major, date-minor */
206         qsort((char *) dlist, nmsgs, sizeof(*dlist),
207                         (qsort_comp) (submajor && subjsort ? txtsort : dsort));
208
209         /*
210         ** if we're sorting on subject, we need another list
211         ** in subject order, then a merge pass to collate the
212         ** two sorts.
213         */
214         if (!submajor && subjsort) {  /* already date sorted */
215                 struct smsg **slist, **flist;
216                 register struct smsg ***il, **fp, **dp;
217
218                 slist = (struct smsg **)
219                                 mh_xmalloc((nmsgs+1) * sizeof(*slist));
220                 memcpy((char *)slist, (char *)dlist, (nmsgs+1)*sizeof(*slist));
221                 qsort((char *)slist, nmsgs, sizeof(*slist),
222                                 (qsort_comp) subsort);
223
224                 /*
225                 ** make an inversion list so we can quickly find
226                 ** the collection of messages with the same subj
227                 ** given a message number.
228                 */
229                 il = (struct smsg ***) calloc(mp->hghsel+1, sizeof(*il));
230                 if (! il)
231                         adios(NULL, "couldn't allocate msg list");
232                 for (i = 0; i < nmsgs; i++)
233                         il[slist[i]->s_msg] = &slist[i];
234                 /*
235                 ** make up the final list, chronological but with
236                 ** all the same subjects grouped together.
237                 */
238                 flist = (struct smsg **)
239                                 mh_xmalloc((nmsgs+1) * sizeof(*flist));
240                 fp = flist;
241                 for (dp = dlist; *dp;) {
242                         register struct smsg **s = il[(*dp++)->s_msg];
243
244                         /* see if we already did this guy */
245                         if (! s)
246                                 continue;
247
248                         *fp++ = *s++;
249                         /*
250                         ** take the next message(s) if there is one,
251                         ** its subject isn't null and its subject
252                         ** is the same as this one and it's not too
253                         ** far away in time.
254                         */
255                         while (*s && (*s)->s_subj[0] && strcmp((*s)->s_subj, s[-1]->s_subj) == 0 && (datelimit == 0 || (*s)->s_clock - s[-1]->s_clock <= datelimit)) {
256                                 il[(*s)->s_msg] = 0;
257                                 *fp++ = *s++;
258                         }
259                 }
260                 *fp = 0;
261                 free(slist);
262                 free(dlist);
263                 dlist = flist;
264         }
265
266         /*
267         ** At this point, dlist is a sorted array of pointers to smsg
268         ** structures, each of which contains a message number.
269         */
270
271         rename_msgs(mp, dlist);
272
273         context_replace(curfolder, folder);  /* update current folder */
274         seq_save(mp);  /* synchronize message sequences */
275         context_save();  /* save the context file */
276         folder_free(mp);  /* free folder/message structure */
277         return 0;
278 }
279
280 static int
281 read_hdrs(struct msgs *mp, char *datesw)
282 {
283         int msgnum;
284         struct tws tb;
285         register struct smsg *s;
286
287         twscopy(&tb, dlocaltimenow());
288
289         smsgs = (struct smsg *) calloc((size_t) (mp->hghsel - mp->lowsel + 2),
290                         sizeof(*smsgs));
291         if (smsgs == NULL)
292                 adios(NULL, "unable to allocate sort storage");
293
294         s = smsgs;
295         for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
296                 if (is_selected(mp, msgnum)) {
297                         if (get_fields(datesw, msgnum, s)) {
298                                 s->s_msg = msgnum;
299                                 s++;
300                         }
301                 }
302         }
303         s->s_msg = 0;
304         return(s - smsgs);
305 }
306
307
308 /*
309 ** Parse the message and get the data or subject field,
310 ** if needed.
311 */
312
313 static int
314 get_fields(char *datesw, int msg, struct smsg *smsg)
315 {
316         register int state;
317         int compnum;
318         char *msgnam, buf[BUFSIZ], nam[NAMESZ];
319         register struct tws *tw;
320         register char *datecomp = NULL, *subjcomp = NULL;
321         register FILE *in;
322
323         if ((in = fopen(msgnam = m_name(msg), "r")) == NULL) {
324                 admonish(msgnam, "unable to read message");
325                 return (0);
326         }
327         for (compnum = 1, state = FLD;;) {
328                 switch (state = m_getfld(state, nam, buf, sizeof(buf), in)) {
329                 case FLD:
330                 case FLDEOF:
331                 case FLDPLUS:
332                         compnum++;
333                         if (!mh_strcasecmp(nam, datesw)) {
334                                 datecomp = add(buf, datecomp);
335                                 while (state == FLDPLUS) {
336                                         state = m_getfld(state, nam, buf,
337                                                         sizeof(buf), in);
338                                         datecomp = add(buf, datecomp);
339                                 }
340                                 if (!subjsort || subjcomp)
341                                         break;
342                         } else if (subjsort && !mh_strcasecmp(nam, subjsort)) {
343                                 subjcomp = add(buf, subjcomp);
344                                 while (state == FLDPLUS) {
345                                         state = m_getfld(state, nam, buf,
346                                                         sizeof(buf), in);
347                                         subjcomp = add(buf, subjcomp);
348                                 }
349                                 if (datecomp)
350                                         break;
351                         } else {
352                                 /* just flush this guy */
353                                 while (state == FLDPLUS)
354                                         state = m_getfld(state, nam, buf,
355                                                         sizeof(buf), in);
356                         }
357                         continue;
358
359                 case BODY:
360                 case BODYEOF:
361                 case FILEEOF:
362                         break;
363
364                 case LENERR:
365                 case FMTERR:
366                         if (state == LENERR || state == FMTERR)
367                                 admonish(NULL, "format error in message %d (header #%d)", msg, compnum);
368                         if (datecomp)
369                                 free(datecomp);
370                         if (subjcomp)
371                                 free(subjcomp);
372                         fclose(in);
373                         return (0);
374
375                 default:
376                         adios(NULL, "internal error -- you lose");
377                 }
378                 break;
379         }
380
381         /*
382         ** If no date component, then use the modification
383         ** time of the file as its date
384         */
385         if (!datecomp || (tw = dparsetime(datecomp)) == NULL) {
386                 struct stat st;
387
388                 admonish(NULL, "can't parse %s field in message %d",
389                                 datesw, msg);
390                 fstat(fileno(in), &st);
391                 smsg->s_clock = st.st_mtime;
392         } else {
393                 smsg->s_clock = dmktime(tw);
394         }
395
396         if (subjsort) {
397                 if (subjcomp) {
398                         /*
399                         ** try to make the subject "canonical": delete
400                         ** leading "re:", everything but letters & smash
401                         ** letters to lower case.
402                         */
403                         register char  *cp, *cp2;
404                         register unsigned char c;
405
406                         cp = subjcomp;
407                         cp2 = subjcomp;
408                         if (strcmp(subjsort, "subject") == 0) {
409                                 while ((c = *cp)) {
410                                         if (! isspace(c)) {
411                                                 if(uprf(cp, "re:"))
412                                                         cp += 2;
413                                                 else
414                                                         break;
415                                         }
416                                         cp++;
417                                 }
418                         }
419
420                         while ((c = *cp++)) {
421                                 if (isalnum(c))
422                                         *cp2++ = isupper(c) ? tolower(c) : c;
423                         }
424
425                         *cp2 = '\0';
426                 } else
427                         subjcomp = "";
428
429                 smsg->s_subj = subjcomp;
430         }
431         fclose(in);
432         if (datecomp)
433                 free(datecomp);
434
435         return (1);
436 }
437
438 /*
439 ** sort on dates.
440 */
441 static int
442 dsort(struct smsg **a, struct smsg **b)
443 {
444         if ((*a)->s_clock < (*b)->s_clock)
445                 return (-1);
446         else if ((*a)->s_clock > (*b)->s_clock)
447                 return (1);
448         else if ((*a)->s_msg < (*b)->s_msg)
449                 return (-1);
450         else
451                 return (1);
452 }
453
454 /*
455 ** sort on subjects.
456 */
457 static int
458 subsort(struct smsg **a, struct smsg **b)
459 {
460         register int i;
461
462         if ((i = strcmp((*a)->s_subj, (*b)->s_subj)))
463                 return (i);
464
465         return (dsort(a, b));
466 }
467
468 static int
469 txtsort(struct smsg **a, struct smsg **b)
470 {
471         register int i;
472
473         if ((i = strcmp((*a)->s_subj, (*b)->s_subj)))
474                 return (i);
475         else if ((*a)->s_msg < (*b)->s_msg)
476                 return (-1);
477         else
478                 return (1);
479 }
480
481 static void
482 rename_chain(struct msgs *mp, struct smsg **mlist, int msg, int endmsg)
483 {
484         int nxt, old, new;
485         char *newname, oldname[BUFSIZ];
486         char newbuf[MAXPATHLEN + 1];
487
488         for (;;) {
489                 nxt = mlist[msg] - smsgs;  /* mlist[msg] is a ptr into smsgs */
490                 mlist[msg] = (struct smsg *)0;
491                 old = smsgs[nxt].s_msg;
492                 new = smsgs[msg].s_msg;
493                 strncpy(oldname, m_name(old), sizeof(oldname));
494                 newname = m_name(new);
495                 if (verbose)
496                         printf("message %d becomes message %d\n", old, new);
497
498                 snprintf(oldname, sizeof (oldname), "%s/%d",
499                                 mp->foldpath, old);
500                 snprintf(newbuf, sizeof (newbuf), "%s/%d", mp->foldpath, new);
501                 ext_hook("ref-hook", oldname, newbuf);
502
503                 if (rename(oldname, newname) == NOTOK)
504                         adios(newname, "unable to rename %s to", oldname);
505
506                 copy_msg_flags(mp, new, old);
507                 if (mp->curmsg == old)
508                         seq_setcur(mp, new);
509
510                 if (nxt == endmsg)
511                         break;
512
513                 msg = nxt;
514         }
515 /* if (nxt != endmsg); */
516 /* rename_chain(mp, mlist, nxt, endmsg); */
517 }
518
519 static void
520 rename_msgs(struct msgs *mp, struct smsg **mlist)
521 {
522         int i, j, old, new;
523         seqset_t tmpset;
524         char f1[BUFSIZ], tmpfil[BUFSIZ];
525         char newbuf[MAXPATHLEN + 1];
526         struct smsg *sp;
527
528         strncpy(tmpfil, m_name(mp->hghmsg + 1), sizeof(tmpfil));
529
530         for (i = 0; i < nmsgs; i++) {
531                 if (! (sp = mlist[i]))
532                         continue;   /* did this one */
533
534                 j = sp - smsgs;
535                 if (j == i)
536                         continue;   /* this one doesn't move */
537
538                 /*
539                 ** the guy that was msg j is about to become msg i.
540                 ** rename 'j' to make a hole, then recursively rename
541                 ** guys to fill up the hole.
542                 */
543                 old = smsgs[j].s_msg;
544                 new = smsgs[i].s_msg;
545                 strncpy(f1, m_name(old), sizeof(f1));
546
547                 if (verbose)
548                         printf("renaming message chain from %d to %d\n",
549                                         old, new);
550
551                 /*
552                 ** Run the external hook to refile the old message as the
553                 ** temporary message number that is off of the end of the
554                 ** messages in the folder.
555                 */
556
557                 snprintf(f1, sizeof (f1), "%s/%d", mp->foldpath, old);
558                 snprintf(newbuf, sizeof (newbuf), "%s/%d",
559                                 mp->foldpath, mp->hghmsg + 1);
560                 ext_hook("ref-hook", f1, newbuf);
561
562                 if (rename(f1, tmpfil) == NOTOK)
563                         adios(tmpfil, "unable to rename %s to ", f1);
564
565                 get_msg_flags(mp, &tmpset, old);
566
567                 rename_chain(mp, mlist, j, i);
568
569                 /*
570                 ** Run the external hook to refile the temorary message number
571                 ** to the real place.
572                 */
573
574                 snprintf(f1, sizeof (f1), "%s/%d", mp->foldpath, new);
575                 ext_hook("ref-hook", newbuf, f1);
576
577                 if (rename(tmpfil, m_name(new)) == NOTOK)
578                         adios(m_name(new), "unable to rename %s to", tmpfil);
579
580                 set_msg_flags(mp, &tmpset, new);
581                 mp->msgflags |= SEQMOD;
582         }
583 }