ezmlmx 0.68
ezmlmx
Loading...
Searching...
No Matches
unfold_hdr.c
Go to the documentation of this file.
1#include "stralloc.h"
2#include "case.h"
3#include "byte.h"
4#include "errtxt.h"
5#include "mime.h"
6#include "idx.h"
7#include "logmsg.h"
8#include "errtxt.h"
9
10#define WHO "unfold_hdr"
11
17
18static stralloc tmpdata = {0};
19
20static void die_nomem() { logmsg(WHO,111,FATAL,ERR_NOMEM); }
21
22static int trimre(char **cpp,char *cpend,stralloc *prefix)
23{
24 int r = 0;
25 char *cp;
26 char *cpnew;
27 int junk;
28 unsigned int i,j;
29 unsigned int serial;
30
31 cp = *cpp;
32 serial = prefix->len; /* pointer to serial number */
33 if (serial)
34 serial = byte_rchr(prefix->s,prefix->len,'#');
35
36 junk = 1;
37 while (junk) {
38 junk = 0;
39 while (cp <= cpend && (*cp == ' ' || *cp == '\t'))
40 cp++;
41 cpnew = cp;
42 while (++cpnew <= cpend) { /* /(..+:\s)/ is a reply indicator */
43 if (*cpnew == ' ') {
44 if (cpnew < cp + 3) break; /* at least 3 char before ' ' */
45 if (*(cpnew - 1) != ':') break; /* require ':' before ' ' */
46 if (cpnew > cp + 5) { /* if > 4 char before ':' require */
47 char ch;
48 ch = *(cpnew - 2); /* XX^3, XX[3], XX(3) */
49 if (ch != ')' && ch != ']' && (ch < '0' || ch > '9'))
50 break;
51 }
52 junk = 1;
53 r |= 1;
54 cp = cpnew + 1;
55 break;
56 }
57 }
58 /* prefix removal is complicated by the inconsistent handling of ' ' */
59 /* when there are rfc2047-encoded words in the subject. We first */
60 /* compare prefix before "serial" ignoring space, then skip the */
61 /* number, then compare after "serial". If both matched we've found */
62 /* the prefix. */
63
64 if (serial) {
65 cpnew = cp;
66 i = 0;
67 while (i < serial && cpnew <= cpend) {
68 if (*cpnew != ' ') {
69 if (prefix->s[i] == ' ') {
70 ++i;
71 continue;
72 }
73 if (*cpnew != prefix->s[i]) break;
74 ++i;
75 }
76 ++cpnew;
77 }
78 if (i == serial) { /* match before serial */
79 j = prefix->len;
80 if (serial != j) { /* got a '#' */
81 while (cpnew <= cpend && *cpnew == ' ' || (*cpnew <= '9' && *cpnew >= '0'))
82 ++cpnew; /* skip number/space */
83 i = serial + 1;
84 while (i < j && cpnew <= cpend) {
85 if (*cpnew != ' ') {
86 if (prefix->s[i] == ' ') {
87 ++i;
88 continue;
89 }
90 if (*cpnew != prefix->s[i]) break;
91 ++i;
92 }
93 ++cpnew;
94 }
95 }
96 if (i == j) {
97 cp = cpnew;
98 junk = 1;
99 r |= 2;
100 }
101 }
102 }
103 }
104 *cpp = cp;
105
106 return r;
107}
108
117
/* trimend: strip trailing white space and "-Reply" suffixes.           */
/*                                                                      */
/* indata  start of the line (not modified).                            */
/* np      in/out. Length of the line on entry, trimmed length on       */
/*         return.                                                      */
/*                                                                      */
/* Trailing ' ', '\t', '\r' and '\n' are dropped; if what remains ends  */
/* in "-Reply" (as matched by case_startb()) those 6 bytes are dropped  */
/* too, and the process repeats.                                        */
/*                                                                      */
/* Returns 1 if at least one "-Reply" was removed, otherwise 0.         */
static int trimend(char *indata,unsigned long *np)
{
  unsigned long len;
  int again;
  int r = 0;

  len = *np;
  if (len == 0) return 0;

  /* Work with a length rather than a "last char" pointer so that we    */
  /* never form a pointer before indata and never narrow the result.    */
  do {
    again = 0;
    while (len > 0) {
      char ch = indata[len - 1];
      if (ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n') break;
      --len;
    }
    if (len >= 6 && case_startb(indata + len - 6,6,"-Reply")) {
      len -= 6;
      r = 1;
      again = 1;                        /* look for more behind it */
    }
  } while (again);

  *np = len;                            /* new length */
  return r;
}
141
159
160int unfold_hdr(char *indata,unsigned long n,stralloc *outdata,const char *charset,stralloc *prefix,int flagtrimsub)
161{
162 int r = 0;
163 char *cp, *cpesc, *cpnext, *cpend, *cpout;
164 char state, cset, newcset;
165 int reg, newreg;
166
167 cp = indata; /* JIS X 0201 -> ISO646 us-ascii */
168 cpend = cp + n - 1;
169 cpnext = cp;
170 if (!stralloc_copys(&tmpdata,"")) die_nomem();
171 if (!stralloc_ready(&tmpdata,n)) die_nomem();
172
173 if(!case_diffb(charset,11,"iso-2022-jp")) {
174 /* iso-2022-jp-2 (rfc1554) and its subset iso-2022-jp. The reg #s */
175 /* are from the rfc. Don't ask why they have multiple length G0 */
176 /* charset designations ... JIS X 0201-roman is identical to */
177 /* iso646 us-ascii except for currency and tilde. Making them the */
178 /* same increases hits without significant loss. JIS X 0208-1978 */
179 /* is superceded by JIS X 0208-1983 and converted here as well. */
180
181 while (cp < cpend) {
182 if (*cp++ != ESC) continue;
183 if (*cp == '(') {
184 if (++cp > cpend) break;
185 if (*cp == 'J') *cp = 'B';
186 ++cp;
187 } else if (*cp == '$') {
188 if (++cp > cpend) break;
189 if (*cp == '@') *cp = 'B';
190 ++cp;
191 }
192 }
193 cp = indata; /* eliminate redundant ESC seqs */
194 cpnext = cp;
195 reg = 6;
196 while (cp < cpend) {
197 if (*cp++ != ESC) continue;
198 cpesc = cp - 1;
199 if (*cp == '$') {
200 if (++cp > cpend) break;
201 if (*cp == 'B') newreg = 87;
202 else if (*cp == 'A') newreg = 58;
203 else if (*cp == '(') {
204 if (++cp > cpend) break;
205 if (*cp == 'C') newreg = 149;
206 else if (*cp == 'D') newreg = 159;
207 else continue;
208 } else continue;
209 } else if (*cp == '(') {
210 if (++cp > cpend) break;
211 if (*cp == 'B') newreg = 6;
212 else continue;
213 } else continue;
214 if (++cp > cpend) break;
215 while (*cp == ' ' || *cp == '\t')
216 if (++cp >= cpend) break; /* skip space */
217 if (*cp == ESC) /* maybe another G0 designation */
218 if (*(cp+1) == '(' || *(cp+1) == '$') { /* yep! */
219 if (!stralloc_catb(&tmpdata,cpnext,cpesc-cpnext)) die_nomem();
220 cpnext = cp;
221 continue;
222 }
223 if (reg == newreg) {
224 if (!stralloc_catb(&tmpdata,cpnext,cpesc-cpnext)) die_nomem();
225 cpnext = cp;
226 } else {
227 reg = newreg; /* copy remainder of line */
228 }
229 }
230 if (!stralloc_catb(&tmpdata,cpnext,cpend - cpnext + 1)) die_nomem();
231 if (reg != 6) { /* need to return to us-ascii at the end of the line */
232 if (!stralloc_cats(&tmpdata,TOASCII)) die_nomem();
233 } else { /* maybe "-Reply at the end?" */
234 r = trimend(tmpdata.s,&tmpdata.len);
235 }
236
237 } else if (!case_diffb(charset,11,"iso-2022-cn") ||
238 !case_diffb(charset,11,"iso-2022-kr")) {
239 /* these use SI/SO and ESC $ ) x as the SO designation. In -cn and */
240 /* -cn-ext, 'x' can be a number of different letters. In -kr it's */
241 /* always 'C'. This routine may work also for other iso-2022 sets */
242 /* also handles iso-2022-cn-ext */
243
244 cpesc = (char *) 0; /* points to latest ESC */
245 state = SI; /* us-ascii */
246 --cp; /* set up for loop */
247
248 while (++cp <= cpend) {
249 if (*cp == SI || *cp == SO) {
250 if (state == *cp) { /* already in state. Skip shift seq */
251 if (!stralloc_catb(&tmpdata,cpnext,cp-cpnext-1)) die_nomem();
252 cpnext = cp;
253 } else /* set new state */
254 state = *cp;
255 if (++cp > cpend) break;
256 continue;
257 }
258 if (*cp != ESC) continue;
259 if (cp + 3 > cpend) break; /* not space for full SO-designation */
260 cpesc = cp;
261 if (*cp != '$') continue;
262 if (++cp > cpend) break;
263 if (*cp != ')') continue;
264 if (++cp > cpend) break;
265 newcset = *cp;
266 if (++cp > cpend) break;
267 while (cp <= cpend && (*cp == ' ' || *cp == '\t')) ++cp;
268 if (cp + 3 > cpend) break; /* no space for full SO-designation */
269 if ((*cp == ESC && *(cp+1) == '$' && *(cp+2) == ')')
270 || (newcset == cset)) {
271 /* skip if a second SO-designation right after or */
272 /* this SO-designation is already active, skip */
273 if (!stralloc_catb(&tmpdata,cpnext,cpesc-cpnext)) die_nomem();
274 --cp; /* "unpeek" so that next iteration will see char */
275 cpnext = cpesc + 4;
276 continue;
277 } else {
278 cset = newcset;
279 continue;
280 }
281 }
282 if (!stralloc_catb(&tmpdata,cpnext,cpend - cpnext + 1)) die_nomem(); /* get remainder of line */
283 if (state != SI) /* need to end in ascii */
284 if (!stralloc_cats(&tmpdata,TOSI)) die_nomem();
285 else /* ascii end; maybe "-Reply" at the end? */
286 r = trimend(tmpdata.s,&tmpdata.len);
287
288 } else { /* other character sets = no special treatment */
289 r = trimend(cp,&n); /* -reply */
290 if (!stralloc_copyb(&tmpdata,cp,n)) die_nomem();
291 }
292
293 cp = tmpdata.s;
294 n = tmpdata.len;
295 cpend = cp + n - 1;
296 if (flagtrimsub) { /* remove leading reply indicators & prefix*/
297 r |= trimre(&cp,cpend,prefix);
298 n = (unsigned int) (cpend-cp+1);
299 }
300 /* there shouldn't be '\0' or '\n', but make sure as */
301 /* it would break the message index */
302
303 if (!stralloc_copys(outdata,"")) die_nomem();
304 if (!stralloc_ready(outdata,n)) die_nomem();
305 outdata->len = n;
306 cpout = outdata->s;
307 while (n--) { /* '\n' and '\0' would break the subject index */
308 if (!*cp || *cp == '\n') *cpout = ' ';
309 else *cpout = *cp;
310 ++cp; ++cpout;
311 }
312 return r;
313}
#define SO
Definition mime.h:19
#define ESC
Definition mime.h:17
#define TOSI
Definition mime.h:26
#define TOASCII
Definition mime.h:21
#define SI
Definition mime.h:18
Error messages. If you translate these, I would urge you to keep the English version as well....
#define ERR_NOMEM
Definition errtxt.h:14
void die_nomem()
Definition getconf.c:17
#define WHO
Definition author.c:1
#define outdata
Definition makehash.c:46
const char * cp
Definition ezmlm-cron.c:76
unsigned int serial
Definition ezmlm-send.c:99
int state
Definition ezmlm-cgi.c:138
const char * charset
Definition ezmlm-cgi.c:110
stralloc prefix
Definition ezmlm-idx.c:69
int unfold_hdr(char *indata, unsigned long n, stralloc *outdata, const char *charset, stralloc *prefix, int flagtrimsub)
Definition unfold_hdr.c:160
const char * logmsg(const char *dir, unsigned long num, unsigned long listno, unsigned long subs, int done)
Definition loginfo.c:32