1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
|
(function (root, fn) {
if (typeof define === 'function' && define.amd) {
define(fn);
} else if (typeof module !== 'undefined' && module.exports) {
module.exports = fn();
} else {
root.jsmime = fn();
}
}(this, function() {
// Registry of the bundle's internal modules: bare module name -> the value its
// factory returned.
var mods = {};

/**
 * Minimal stand-in for `require` handed to each internal module factory.
 *
 * @param {String} id Module name, optionally prefixed with a single "./".
 * @returns {*} Whatever was stored for that name, or undefined when nothing
 *              has been registered under it.
 */
function req(id) {
  var bareName = id.replace(/^\.\//, '');
  return mods[bareName];
}

/**
 * Runs a module factory right away and stores its result under the given name.
 *
 * @param {String}   id Name to register the module under (no "./" prefix).
 * @param {Function} fn Factory; it is called with `req` as its only argument.
 */
function def(id, fn) {
  var moduleValue = fn(req);
  mods[id] = moduleValue;
}
def('mimeutils', function() {
"use strict";
/**
 * Turn quoted-printable text into a binary string.
 *
 * Every `=XX` escape (two hex digits, either case) becomes the character with
 * that code, and every soft line break (`=`, optional spaces/tabs, then a line
 * ending or the end of the input) is removed. All other text is passed through
 * unchanged.
 *
 * @param buffer {BinaryString} The quoted-printable text to decode.
 * @param more {Boolean} Unused; accepted so the signature matches
 *                       decode_base64.
 * @returns {Array(BinaryString, BinaryString)} A two-element array: the decoded
 *     text followed by the empty string (no input is ever held back).
 */
function decode_qp(buffer, more) {
  // Quoted-printable carries no state from one line to the next, so there is
  // never anything to buffer and `more` can be ignored.
  const escapeOrSoftBreak = /=([0-9A-F][0-9A-F]|[ \t]*(\r\n|[\r\n]|$))/gi;
  const decoded = buffer.replace(escapeOrSoftBreak, (match, param) => {
    // A hex pair never trims down to nothing, so an empty result here means
    // the match was a soft line break, which is dropped entirely.
    const isSoftBreak = param.trim().length == 0;
    return isSoftBreak ? '' : String.fromCharCode(parseInt(param, 16));
  });
  return [decoded, ''];
}
/**
 * Turn base64 text into a binary string. Unlike window.atob, characters
 * outside the base64 alphabet are tolerated and simply skipped.
 *
 * @param buffer {BinaryString} The base64 text to decode.
 * @param more {Boolean} True when further input may follow, in which case an
 *                       incomplete trailing group is handed back instead of
 *                       being discarded. False to flush: an incomplete
 *                       trailing group is dropped.
 * @returns {Array(BinaryString, BinaryString)} A two-element array: the decoded
 *     text, followed by the undecoded characters the caller should prepend to
 *     its next call (empty when nothing is held back).
 */
function decode_base64(buffer, more) {
  const cleaned = buffer
    // Keep only characters from the base64 alphabet (and padding).
    .replace(/[^A-Za-z0-9+\/=]/g, '')
    // Padding that is followed by more data is harmful; strip it.
    .replace(/=+([A-Za-z0-9+\/])/g, '$1');

  // Decoding works on groups of 4 characters. Whatever does not fill a whole
  // group is either kept for the next call or thrown away.
  const excess = cleaned.length % 4;
  const leftover = (more && excess != 0) ? cleaned.slice(-excess) : '';
  const wholeGroups = cleaned.substring(0, cleaned.length - excess);

  // Relies on an atob function being available in global scope.
  return [atob(wholeGroups), leftover];
}
/**
 * Build a Uint8Array holding one element per UTF-16 code unit of a string.
 *
 * Code units above 255 are not rejected; they go through the normal Uint8Array
 * element conversion, which keeps only the low 8 bits.
 *
 * @param buffer {BinaryString} The string to convert.
 * @returns {Uint8Array} A new array of the same length as the string.
 */
function stringToTypedArray(buffer) {
  return Uint8Array.from(
    { length: buffer.length },
    (unused, index) => buffer.charCodeAt(index));
}
/**
 * Build a binary string from a Uint8Array, one character per element.
 *
 * @param buffer {Uint8Array} The bytes to convert.
 * @returns {BinaryString} A string whose character codes are the array's
 *                         elements, in order.
 */
function typedArrayToString(buffer) {
  // Convert in slices of 100 elements so that no single fromCharCode call
  // receives an unbounded number of arguments.
  const pieces = [];
  let offset = 0;
  while (offset < buffer.length) {
    const slice = buffer.subarray(offset, offset + 100);
    pieces.push(String.fromCharCode(...slice));
    offset += 100;
  }
  return pieces.join('');
}
/**
 * Three-letter English month abbreviations in calendar order (index 0 is
 * January), for use when parsing dates.
 */
var kMonthNames = [
  "Jan", "Feb", "Mar", "Apr",
  "May", "Jun", "Jul", "Aug",
  "Sep", "Oct", "Nov", "Dec",
];
// Value of the 'mimeutils' module: the two transfer-encoding decoders, the
// month-name table, and the binary-string <-> Uint8Array converters defined
// in this factory.
return {
decode_base64: decode_base64,
decode_qp: decode_qp,
kMonthNames: kMonthNames,
stringToTypedArray: stringToTypedArray,
typedArrayToString: typedArrayToString,
};
});
/**
* This file implements knowledge of how to encode or decode structured headers
* for several key headers. It is not meant to be used externally to jsmime.
*/
def('structuredHeaders', function (require) {
"use strict";
// Lookup tables filled in by the registrations in this module, keyed by
// header name.
var structuredDecoders = new Map();
var structuredEncoders = new Map();
var preferredSpellings = new Map();

/**
 * Registers a structured header under its lower-cased name.
 *
 * @param {String}   name    The header name in its preferred spelling.
 * @param {Function} decoder Stored in structuredDecoders for this header.
 * @param {Function} encoder Stored in structuredEncoders for this header.
 */
function addHeader(name, decoder, encoder) {
  const key = name.toLowerCase();
  const registrations = [
    [structuredDecoders, decoder],
    [structuredEncoders, encoder],
    // Remember the caller's spelling so it can be recovered from the key.
    [preferredSpellings, name],
  ];
  for (const [table, entry] of registrations) {
    table.set(key, entry);
  }
}
// Addressing headers: We assume that they can be specified in 1* form (this is
// false for From, but it's close enough to the truth that it shouldn't matter).
// There is no need to specialize the results for the header, so just pun it
// back to parseAddressingHeader.
/**
 * Decoder for address-list headers.
 *
 * Must be invoked with `this` set to an object providing
 * parseAddressingHeader(header, flag); each occurrence of the header is passed
 * to it with the flag set to true.
 *
 * @param {String[]} value Every occurrence of the header, in order.
 * @returns {Array} The per-occurrence results concatenated into one flat
 *                  array (empty when `value` is empty).
 */
function parseAddress(value) {
  // The arrow callback keeps `this` pointing at the parser this decoder was
  // invoked on, so no alias variable is needed.
  return value.reduce(
    (results, header) =>
      results.concat(this.parseAddressingHeader(header, true)),
    []);
}
/**
 * Encoder for address-list headers.
 *
 * Must be invoked with `this` set to an object providing addAddresses(list).
 *
 * @param {Object|Object[]} value One address or an array of addresses; a lone
 *                                entry is wrapped in an array before being
 *                                handed on.
 */
function writeAddress(value) {
  const addresses = Array.isArray(value) ? value : [value];
  this.addAddresses(addresses);
}
// Every header listed here holds addresses and shares the same decoder/encoder
// pair. The names are registered in the order written.
for (let name of [
  // Addressing headers from RFC 5322:
  "Bcc",
  "Cc",
  "From",
  "Reply-To",
  "Resent-Bcc",
  "Resent-Cc",
  "Resent-From",
  "Resent-Reply-To",
  "Resent-Sender",
  "Resent-To",
  "Sender",
  "To",
  // From RFC 5536:
  "Approved",
  // From RFC 3798:
  "Disposition-Notification-To",
  // Non-standard headers:
  "Delivered-To",
  "Return-Receipt-To",
  // http://cr.yp.to/proto/replyto.html
  "Mail-Reply-To",
  "Mail-Followup-To",
]) {
  addHeader(name, parseAddress, writeAddress);
}
// Parameter-based headers. Note that all parameters are slightly different, so
// we use slightly different variants here.
/**
 * Shared helper for decoders of parameter-style headers.
 *
 * Must be invoked with `this` set to an object providing
 * parseParameterHeader(text, do2231, do2047); the two flags are passed through
 * unchanged.
 *
 * @param {String[]} value   Every occurrence of the header, in order.
 * @param {Boolean}  do2231  Forwarded as the second argument.
 * @param {Boolean}  do2047  Forwarded as the third argument.
 * @returns {*} Whatever this.parseParameterHeader returns.
 */
function parseParameterHeader(value, do2231, do2047) {
  // Later occurrences would only redefine the parameters, so they are ignored.
  const firstOccurrence = value[0];
  return this.parseParameterHeader(firstOccurrence, do2231, do2047);
}
// RFC 2045
/**
 * Decoder for the Content-Type header.
 *
 * Only the first occurrence of the header is examined. The text before the
 * first semicolon (the `preSemi` property of the parsed parameters) must be
 * exactly "type/subtype"; otherwise the header is treated as text/plain and
 * all of its parameters are discarded.
 *
 * @param {String[]} value Every occurrence of the header, in order.
 * @returns {Map} A Map from lower-cased parameter name to parameter value,
 *                which additionally carries three plain properties:
 *                `mediatype`, `subtype` and `type` ("mediatype/subtype"), all
 *                lower-cased.
 */
function parseContentType(value) {
  let params = parseParameterHeader.call(this, value, false, false);
  let pieces = params.preSemi.split('/');
  if (pieces.length != 2) {
    // Malformed. Return to text/plain. Evil, ain't it?
    params = new Map();
    pieces = ["text", "plain"];
  }

  const mediatype = pieces[0].toLowerCase();
  const subtype = pieces[1].toLowerCase();

  // The type information lives on the Map object itself, next to (not among)
  // the parameter entries.
  const structure = new Map();
  structure.mediatype = mediatype;
  structure.subtype = subtype;
  structure.type = mediatype + '/' + subtype;

  params.forEach(function (paramValue, paramName) {
    structure.set(paramName.toLowerCase(), paramValue);
  });
  return structure;
}
// NOTE(review): this key keeps its mixed-case spelling, whereas addHeader
// stores lower-cased keys -- confirm that lookups use matching casing.
structuredDecoders.set("Content-Type", parseContentType);
// Unstructured headers (just decode RFC 2047 for the first header value)
/**
 * Decoder for unstructured headers.
 *
 * Must be invoked with `this` set to an object providing
 * decodeRFC2047Words(text).
 *
 * @param {String[]} values Every occurrence of the header; only the first is
 *                          used.
 * @returns {*} The result of this.decodeRFC2047Words on the first occurrence.
 */
function parseUnstructured(values) {
  const firstOccurrence = values[0];
  return this.decodeRFC2047Words(firstOccurrence);
}

/**
 * Encoder for unstructured headers.
 *
 * Must be invoked with `this` set to an object providing
 * addUnstructured(text), to which the value is handed unchanged.
 *
 * @param {String} value The header value to write.
 */
function writeUnstructured(value) {
  const emitter = this;
  emitter.addUnstructured(value);
}
// Message-ID headers.
/**
 * Decoder for message-ID headers.
 *
 * TODO: Proper parsing of these headers is not supported yet; the first
 * occurrence is simply run through this.decodeRFC2047Words.
 *
 * @param {String[]} values Every occurrence of the header; only the first is
 *                          used.
 * @returns {*} The result of this.decodeRFC2047Words on the first occurrence.
 */
function parseMessageID(values) {
  const firstOccurrence = values[0];
  return this.decodeRFC2047Words(firstOccurrence);
}

/**
 * Encoder for message-ID headers.
 *
 * TODO: Proper support for these headers is not implemented yet; the value is
 * handed unchanged to this.addUnstructured.
 *
 * @param {String} value The header value to write.
 */
function writeMessageID(value) {
  const emitter = this;
  emitter.addUnstructured(value);
}
// Headers handled as unstructured text. The names are registered in the order
// written.
for (let name of [
  // RFC 5322
  "Comments",
  "Keywords",
  "Subject",
  // RFC 2045
  "MIME-Version",
  "Content-Description",
  // RFC 7231
  "User-Agent",
]) {
  addHeader(name, parseUnstructured, writeUnstructured);
}
// Date headers
/**
 * Decoder for date headers.
 *
 * Must be invoked with `this` set to an object providing
 * parseDateHeader(text).
 *
 * @param {String[]} values Every occurrence of the header; only the first is
 *                          used.
 * @returns {*} The result of this.parseDateHeader on the first occurrence.
 */
function parseDate(values) {
  const firstOccurrence = values[0];
  return this.parseDateHeader(firstOccurrence);
}

/**
 * Encoder for date headers.
 *
 * Must be invoked with `this` set to an object providing addDate(value), to
 * which the value is handed unchanged.
 *
 * @param {*} value The date to write.
 */
function writeDate(value) {
  const emitter = this;
  emitter.addDate(value);
}
// Headers holding a date. The names are registered in the order written.
for (let name of [
  // RFC 5322
  "Date",
  "Resent-Date",
  // RFC 5536
  "Expires",
  "Injection-Date",
  "NNTP-Posting-Date",
]) {
  addHeader(name, parseDate, writeDate);
}
// Headers holding message IDs.
for (let name of [
  // RFC 5322
  "Message-ID",
  "Resent-Message-ID",
]) {
  addHeader(name, parseMessageID, writeMessageID);
}
// Miscellaneous headers (those that don't fall under the above schemes):
// RFC 2045
// The decoder yields the first occurrence of the header, lower-cased; the
// encoder writes the value as unstructured text.
// NOTE(review): these keys keep their mixed-case spelling, whereas addHeader
// stores lower-cased keys -- confirm that lookups use matching casing.
structuredDecoders.set(
  "Content-Transfer-Encoding",
  (values) => values[0].toLowerCase());
structuredEncoders.set("Content-Transfer-Encoding", writeUnstructured);
// Some clients like outlook.com send non-compliant References headers that
// separate values using commas. Also, some clients don't separate References
// with spaces, since these are optional according to RFC2822. So here we
// preprocess these headers (see bug 1154521 and bug 1197686).
/**
 * Normalises a list of message IDs.
 *
 * The input is converted to a string, every `<...>` group in it is collected
 * in order, and everything between the groups is discarded.
 *
 * @param {String[]} values Every occurrence of the header.
 * @returns {String} The `<...>` groups joined by single spaces, or the empty
 *                   string when there are none.
 */
function preprocessMessageIDs(values) {
  const found = `${values}`.match(/<[^>]*>/g);
  return found === null ? '' : found.join(' ');
}
// Both message-ID list headers are decoded with preprocessMessageIDs. No
// encoder or preferred spelling is registered for them here.
// NOTE(review): these keys keep their mixed-case spelling, whereas addHeader
// stores lower-cased keys -- confirm that lookups use matching casing.
structuredDecoders.set("References", preprocessMessageIDs);
structuredDecoders.set("In-Reply-To", preprocessMessageIDs);
// Value of the 'structuredHeaders' module: the three lookup tables. Only the
// wrapper object is frozen; Object.freeze is shallow, so the Maps themselves
// remain mutable.
return Object.freeze({
decoders: structuredDecoders,
encoders: structuredEncoders,
spellings: preferredSpellings,
});
});
def('headerparser', function(require) {
/**
* This file implements the structured decoding of message header fields. It is
* part of the same system as found in mimeutils.js, and occasionally makes
* references to globals defined in that file or other dependencies thereof. See
* documentation in that file for more information about external dependencies.
*/
"use strict";
var mimeutils = require('./mimeutils');
/**
* This is the API that we ultimately return.
*
* We define it as a global here, because we need to pass it as a |this|
* argument to a few functions.
*/
var headerparser = {};
/**
* Tokenizes a message header into a stream of tokens as a generator.
*
* The low-level tokens are meant to correspond loosely to the tokens as
* defined in RFC 5322. For reasons of saner error handling, however, the two
* definitions are not exactly equivalent. The tokens we emit are the following:
* 1. Special delimiters: Any char in the delimiters string is emitted as a
* string by itself. Parsing parameter headers, for example, would use ";="
* for the delimiter string.
* 2. Quoted-strings (if opts.qstring is true): A string which is surrounded by
* double quotes. Escapes in the string are omitted when returning.
* 3. Domain Literals (if opts.dliteral is true): A string which matches the
* dliteral construct in RFC 5322. Escapes here are NOT omitted.
* 4. Comments (if opts.comments is true): Comments are handled specially. In
* practice, decoding the comments in To headers appears to be necessary, so
* comments are not stripped in the output value. Instead, they are emitted
* as if they are a special delimiter. However, all delimiters found within a
* comment are returned as if they were a quoted string, so that consumers
* ignore delimiters within comments. If ignoring comment text completely is
* desired, upon seeing a "(" token, consumers should ignore all tokens until
* a matching ")" is found (note that comments can be nested).
* 5. RFC 2047 encoded-words (if opts.rfc2047 is true): These are strings which
* are the decoded contents of RFC 2047's =?UTF-8?Q?blah?=-style words.
* 6. Atoms: Atoms are defined not in the RFC 5322 sense, but rather as the
* longest sequence of characters that is neither whitespace nor any of the
* special characters above.
*
* The intended interpretation of the stream of output tokens is that they are
* the portions of text which can be safely wrapped in whitespace with no ill
* effect. The output tokens are either strings (which represent individual
* delimiter tokens) or instances of a class that has a customized .toString()
* for output (for quoted strings, atoms, domain literals, and encoded-words).
* Checking for a delimiter MUST use the strictly equals operator (===). For
* example, the proper way to call this method is as follows:
*
* for (let token of getHeaderTokens(rest, ";=", opts)) {
* if (token === ';') {
* // This represents a literal ';' in the string
* } else if (token === '=') {
* // This represents a literal '=' in the string
* } else {
* // If a ";" qstring was parsed, we fall through to here!
* token = token.toString();
* }
* }
*
* This method does not properly tokenize 5322 in all corner cases; however,
* this is equivalent in those corner cases to an older header parsing
* algorithm, so the algorithm should be correct for all real-world cases. The
* corner cases are as follows:
* 1. Quoted-strings and domain literals are parsed even if they are within a
* comment block (we effectively treat ctext as containing qstring).
* 2. WSP need not be between a qstring and an atom (a"b" produces two tokens,
* a and b). This is an error case, though.
* 3. Legacy comments as display names: We recognize address fields with
* comments, and (a) either drop them if inside addr-spec or (b) preserve
* them as part of the display-name if not. If the display-name is empty
* while the last comment is not, we assume it's the legacy form above and
* take the comment content as the display-name.
*
* @param {String} value The header value, post charset conversion but
* before RFC 2047 decoding, to be parsed.
* @param {String} delimiters A set of delimiters to include as individual
* tokens.
* @param {Object} opts A set of options selecting what to parse.
* @param {Boolean} [opts.qstring] If true, recognize quoted strings.
* @param {Boolean} [opts.dliteral] If true, recognize domain literals.
* @param {Boolean} [opts.comments] If true, recognize comments.
* @param {Boolean} [opts.rfc2047] If true, parse and decode RFC 2047
* encoded-words.
* @returns {(Token|String)[]} An array of Token objects (which have a toString
* method returning their value) or String objects
* (representing delimiters).
*/
function getHeaderTokens(value, delimiters, opts) {
// The array of parsed tokens. This method used to be a generator, but it
// appears that generators are poorly optimized in current engines, so it was
// converted to not be one.
let tokenList = [];
/// Represents a non-delimiter token
function Token(token) {
// Unescape all quoted pairs. Any trailing \ is deleted.
this.token = token.replace(/\\(.?)/g, "$1");
}
Token.prototype.toString = function () { return this.token; };
// The start of the current token (e.g., atoms, strings)
let tokenStart = undefined;
// The set of whitespace characters, as defined by RFC 5322
let wsp = " \t\r\n";
// If we are a domain literal ([]) or a quoted string ("), this is set to the
// character to look for at the end.
let endQuote = undefined;
// The current depth of comments, since they can be nested. A value 0 means we
// are not in a comment.
let commentDepth = 0;
// Iterate over every character one character at a time.
let length = value.length;
for (let i = 0; i < length; i++) {
let ch = value[i];
// If we see a \, no matter what context we are in, ignore the next
// character.
if (ch == '\\') {
i++;
continue;
}
// If we are in a qstring or a dliteral, process the character only if it is
// what we are looking for to end the quote.
if (endQuote !== undefined) {
if (ch == endQuote && ch == '"') {
// Quoted strings don't include their delimiters.
let text = value.slice(tokenStart + 1, i);
// If RFC 2047 is enabled, always decode the qstring.
if (opts.rfc2047)
text = decodeRFC2047Words(text);
tokenList.push(new Token(text));
endQuote = undefined;
tokenStart = undefined;
} else if (ch == endQuote && ch == ']') {
// Domain literals include their delimiters.
tokenList.push(new Token(value.slice(tokenStart, i + 1)));
endQuote = undefined;
tokenStart = undefined;
}
// Avoid any further processing.
continue;
}
// If we can match the RFC 2047 encoded-word pattern, we need to decode the
// entire word or set of words.
if (opts.rfc2047 && ch == '=' && i + 1 < value.length && value[i + 1] == '?') {
// RFC 2047 tokens separated only by whitespace are conceptually part of
// the same output token, so we need to decode them all at once.
let encodedWordsRE = /([ \t\r\n]*=\?[^?]*\?[BbQq]\?[^?]*\?=)+/;
let result = encodedWordsRE.exec(value.slice(i));
if (result !== null) {
// If we were in the middle of a prior token (i.e., something like
// foobar=?UTF-8?Q?blah?=), yield the previous segment as a token.
if (tokenStart !== undefined) {
tokenList.push(new Token(value.slice(tokenStart, i)));
tokenStart = undefined;
}
// Find out how much we need to decode...
let encWordsLen = result[0].length;
let string = decodeRFC2047Words(value.slice(i, i + encWordsLen),
"UTF-8");
// Don't make a new Token variable, since we do not want to unescape the
// decoded string.
tokenList.push({ toString: function() { return string; }});
// Skip everything we decoded. The -1 is because we don't want to
// include the starting character.
i += encWordsLen - 1;
continue;
}
// If we are here, then we failed to match the simple 2047 encoded-word
// regular expression, despite the fact that it matched the =? at the
// beginning. Fall through and treat the text as if we aren't trying to
// decode RFC 2047.
}
// If we reach this point, we're not inside of quoted strings, domain
// literals, or RFC 2047 encoded-words. This means that the characters we
// parse are potential delimiters (unless we're in comments, where
// everything starts to go really wonky). Several things could happen,
// depending on the kind of character we read and whether or not we were in
// the middle of a token. The three values here tell us what we could need
// to do at this point:
// tokenIsEnding: The current character is not able to be accumulated to an
// atom, so we need to flush the atom if there is one.
// tokenIsStarting: The current character could begin an atom (or
// anything that requires us to mark the starting point), so we need to save
// the location.
// isSpecial: The current character is a delimiter that needs to be output.
let tokenIsEnding = false, tokenIsStarting = false, isSpecial = false;
if (wsp.includes(ch)) {
// Whitespace ends current tokens, doesn't emit anything.
tokenIsEnding = true;
} else if (commentDepth == 0 && delimiters.includes(ch)) {
// Delimiters end the current token, and need to be output. They do not
// apply within comments.
tokenIsEnding = true;
isSpecial = true;
} else if (opts.qstring && ch == '"') {
// Quoted strings end the last token and start a new one.
tokenIsEnding = true;
tokenIsStarting = true;
endQuote = ch;
} else if (opts.dliteral && ch == '[') {
// Domain literals end the last token and start a new one.
tokenIsEnding = true;
tokenIsStarting = true;
endQuote = ']';
} else if (opts.comments && ch == '(') {
// Comments are nested (oh joy). We only really care for the outer
// delimiter, though, which also ends the prior token and needs to be
// output if the consumer requests it.
commentDepth++;
if (commentDepth == 1) {
tokenIsEnding = true;
isSpecial = true;
} else {
tokenIsStarting = true;
}
} else if (opts.comments && ch == ')') {
// Comments are nested (oh joy). We only really care for the outer
// delimiter, though, which also ends the prior token and needs to be
// output if the consumer requests it.
if (commentDepth > 0)
commentDepth--;
if (commentDepth == 0) {
tokenIsEnding = true;
isSpecial = true;
} else {
tokenIsStarting = true;
}
} else {
// Not a delimiter, whitespace, comment, domain literal, or quoted string.
// Must be part of an atom then!
tokenIsStarting = true;
}
// If our analysis concluded that we closed an open token, and there is an
// open token, then yield that token.
if (tokenIsEnding && tokenStart !== undefined) {
tokenList.push(new Token(value.slice(tokenStart, i)));
tokenStart = undefined;
}
// If we need to output a delimiter, do so.
if (isSpecial)
tokenList.push(ch);
// If our analysis concluded that we could open a token, and no token is
// opened yet, then start the token.
if (tokenIsStarting && tokenStart === undefined) {
tokenStart = i;
}
}
// That concludes the loop! If there is a currently open token, close that
// token now.
if (tokenStart !== undefined) {
// Error case: a partially-open quoted string is assumed to have a trailing
// " character.
if (endQuote == '"')
tokenList.push(new Token(value.slice(tokenStart + 1)));
else
tokenList.push(new Token(value.slice(tokenStart)));
}
return tokenList;
}
/**
 * Convert a header value into a UTF-16 string by attempting to decode it as
 * UTF-8 first and as a legacy charset second. If the header is valid UTF-8, it
 * is decoded as UTF-8; if it is not, the fallbackCharset is used instead.
 *
 * Values that contain no bytes in the range 0x80-0xff are returned untouched.
 *
 * A fallback charset is only honored if it does not start with "utf": UTF-8
 * would be redundant, and UTF-16/UTF-32 radically change how the header bytes
 * are interpreted. If no usable fallback exists (none given, a UTF-* label, or
 * a label that TextDecoder does not recognize), the value is decoded as UTF-8
 * with malformed sequences replaced by U+FFFD instead of raising an error.
 *
 * @param {String} headerValue       The header (as a binary string) to attempt
 *                                   to convert to UTF-16.
 * @param {String} [fallbackCharset] The optional charset to try if UTF-8
 *                                   doesn't work.
 * @returns {String} The UTF-16 representation of the string above.
 */
function convert8BitHeader(headerValue, fallbackCharset) {
  // Only attempt to convert the headerValue if it contains non-ASCII
  // characters.
  if (!/[\x80-\xff]/.test(headerValue))
    return headerValue;

  // TextDecoder works on typed arrays, not binary strings.
  let typedarray = mimeutils.stringToTypedArray(headerValue);

  // Build the fallback decoder before touching UTF-8: whether we have a usable
  // fallback decides whether the UTF-8 pass must be strict. An unknown charset
  // label makes the TextDecoder constructor throw a RangeError; treat that the
  // same as having no fallback at all rather than letting it escape.
  let fallbackDecoder = null;
  if (fallbackCharset && !fallbackCharset.toLowerCase().startsWith("utf")) {
    try {
      fallbackDecoder = new TextDecoder(fallbackCharset, {fatal: false});
    } catch (e) {
      if (!(e instanceof RangeError))
        throw e;
    }
  }

  // With a fallback available we want to know when UTF-8 decoding fails;
  // otherwise we want malformed bytes replaced with substitution characters.
  let utf8Decoder = new TextDecoder("utf-8", {fatal: fallbackDecoder !== null});
  try {
    return utf8Decoder.decode(typedarray);
  } catch (e) {
    // A lenient decoder never throws for bad data, so reaching this point
    // without a fallback means something unexpected happened.
    if (fallbackDecoder === null)
      throw e;
    return fallbackDecoder.decode(typedarray);
  }
}
/**
 * Expand every RFC 2047 encoded-word (=?charset?c?text?=) found in the input
 * and return the resulting string. Input without any encoded-words is fine,
 * which makes this suitable for unstructured headers.
 *
 * Text that merely looks like an encoded-word but cannot be decoded (bad
 * base64, unknown charset, ...) is left in the output verbatim. Pieces of the
 * input that consist only of whitespace are dropped, so adjacent encoded-words
 * are joined together.
 *
 * @param {String} headerValue The header which may contain RFC 2047 encoded-
 *                             words.
 * @returns {String} A full UTF-16 string with all encoded words expanded.
 */
function decodeRFC2047Words(headerValue) {
  // Many encoders split over-long encoded-words in the middle of a multibyte
  // character. To cope, a run of encoded-words sharing one charset is fed
  // through a single streaming decoder; these two variables carry that decoder
  // (and the charset it was created for) from one word to the next.
  let activeCharset = '';
  let activeDecoder = undefined;

  /**
   * Decode one candidate encoded-word. Defined inline so it can share the
   * streaming-decoder state above.
   *
   * @param {String}  word        The candidate text, starting with "=?".
   * @param {Boolean} isFinalWord True if no further candidate follows, in
   *                              which case the decoder is not left streaming.
   * @returns {String|Boolean} The decoded text, or false if the word is not
   *                           decodable.
   */
  function decodeWord(word, isFinalWord) {
    let parts = word.split("?");
    // Anything that is not =?a?b?c?= is obviously not an encoded-word.
    if (parts.length != 5 || parts[4] != '=')
      return false;

    // RFC 2231 allows charset*language here; only the charset matters to us.
    let charsetField = parts[1];
    let starAt = charsetField.indexOf('*');
    let charset = starAt < 0 ? charsetField : charsetField.substring(0, starAt);
    let payload = parts[3];

    let rawBytes;
    switch (parts[2]) {
      case 'B':
      case 'b':
        // Any character outside the base64 alphabet makes the word illegal.
        if (/[^A-Za-z0-9+\/=]/.test(payload))
          return false;
        rawBytes = mimeutils.decode_base64(payload, false)[0];
        break;
      case 'Q':
      case 'q':
        // Q encoding is close to quoted-printable except that "_" stands for
        // a space, so translate those and hand the rest to the QP decoder.
        // A lone trailing "=" (already malformed input) may be swallowed by
        // that decoder.
        rawBytes = mimeutils.decode_qp(payload.replace(/_/g, ' '), false)[0];
        break;
      default:
        return false;
    }

    // The text decoder wants a typed array; keep the string form around for
    // the ISO-2022-JP check below.
    let byteArray = mimeutils.stringToTypedArray(rawBytes);

    // A change of charset ends the current streaming run: drain whatever the
    // old decoder still holds.
    let flushed = '';
    if (activeDecoder && charset != activeCharset) {
      flushed = activeDecoder.decode();
      activeDecoder = null;
    }

    activeCharset = charset;
    if (!activeDecoder) {
      try {
        activeDecoder = new TextDecoder(charset, {fatal: false});
      } catch (e) {
        // Unrecognized charset: give up on this word.
        return false;
      }
    }

    // Stream unless this is the last word, or the word is ISO-2022-JP and
    // ends by switching back to the ASCII state ("ESC(B").
    let mustFinish = isFinalWord ||
      (charset.toUpperCase() == "ISO-2022-JP" && rawBytes.endsWith("\x1B(B"));
    return flushed + activeDecoder.decode(byteArray, {stream: !mustFinish});
  }

  // Cut the input into encoded-word candidates and the text between them. The
  // capturing group keeps the candidates in the result; the pattern already
  // filters out grossly malformed words.
  let pieces = headerValue.split(/(=\?[^?]*\?[BQbq]\?[^?]*\?=)/);

  // Remember which piece is the last one that starts like an encoded-word.
  let finalWordIndex = -1;
  pieces.forEach(function (piece, index) {
    if (piece.startsWith("=?"))
      finalWordIndex = index;
  });

  for (let index = 0; index < pieces.length; index++) {
    let piece = pieces[index];
    if (piece.startsWith("=?")) {
      let decoded = decodeWord(piece, index == finalWordIndex);
      if (decoded !== false) {
        // Successfully decoded: substitute the decoded text and move on.
        pieces[index] = decoded;
        continue;
      }
    } else if (/^[ \t\r\n]*$/.test(piece)) {
      // Whitespace-only pieces vanish, gluing neighbouring words together.
      pieces[index] = '';
      continue;
    }

    // Ordinary text (or an undecodable candidate) interrupts any streaming
    // run: drain the decoder in front of this piece and start afresh.
    activeCharset = '';
    if (activeDecoder) {
      pieces[index] = activeDecoder.decode() + piece;
      activeDecoder = null;
    }
  }

  // Stitch the processed pieces back together.
  return pieces.join('');
}
///////////////////////////////
// Structured field decoders //
///////////////////////////////
/**
 * Extract a list of addresses from a header which matches the RFC 5322
 * address-list production, possibly doing RFC 2047 decoding along the way.
 *
 * Each element of the returned array is either an address,
 *   { name: <display name>, email: <address> }
 * or a group,
 *   { name: <group display name>, group: <array of address objects> }
 *
 * @param {String}  header    The MIME header text to be parsed
 * @param {Boolean} doRFC2047 If true, decode RFC 2047 parameters found in the
 *                            header. Defaults to true when undefined.
 * @returns {(Address|Group)[]} An array of the addresses found in the header,
 *                              where each element is of the form mentioned
 *                              above.
 */
function parseAddressingHeader(header, doRFC2047) {
  // RFC 2047 decoding is on unless the caller says otherwise.
  if (doRFC2047 === undefined)
    doRFC2047 = true;

  // Finished top-level entries, and the mailboxes collected since the last
  // group boundary.
  let topLevel = [];
  let pending = [];

  // Text accumulated for the mailbox currently being parsed.
  let nameText = '', groupLabel = '', localText = '', addrText = '',
      commentText = '';

  // Parser state flags.
  let insideAngle = false, insideComment = false, wantSpace = false;
  let savedWantSpace = false;
  let justClosedComment = false;

  // Background on comments (RFC 5322 section 3.4): legacy software writes
  // "addr-spec (Real Name)", and comments may also show up inside an
  // addr-spec. We never generate either form but we do accept them:
  //  - a comment inside an addr-spec is dropped,
  //  - a comment outside is kept as part of the display-name,
  //  - if the display-name ends up empty but a comment was seen, the content
  //    of the last comment is used as the display-name (the legacy form).
  // Until an '@' or '<' shows up we cannot tell whether text belongs to the
  // display-name or to the local-part, so both are collected in parallel,
  // together with the most recent comment.
  let trailingComment = '';

  /**
   * Append the mailbox parsed so far to the pending list, fixing up the
   * display-name for the legacy "addr (Name)" form, and reset the per-mailbox
   * state.
   * @param {String} displayName display-name as per RFC 5322
   * @param {String} addrSpec    addr-spec as per RFC 5322
   */
  function commitMailbox(displayName, addrSpec) {
    // Re-quote the local-part if it contains characters that require it.
    let at = addrSpec.lastIndexOf("@");
    let lp = addrSpec.substring(0, at);
    if (/[ !()<>\[\]:;@\\,"]/.test(lp)) {
      addrSpec = '"' + lp.replace(/([\\"])/g, "\\$1") + '"' +
                 addrSpec.substring(at);
    }

    if (displayName === '' && trailingComment !== '') {
      // Strip the parentheses (and the optional leading space) from the last
      // comment and use what is left as the display-name.
      let skip = trailingComment[0] === ' ' ? 2 : 1;
      displayName = trailingComment.slice(skip, -1);
    }

    // Entirely empty entries (e.g. "To: , ,") are not recorded.
    if (displayName !== '' || addrSpec !== '')
      pending.push({name: displayName, email: addrSpec});

    // Start the next mailbox with a clean slate.
    nameText = localText = addrText = trailingComment = '';
    insideAngle = insideComment = wantSpace = false;
  }

  let tokens = getHeaderTokens(header, ":,;<>@",
    {qstring: true, comments: true, dliteral: true, rfc2047: doRFC2047});

  for (let token of tokens) {
    switch (token) {
      case ':':
        // Everything collected so far was the name of a group.
        groupLabel = nameText;
        nameText = '';
        localText = '';
        // Mailboxes seen before the group belong to the top level.
        if (pending.length > 0)
          topLevel = topLevel.concat(pending);
        pending = [];
        break;

      case '<':
        if (!insideAngle) {
          insideAngle = true;
        } else {
          // A second '<': what looked like an address was really a name.
          if (addrText.length > 0)
            nameText = addrText;
          localText = addrText = '';
        }
        break;

      case '>':
        insideAngle = false;
        // Comments seen inside the addr-spec are forgotten.
        trailingComment = '';
        break;

      case '(':
        insideComment = true;
        // Remember the spacing state so it can be restored if the comment
        // turns out to be ignored. The flag is not always set when it should
        // be (e.g. a comment right behind an angle-addr), so derive it from
        // the name in that case.
        savedWantSpace = wantSpace;
        if (!wantSpace)
          wantSpace = nameText !== '' && !nameText.endsWith(' ');
        commentText = wantSpace ? ' (' : '(';
        justClosedComment = false;
        break;

      case ')':
        insideComment = false;
        commentText += ')';
        trailingComment = commentText;
        // The comment can be part of the name but never of the local-part.
        // A following space is only forced when the comment is kept.
        if (insideAngle) {
          wantSpace = savedWantSpace;
        } else {
          nameText += commentText;
          wantSpace = true;
        }
        justClosedComment = true;
        // Skip the delimiter epilogue below: wantSpace was set on purpose.
        continue;

      case '@':
        // Outside of <>, an '@' reveals that what we took for a display name
        // was the local-part of a bare addr-spec.
        if (!insideAngle) {
          addrText = localText;
          nameText = '';
          localText = '';
          // Whatever follows belongs to the addr-spec as well.
          insideAngle = true;
        }
        addrText += '@';
        break;

      case ',':
        // End of the current mailbox.
        commitMailbox(nameText, addrText);
        break;

      case ';':
        // End of the current mailbox and of the current group. Without a
        // group name the ';' behaves like a ','.
        commitMailbox(nameText, addrText);
        if (groupLabel === '') {
          topLevel = topLevel.concat(pending);
        } else {
          topLevel.push({
            name: groupLabel,
            group: pending
          });
        }
        pending = [];
        groupLabel = '';
        break;

      default: {
        // Comment content, a quoted-string, or a run of atoms and dots.
        // Tokens starting with '.' attach directly to what precedes them.
        let spaced = token;
        if (wantSpace && token.toString()[0] != '.')
          spaced = ' ' + spaced;

        if (insideComment) {
          commentText += spaced;
        } else if (insideAngle) {
          addrText += spaced;
        } else {
          nameText += spaced;
          // The local-part never gets the space that follows an ignored
          // comment.
          if (justClosedComment) {
            localText += token;
            justClosedComment = false;
          } else {
            localText += spaced;
          }
        }

        // The next token needs a separating space unless this one started
        // with a '.'.
        wantSpace = token.toString()[0] != '.';
        // Do not fall into the delimiter epilogue, which would clear the flag.
        continue;
      }
    }

    // A delimiter was just consumed: no space is needed before the next token.
    wantSpace = false;
  }

  // Flush the last mailbox, and close a group that is missing its final ';'.
  commitMailbox(nameText, addrText);
  if (groupLabel !== '') {
    topLevel.push({name: groupLabel, group: pending});
    pending = [];
  }

  // Whatever is still pending belongs to the top level.
  return topLevel.concat(pending);
}
/**
 * Extract parameters from a header which is a series of ;-separated
 * attribute=value tokens, i.e. `token [; attribute = token-or-qstring]*`.
 *
 * @param {String}  headerValue The MIME header value to parse.
 * @param {Boolean} doRFC2047   If true, decode RFC 2047 encoded-words.
 * @param {Boolean} doRFC2231   If true, decode RFC 2231 encoded parameters.
 *                              If false, only plain `param=value` pairs are
 *                              returned.
 * @return {Map(String -> String)} A map of parameter names to parameter values.
 *                                 The property preSemi is set to the token that
 *                                 precedes the first semicolon.
 */
function parseParameterHeader(headerValue, doRFC2047, doRFC2231) {
  // Split off the part in front of the first semicolon (in the spirit of
  // nsMIMEHeaderParamImpl). The parameter text keeps its leading semicolon.
  let semicolonAt = headerValue.indexOf(";");
  let leading = semicolonAt < 0 ? headerValue
                                : headerValue.substring(0, semicolonAt);
  let paramText = semicolonAt < 0 ? '' : headerValue.substring(semicolonAt);

  // Of the leading part, keep only the first whitespace-delimited word.
  leading = leading.trim().split(/[ \t\r\n]/)[0];

  // Pass 1: tokenize the parameter text and collect raw [name, value] pairs.
  // pendingName is the parameter name currently being assembled;
  // expectingName is true until the '=' of the current parameter is seen.
  let tokenizerOpts = {qstring: true, rfc2047: doRFC2047};
  let pendingName = '';
  let expectingName = true;
  let rawPairs = [];
  for (let token of getHeaderTokens(paramText, ";=", tokenizerOpts)) {
    if (token === ';') {
      // "name=" with nothing after it: record an empty value.
      if (!expectingName && pendingName != '')
        rawPairs.push([pendingName, '']);
      pendingName = '';
      expectingName = true;
      continue;
    }
    if (token === '=') {
      expectingName = false;
      continue;
    }
    if (expectingName) {
      // The first token is the name. A second token before any '='
      // ("; tokenA tokenB") is an error: forget the name so that the whole
      // parameter is ignored.
      pendingName = pendingName == '' ? token.toString() : '';
      continue;
    }
    // We are behind an '='. Without a name there is nothing to attach the
    // value to (this also swallows anything after the first value).
    if (pendingName == '')
      continue;

    let text = token.toString();
    // A name ending in '*' marks an extended-value, which may be %-encoded.
    // RFC 2231 is vague about whether that applies inside quoted strings,
    // but in practice it has to.
    if (doRFC2231 && pendingName.endsWith('*')) {
      text = text.replace(/%([0-9A-Fa-f]{2})/g, function (whole, hex) {
        return String.fromCharCode(parseInt(hex, 16));
      });
    }
    rawPairs.push([pendingName, text]);
    // Clearing the name makes us ignore whatever else follows.
    pendingName = '';
  }
  // A trailing "name=" without a value still counts, with an empty value.
  if (!expectingName && pendingName != '')
    rawPairs.push([pendingName, '']);

  // Pass 2: sort the pairs into the three RFC 2231 flavours. %-decoding has
  // already happened above.
  //   plain:     param=val             -> value
  //   extended:  param*=cs'lang'val    -> value before charset decoding
  //   continued: param*N= / param*N*=  -> array of sections, carrying the
  //              extra properties `valid` (false once the numbering was found
  //              to be broken) and `hasCharset` (whether section 0 was
  //              written as param*0*=, i.e. needs charset decoding).
  let plain = new Map(), extended = new Map(), continued = new Map();
  for (let [fullName, value] of rawPairs) {
    // Use the first star so that param*0*= is recognised like param*0=.
    let starAt = fullName.indexOf('*');

    if (starAt == -1) {
      // param=val: the first occurrence wins.
      if (!plain.has(fullName))
        plain.set(fullName, value);
      continue;
    }

    let baseName = fullName.substring(0, starAt);
    if (starAt == fullName.length - 1) {
      // param*=...: again the first occurrence wins.
      if (!extended.has(baseName))
        extended.set(baseName, value);
      continue;
    }

    // param*N= or param*N*=
    let known = continued.has(baseName);
    let sections = continued.get(baseName);
    // Once the numbering is known to be broken, further sections are ignored.
    if (known && !sections.valid)
      continue;
    if (!known) {
      sections = [];
      sections.valid = true;
      sections.hasCharset = undefined;
      continued.set(baseName, sections);
    }

    // A trailing star requests charset decoding; it is only meaningful on
    // section 0.
    let endsWithStar = fullName.endsWith('*');
    let index = fullName.substring(starAt + 1,
                                   fullName.length - (endsWithStar ? 1 : 0));
    if (index == '0') {
      sections.hasCharset = endsWithStar;
    } else if (index[0] == '0' || !/^[0-9]+$/.test(index)) {
      // Leading zeros and non-numeric section numbers are illegal.
      sections.valid = false;
      continue;
    }

    let slot = parseInt(index, 10);
    // The same section twice? Then the numbering is broken.
    if (sections[slot] !== undefined) {
      sections.valid = false;
      continue;
    }
    // Assigning past the end grows the array as needed.
    sections[slot] = value;
  }

  // Pass 3: build the result. Plain values have the lowest priority, so they
  // go in first and may be overwritten below.
  let values = new Map(plain);

  if (doRFC2231) {
    // Continuations override plain values.
    for (let [paramName, sections] of continued) {
      // Without a section 0 nothing sensible can be said about the value.
      if (sections.hasCharset === undefined)
        continue;

      // Use the sections up to (not including) the first missing one.
      let usable = 0;
      while (usable < sections.length && sections[usable] !== undefined)
        usable++;
      let joined = sections.slice(0, usable).join('');

      if (sections.hasCharset) {
        try {
          joined = decode2231Value(joined);
        } catch (e) {
          // Bad charset, don't add anything.
          continue;
        }
      }
      values.set(paramName, joined);
    }

    // Extended values have the highest priority.
    for (let [paramName, encoded] of extended) {
      try {
        values.set(paramName, decode2231Value(encoded));
      } catch (e) {
        // Bad charset, don't add anything.
      }
    }
  }

  values.preSemi = leading;
  return values;
}
/**
 * Convert a RFC 2231-encoded string parameter (charset'language'text) into a
 * Unicode version of the string. This assumes that percent-decoding has
 * already been applied.
 *
 * The text is decoded strictly: an unknown (or missing) charset label and
 * bytes that are invalid in the charset both cause an exception, which
 * callers are expected to handle.
 *
 * @param {String} value The RFC 2231-encoded string to decode.
 * @return The Unicode version of the string.
 */
function decode2231Value(value) {
  // Locate the (up to) two single quotes that delimit charset and language.
  let firstQuote = value.indexOf("'");
  let charset = "";
  let secondQuote = -1;
  if (firstQuote >= 0) {
    charset = value.substring(0, firstQuote);
    secondQuote = value.indexOf("'", firstQuote + 1);
  }

  // The language tag between the quotes is not used anywhere at present, so
  // it is skipped; the text starts behind the last quote that was found.
  let text = value.substring(Math.max(firstQuote, secondQuote) + 1);

  // The decoder operates on a typed array rather than a binary string.
  let bytes = mimeutils.stringToTypedArray(text);

  // Constructing the decoder throws if the charset is not recognised.
  let decoder = new TextDecoder(charset, {fatal: true});
  return decoder.decode(bytes, {stream: false});
}
// Lookup table from timezone abbreviation to its numeric ±hhmm offset. It is
// used as a fallback when parsing obsolete Date productions.
var kKnownTZs = {
  // Zones explicitly listed in RFC 5322.
  "UT": "+0000",
  "GMT": "+0000",
  "EST": "-0500",
  "EDT": "-0400",
  "CST": "-0600",
  "CDT": "-0500",
  "MST": "-0700",
  "MDT": "-0600",
  "PST": "-0800",
  "PDT": "-0700",
  // Zones copied from NSPR's prtime.c.
  "AST": "-0400", // Atlantic Standard Time
  "NST": "-0330", // Newfoundland Standard Time
  "BST": "+0100", // British Summer Time
  "MET": "+0100", // Middle Europe Time
  "EET": "+0200", // Eastern Europe Time
  "JST": "+0900"  // Japan Standard Time
};
/**
 * Parse a header that contains a date-time definition according to RFC 5322.
 * The result is a JS date object with the same timestamp as the header.
 *
 * The accepted shape is
 *   [dow ,] dom mon year hh:mm[:ss] [zone]
 * where the seconds are optional (RFC 5322 section 3.3), two- and three-digit
 * years are expanded as described in RFC 5322 section 4.3, and the zone is
 * either a ±hhmm offset or one of the abbreviations in kKnownTZs. A missing or
 * unrecognized zone is treated as +0000.
 *
 * The dates returned by this parser cannot be reliably converted back into the
 * original header for two reasons. First, JS date objects cannot retain the
 * timezone information they were initialized with, so reserializing a date
 * header would necessarily produce a date in either the current timezone or in
 * UTC. Second, JS dates measure time as milliseconds elapsed from the POSIX
 * epoch excluding leap seconds. Any timestamp containing a leap second is
 * instead converted into one that represents the next second.
 *
 * Headers that do not match the shape above are not handed to Date.parse (the
 * formats it accepts are not fully specified); they produce an invalid date,
 * detectable by having its .getTime() method return NaN.
 *
 * @param {String} header The MIME header value to parse.
 * @returns {Date}        The date contained within the header, as described
 *                        above.
 */
function parseDateHeader(header) {
  let tokens = getHeaderTokens(header, ",:", {}).map(x => x.toString());

  // Ignore the day-of-the-week if present. This would be the first two tokens
  // (the name and the comma following it).
  if (tokens.length > 1 && tokens[1] === ',')
    tokens = tokens.slice(2);

  // At this point the tokens should be
  //   dom mon year hh ':' mm [':' ss] [zone]
  // Work out whether the optional seconds are present; if neither form fits,
  // the date is invalid.
  let hasSeconds;
  if (tokens.length >= 8 && tokens[6] === ':') {
    hasSeconds = true;
  } else if (tokens.length >= 6 && tokens[4] === ':') {
    hasSeconds = false;
  } else {
    return new Date(NaN);
  }

  // Save off the numeric tokens. The radix is explicit so that odd input such
  // as "0x10" is never read as hexadecimal.
  let day = parseInt(tokens[0], 10);
  // month is tokens[1]
  let year = parseInt(tokens[2], 10);
  let hours = parseInt(tokens[3], 10);
  // tokens[4] is the ':' between hours and minutes
  let minutes = parseInt(tokens[5], 10);
  let seconds = hasSeconds ? parseInt(tokens[7], 10) : 0;

  // Compute the month. Check only the first three characters for equality;
  // this allows us to accept, e.g., "January" in lieu of "Jan."
  let month = mimeutils.kMonthNames.indexOf(tokens[1].slice(0, 3));
  // If the month name is not recognized, make the result illegal.
  if (month < 0)
    month = NaN;

  // Expand obsolete short years per RFC 5322 section 4.3: 00-49 are in the
  // 2000s, 50-99 and any three-digit year are offsets from 1900.
  if (year < 50)
    year += 2000;
  else if (year < 1000)
    year += 1900;

  // Compute the timezone offset. If it's not in the form ±hhmm, convert it to
  // that form. Only the table's own keys count, so that a zone token such as
  // "toString" cannot pick up an inherited property.
  let tzoffset = tokens[hasSeconds ? 8 : 6];
  if (Object.prototype.hasOwnProperty.call(kKnownTZs, tzoffset))
    tzoffset = kKnownTZs[tzoffset];
  let decompose = /^([+-])(\d\d)(\d\d)$/.exec(tzoffset);
  // Unknown (or absent)? Make it +0000
  if (decompose === null)
    decompose = ['+0000', '+', '00', '00'];
  let tzOffsetInMin = parseInt(decompose[2], 10) * 60 +
                      parseInt(decompose[3], 10);
  if (decompose[1] == '-')
    tzOffsetInMin = -tzOffsetInMin;

  // The JS Date constructor builds the time in terms of the local timezone.
  // To account for the header's own offset properly, build the timestamp in
  // UTC and then shift it by that offset. If any field failed to parse, the
  // NaN propagates and the result is an invalid date.
  return new Date(Date.UTC(year, month, day, hours, minutes, seconds)
                  - tzOffsetInMin * 60 * 1000);
}
////////////////////////////////////////
// Structured header decoding support //
////////////////////////////////////////
// Registry of structured decoders, keyed by lower-cased header name.
var structuredDecoders = new Map();
// The built-in decoders and the preferred header spellings come from the
// structuredHeaders module.
var structuredHeaders = require('./structuredHeaders');
var preferredSpellings = structuredHeaders.spellings;
// Lower-cased names of the headers whose decoder may not be replaced.
var forbiddenHeaders = new Set();
// Register every built-in decoder. Each header is marked as forbidden only
// after it has been registered, since addStructuredDecoder refuses forbidden
// headers.
for (let [headerName, decoder] of structuredHeaders.decoders) {
  addStructuredDecoder(headerName, decoder);
  forbiddenHeaders.add(headerName.toLowerCase());
}
/**
* Use an already-registered structured decoder to parse the value of the header
* into a structured representation.
*
* As this method is designed to be used for the internal MIME Parser to convert
* the raw header values to well-structured values, value is intended to be an
* array consisting of all occurences of the header in order. However, for ease
* of use by other callers, it can also be treated as a string.
*
* If the decoder for the header is not found, an exception will be thrown.
*
* A large set of headers have pre-defined structured decoders; these decoders
* cannot be overrided with addStructuredDecoder, as doing so could prevent the
* MIME or message parsers from working properly. The pre-defined structured
* headers break down into five clases of results, plus some ad-hoc
* representations. They are:
*
* Addressing headers (results are the same as parseAddressingHeader):
* - Approved
* - Bcc
* - Cc
* - Delivered-To
* - Disposition-Notification-To
* - From
* - Mail-Reply-To
* - Mail-Followup-To
* - Reply-To
* - Resent-Bcc
* - Resent-Cc
* - Resent-From
* - Resent-Reply-To
* - Resent-Sender
* - Resent-To
* - Return-Receipt-To
* - Sender
* - To
*
* Date headers (results are the same as parseDateHeader):
* - Date
* - Expires
* - Injection-Date
* - NNTP-Posting-Date
* - Resent-Date
*
* References headers (results are the same as parseReferencesHeader):
* - (TODO: Parsing support for these headers is currently unsupported)
*
* Message-ID headers (results are the first entry of the result of
* parseReferencesHeader):
* - (TODO: Parsing support for these headers is currently unsupported)
*
* Unstructured headers (results are merely decoded according to RFC 2047):
* - Comments
* - Content-Description
* - Keywords
* - Subject
*
* The ad-hoc headers and their resulting formats are as follows:
* Content-Type: returns a JS Map of parameter names (in lower case) to their
* values, along with the following extra properties defined on the map:
* - mediatype: the type to the left of '/' (e.g., 'text', 'message')
* - subtype: the type to the right of '/' (e.g., 'plain', 'rfc822')
* - type: the full typename (e.g., 'text/plain')
* RFC 2047 and RFC 2231 decoding is applied where appropriate. The values of
* the type, mediatype, and subtype attributes are all normalized to lower-case,
* as are the names of all parameters.
*
* Content-Transfer-Encoding: the first value is converted to lower-case.
*
* @param {String} header The name of the header of the values.
* @param {String|Array} value The value(s) of the headers, after charset
* conversion (if any) has been applied. If it is
* an array, the headers are listed in the order
* they appear in the message.
* @returns {Object} A structured representation of the header values.
*/
function parseStructuredHeader(header, value) {
  // Decoders always receive an array of values, so a lone string (primitive
  // or String object) is wrapped into a 1-element array first.
  let isSingleString = typeof value === "string" || value instanceof String;
  let values = isSingleString ? [value] : value;
  if (!Array.isArray(values))
    throw new TypeError("Header value is not an array: " + values);

  // Decoders are registered under the lower-cased header name. A header with
  // no registered decoder is reported to the caller as an error.
  let key = header.toLowerCase();
  if (!structuredDecoders.has(key))
    throw new Error("Unknown structured header: " + header);

  // The decoder runs with the headerparser module as its |this|.
  return structuredDecoders.get(key).call(headerparser, values);
}
/**
* Add a custom structured MIME decoder to the set of known decoders. These
* decoders are used for {@link parseStructuredHeader} and similar functions to
* encode richer, more structured values instead of relying on string
* representations everywhere.
*
* Structured decoders are functions which take in a single parameter consisting
* of an array of the string values of the header, in order that they appear in
* the message. These headers have had the charset conversion (if necessary)
* applied to them already. The this parameter of the function is set to be the
* jsmime.headerparser module.
*
* There is a large set of structured decoders built-in to the jsmime library
* already. As these headers are fundamental to the workings of jsmime,
* attempting to replace them with a custom version will instead produce an
* exception.
*
* @param {String} header The header name (in any case)
* for which the decoder will be
* used.
* @param {Function(String[] -> Object)} decoder The structured decoder
* function.
*/
function addStructuredDecoder(header, decoder) {
  // All of the lookup tables are keyed by the lower-cased header name.
  let key = header.toLowerCase();

  // Headers listed as forbidden may not have their decoder replaced.
  if (forbiddenHeaders.has(key)) {
    throw new Error("Cannot override header: " + header);
  }
  structuredDecoders.set(key, decoder);

  // Only record a spelling if none is known yet; an existing preferred
  // spelling is never overwritten.
  if (preferredSpellings.has(key)) {
    return;
  }
  preferredSpellings.set(key, header);
}
// Attach the module's entry points to the headerparser object, then return it
// frozen so that its properties cannot be added to, removed, or reassigned.
headerparser.addStructuredDecoder = addStructuredDecoder;
headerparser.convert8BitHeader = convert8BitHeader;
headerparser.decodeRFC2047Words = decodeRFC2047Words;
headerparser.getHeaderTokens = getHeaderTokens;
headerparser.parseAddressingHeader = parseAddressingHeader;
headerparser.parseDateHeader = parseDateHeader;
headerparser.parseParameterHeader = parseParameterHeader;
headerparser.parseStructuredHeader = parseStructuredHeader;
return Object.freeze(headerparser);
});
////////////////////////////////////////////////////////////////////////////////
// JavaScript Raw MIME Parser //
////////////////////////////////////////////////////////////////////////////////
/**
* The parser implemented in this file produces a MIME part tree for a given
* input message via a streaming callback interface. It does not, by itself,
* understand concepts like attachments (hence the term 'Raw'); the consumer
* must translate output into such a format.
*
* Charsets:
* The MIME specifications permit a single message to contain multiple charsets
* (or perhaps none) as raw octets. As JavaScript strings are implicitly
* implemented in UTF-16, it is possible that some engines will attempt to
* convert these strings using an incorrect charset or simply fail to convert
* them at all. This parser assumes that its input is in the form of a "binary
* string", a string that uses only the first 256 characters of Unicode to
* represent the individual octets. To verify that charsets are not getting
* mangled elsewhere in the pipeline, the auxiliary test file test/data/charsets
* can be used.
*
* This parser attempts to hide the charset details from clients as much as
* possible. The resulting values of structured headers are always converted
* into proper Unicode strings before being exposed to clients; getting at the
* raw binary string data can only be done via getRawHeader. The .charset
* parameter on header objects, if changed, changes the fallback charset used
* for headers. It is initialized to the presumed charset of the corresponding
* part, taking into account the charset and force-charset options of the
* parser. Body parts are only converted into Unicode strings if the strformat
* option is set to Unicode. Even then, only the bodies of parts with a media
* type of text are converted to Unicode strings using available charset data;
* other parts are retained as Uint8Array objects.
*
* Part numbering:
* Since the output is a streaming format, individual parts are identified by a
* numbering scheme. The intent of the numbering scheme for parts is to comply
* with the part numbers as dictated by RFC 3501 as much as possible; however,
* that scheme does have several edge cases which would, if strictly followed,
* make it impossible to refer to certain parts of the message. In addition, we
* wish to make it possible to refer to parts which are not discoverable in the
* original MIME tree but are still viewable as parts. The part numbering
* scheme is as follows:
* - Individual sections of a multipart/* body are numbered in increasing order
* sequentially, starting from 1. Note that the prologue and the epilogue of
* a multipart/* body are not considered entities and are therefore not
* included in the part numbering scheme (there is no way to refer to them).
* - The numbers of multipart/* parts are separated by `.' characters.
* - The outermost message is referred to by use of the empty string.
* --> The following segments are not accounted for by IMAP part numbering. <--
* - The body of any message/rfc822 or similar part is distinguished from the
* message part as a whole by appending a `$' character. This does not apply
* to the outermost message/rfc822 envelope.
*/
def('mimeparser', function(require) {
"use strict";
var mimeutils = require('./mimeutils');
var headerparser = require('./headerparser');
var spellings = require('./structuredHeaders').spellings;
/**
* An object that represents the structured MIME headers for a message.
*
* This class is primarily used as the 'headers' parameter in the startPart
* callback on handlers for MimeParser. As such, it is designed to do the right
* thing in common cases as much as possible, with some advanced customization
* possible for clients that need such flexibility.
*
* In a nutshell, this class stores the raw headers as an internal Map. The
* structured headers are not computed until they are actually used, which means
* that potentially expensive structuring (e.g., doing manual DKIM validation)
* can be performed as a structured decoder without impeding performance for
* those who just want a few common headers.
*
* The outer API of this class is intended to be similar to a read-only Map
* object (complete with iterability support), with a few extra properties to
* represent things that are hard to determine properly from headers. The keys
* used are "preferred spellings" of the headers, although the get and has
* methods will accept header parameters of any case. Preferred spellings are
* derived from the name passed to addStructuredDecoder/addStructuredEncoder; if
* no structured decoder has been registered, then the name capitalizes the
* first letter of every word in the header name.
*
* Extra properties compared to a Map object are:
* - charset: This field represents the assumed charset of the associated MIME
* body. It is prefilled using a combination of the charset and force-charset
* options on the associated MimeParser instance as well as attempting to find
* a charset parameter in the Content-Type header.
*
* If the force-charset option is false, the charset is guessed first using
* the Content-Type header's charset parameter, falling back to the charset
* option if it is present. If the force-charset option is true, the charset
* is initially set to the charset option. This initial guessed value can be
* overridden at any time by simply setting the field on this object.
*
* The charset is better reflected as a parameter of the body rather than the
* headers; this is ultimately the charset parameter that will be used if a
* body part is being converted to a Unicode strformat. Headers are converted
* using headerparser.convert8BitHeader, and this field is used as the
* fallbackCharset parameter, which will always attempt to decode as UTF-8
* first (in accordance with RFC 6532) and will refuse to decode as UTF-16 or
* UTF-32, as ASCII is not a subset of those charsets.
*
* - rawHeaderText: This read-only field contains the original header text from
* which headers were parsed, preserving case and whitespace (including
* alternate line endings instead of CRLF) exactly. If the header text begins
* with the mbox delimiter (i.e., a line that begins with "From "), then that
* is excluded from the rawHeaderText value and is not reflected anywhere in
* this object.
*
* - contentType: This field contains the structured representation of the
* Content-Type header, if it is present. If it is not present, it is set to
* the structured representation of the default Content-Type for a part (as
* this data is not easily guessed given only MIME tree events).
*
* The constructor for these objects is not externally exported, and thus they
* can only be created via MimeParser.
*
* @param rawHeaderText {BinaryString} The contents of the MIME headers to be
* parsed.
* @param options {Object} Options for the header parser.
* @param options.stripcontinuations {Boolean} If true, elide CRLFs from the
* raw header output.
*/
function StructuredHeaders(rawHeaderText, options) {
  // An individual header is terminated by a CRLF, except if the CRLF is
  // followed by a SP or TAB. Use negative lookahead to capture the latter case,
  // and don't capture the strings or else split results get nasty.
  let headerBreak = /(?:\r\n|\n)(?![ \t])|\r(?![ \t\n])/;
  let values = rawHeaderText.split(headerBreak);

  // Ignore the first "header" if it begins with an mbox delimiter.
  if (values.length > 0 && values[0].substring(0, 5) == "From ") {
    values.shift();
    // Elide the mbox delimiter line from rawHeaderText by cutting just past
    // the first header break. Searching for the text of the next header
    // instead is unreliable: that text may also occur inside the delimiter
    // line, and an empty next header would match at offset 0.
    let firstBreak = headerBreak.exec(rawHeaderText);
    if (firstBreak === null)
      rawHeaderText = '';
    else
      rawHeaderText = rawHeaderText.substring(
        firstBreak.index + firstBreak[0].length);
  }

  let headers = new Map();
  for (let line of values) {
    // Look for a colon. If it's not present, this header line is malformed,
    // perhaps by premature EOF or similar; treat the whole line as a header
    // name with an empty value.
    let colon = line.indexOf(":");
    let header, val;
    if (colon >= 0) {
      header = line.substring(0, colon);
      val = line.substring(colon + 1).trim();
      if (options.stripcontinuations)
        val = val.replace(/[\r\n]/g, '');
    } else {
      header = line;
      val = '';
    }

    // Canonicalize the header in lower-case form.
    header = header.trim().toLowerCase();
    // Omit "empty" headers
    if (header == '')
      continue;

    // We keep an array of values for each header, since a given header may be
    // repeated multiple times.
    if (headers.has(header)) {
      headers.get(header).push(val);
    } else {
      headers.set(header, [val]);
    }
  }

  /**
   * A map of header names to arrays of raw values found in this header block.
   * @private
   */
  this._rawHeaders = headers;

  /**
   * Cached results of structured header parsing.
   * @private
   */
  this._cachedHeaders = new Map();

  // rawHeaderText is read-only and reflects the text with any mbox delimiter
  // line removed.
  Object.defineProperty(this, "rawHeaderText",
    {get: function () { return rawHeaderText; }});
  // size counts distinct header names, not individual header lines.
  Object.defineProperty(this, "size",
    {get: function () { return this._rawHeaders.size; }});
  Object.defineProperty(this, "charset", {
    get: function () { return this._charset; },
    set: function (value) {
      this._charset = value;
      // Clear the cached headers, since this could change their values
      this._cachedHeaders.clear();
    }
  });

  // Default to the charset, until the message parser overrides us.
  if ('charset' in options)
    this._charset = options.charset;
  else
    this._charset = null;

  // If we have a Content-Type header, set contentType to return the structured
  // representation. We don't set the value off the bat, since we want to let
  // someone who changes the charset affect the values of 8-bit parameters.
  // The property is configurable so that it can be redefined later.
  Object.defineProperty(this, "contentType", {
    configurable: true,
    get: function () { return this.get('Content-Type'); }
  });
}
/**
* Get a raw header.
*
* Raw headers are an array of the header values, listed in order that they were
* specified in the header block, and without any attempt to convert charsets or
* apply RFC 2047 decoding. For example, in the following message (where the
* <XX> is meant to represent binary-octets):
*
* X-Header: Value A
* X-Header: V<C3><A5>lue B
* Header2: Q
*
* the result of calling getRawHeader('X-Header') or getRawHeader('x-header')
* would be ['Value A', 'V\xC3\xA5lue B'] and the result of
* getRawHeader('Header2') would be ['Q'].
*
* @param headerName {String} The header name for which to get header values.
* @returns {BinaryString[]} The raw header values (with no charset conversion
* applied).
*/
StructuredHeaders.prototype.getRawHeader = function (headerName) {
  // Raw values are stored under the lower-cased header name; a header that is
  // not present yields undefined.
  let key = headerName.toLowerCase();
  return this._rawHeaders.get(key);
};
/**
* Retrieve a structured version of the header.
*
* If there is a registered structured decoder (registration happens via
* headerparser.addStructuredDecoder), then the result of calling that decoder
* on the charset-corrected version of the header is returned. Otherwise, the
* values are charset-corrected and RFC 2047 decoding is applied as if the
* header were an unstructured header.
*
* A substantial set of headers have pre-registered structured decoders, which, in
* some cases, are unable to be overridden due to their importance in the
* functioning of the parser code itself.
*
* @param headerName {String} The header name for which to get the header value.
* @returns The structured header value of the output.
*/
StructuredHeaders.prototype.get = function (headerName) {
  // Both the cache and the raw header map are keyed by lower-cased names.
  let key = headerName.toLowerCase();

  // A previously computed structured value is reused as-is.
  if (this._cachedHeaders.has(key))
    return this._cachedHeaders.get(key);

  // A header missing from the block is reported as undefined (and is not
  // cached).
  let rawValues = this._rawHeaders.get(key);
  if (rawValues === undefined)
    return rawValues;

  // Turn every raw value into a Unicode string, using the current charset of
  // this object as the fallback.
  let fallbackCharset = this.charset;
  let unicodeValues = rawValues.map(
    (raw) => headerparser.convert8BitHeader(raw, fallbackCharset));

  // Prefer a structured decoder. If parsing throws (for instance because no
  // decoder is registered), treat the header as unstructured and apply only
  // RFC 2047 decoding to each value.
  let result;
  try {
    result = headerparser.parseStructuredHeader(key, unicodeValues);
  } catch (e) {
    result = unicodeValues.map(
      (text) => headerparser.decodeRFC2047Words(text));
  }

  // Remember the answer for later lookups.
  this._cachedHeaders.set(key, result);
  return result;
};
/**
* Check if the message has the given header.
*
* @param headerName {String} The header name for which to get the header value.
* @returns {Boolean} True if the header is present in this header block.
*/
StructuredHeaders.prototype.has = function (headerName) {
  // Presence is decided by the raw headers, not the cache, since the cache
  // only holds headers that have already been requested.
  let key = headerName.toLowerCase();
  return this._rawHeaders.has(key);
};
// Choose the property key under which the custom iterator is installed: the
// well-known Symbol.iterator when the engine provides Symbol, and the
// type-punned "@@iterator" string name otherwise.
var JS_HAS_SYMBOLS = (typeof Symbol === "function");
var ITERATOR_SYMBOL = "@@iterator";
if (JS_HAS_SYMBOLS)
  ITERATOR_SYMBOL = Symbol.iterator;
/**
 * The counterpart of Map.@@iterator for structured headers. It yields
 * [name, structured value] pairs, which is what allows
 * for (let [header, value] of headers) to work.
 */
StructuredHeaders.prototype[ITERATOR_SYMBOL] = function*() {
  // Names come from keys(); each value is looked up through get().
  let names = this.keys();
  for (let name of names) {
    let structured = this.get(name);
    yield [name, structured];
  }
};
/**
* An equivalent of Map.forEach, applied to the structured header
* representations.
*
* @param callback {Function(value, name, headers)} The callback to call for
* each header/value combo.
* @param thisarg {Object} The parameter that will be
* the |this| of the callback.
*/
StructuredHeaders.prototype.forEach = function (callback, thisarg) {
  // Each entry produced by iterating this object is a [name, value] pair; the
  // callback receives them as (value, name, headers), as Map.forEach does.
  for (let entry of this) {
    let name = entry[0];
    let structured = entry[1];
    callback.call(thisarg, structured, name, this);
  }
};
/**
* An equivalent of Map.entries, applied to the structured header
* representations.
*/
// Alias entries to the iterator method. Look it up under ITERATOR_SYMBOL, the
// same key it was installed under, rather than Symbol.iterator directly, so
// that the alias still resolves on engines where the "@@iterator" string
// fallback is in use.
StructuredHeaders.prototype.entries =
  StructuredHeaders.prototype[ITERATOR_SYMBOL];
/// Build a pseudo-preferred spelling from a lower-case header name by
/// upper-casing every lower-case ASCII letter that starts a word.
function capitalize(headerName) {
  const toUpper = (letter) => letter.toUpperCase();
  return headerName.replace(/\b[a-z]/g, toUpper);
}
/**
* An equivalent of Map.keys, applied to the structured header representations.
*/
StructuredHeaders.prototype.keys = function*() {
  // Raw headers are stored lower-cased. Report each one under its registered
  // spelling when there is one, and under a capitalized form otherwise.
  for (let lowerName of this._rawHeaders.keys()) {
    let preferred = spellings.get(lowerName);
    yield preferred ? preferred : capitalize(lowerName);
  }
};
/**
* An equivalent of Map.values, applied to the structured header
* representations.
*/
StructuredHeaders.prototype.values = function* () {
  // Iterating this object yields [name, value] pairs; only the value half is
  // passed on.
  for (let entry of this) {
    yield entry[1];
  }
};
/**
* A MIME parser.
*
* The inputs to the constructor consist of a callback object which receives
* information about the output data and an optional object containing the
* settings for the parser.
*
* The first parameter, emitter, is an object which contains several callbacks.
* Note that any and all of these methods are optional; the parser will not
* crash if one is missing. The callbacks are as follows:
* startMessage()
* Called when the stream to be parsed has started delivering data. This
* will be called exactly once, before any other call.
* endMessage()
* Called after all data has been delivered and the message parsing has
* been completed. This will be called exactly once, after any other call.
* startPart(string partNum, object headers)
* Called after the headers for a body part (including the top-level
* message) have been parsed. The first parameter is the part number (see
* the discussion on part numbering). The second parameter is an instance
* of StructuredHeaders that represents all of the headers for the part.
* endPart(string partNum)
* Called after all of the data for a body part (including sub-parts) has
* been parsed. The first parameter is the part number.
* deliverPartData(string partNum, {string,typedarray} data)
* Called when some data for a body part has been delivered. The first
* parameter is the part number. The second parameter is the data which is
* being delivered; the exact type of this data depends on the options
* used. Note that data is only delivered for leaf body parts.
*
* The second parameter, options, is an optional object containing the options
* for the parser. The following are the options that the parser may use:
* pruneat: <string> [default=""]
* Treat the message as starting at the given part number, so that no parts
* above <string> are returned.
* bodyformat: one of {none, raw, nodecode, decode} [default=nodecode]
* How to return the bodies of parts:
* none: no part data is returned
* raw: the body of the part is passed through raw
* nodecode: the body is passed through without decoding QP/Base64
* decode: quoted-printable and base64 are fully decoded
* strformat: one of {binarystring, unicode, typedarray} [default=binarystring]
* How to treat output strings:
* binarystring: Data is a JS string with chars in the range [\x00-\xff]
* unicode: Data for text parts is converted to UTF-16; data for other
* parts is a typed array buffer, akin to typedarray.
* typedarray: Data is a JS typed array buffer
* charset: <string> [default=""]
* What charset to assume if no charset information is explicitly provided.
* This only matters if strformat is unicode. See above note on charsets
* for more details.
* force-charset: <boolean> [default=false]
* If true, this coerces all types to use the charset option, even if the
* message specifies a different content-type.
* stripcontinuations: <boolean> [default=true]
* If true, then the newlines in headers are removed in the returned
* header objects.
* onerror: <function(thrown error)> [default = nop-function]
* An error function that is called if an emitter callback throws an error.
* By default, such errors are swallowed by the parser. If you want the
* parser itself to throw an error, rethrow it via the onerror function.
*/
function MimeParser(emitter, options) {
  /// The actual emitter
  this._emitter = emitter;
  /// Options for the parser (those listed here are defaults)
  this._options = {
    pruneat: "",
    bodyformat: "nodecode",
    strformat: "binarystring",
    stripcontinuations: true,
    charset: "",
    "force-charset": false,
    onerror: function swallow(error) {}
  };
  // Load the options as a copy here (prevents people from changing on the fly).
  if (options) {
    for (let opt in options) {
      this._options[opt] = options[opt];
    }
  }

  // Ensure that the error function is in fact a function. _callEmitter invokes
  // it without any further checking, so reject anything else up front.
  // (This used to construct an undefined `Exception` type, which surfaced as
  // an unrelated ReferenceError instead of this message.)
  if (typeof this._options.onerror != "function")
    throw new TypeError("onerror callback must be a function");

  // Reset the parser
  this.resetParser();
}
/**
* Resets the parser to read a new message. This method need not be called
* immediately after construction.
*/
MimeParser.prototype.resetParser = function () {
  /// A fresh message always starts out in the header-parsing state.
  this._state = PARSING_HEADERS;
  /// Input withheld from the previous packet for buffer conditioning.
  this._holdData = '';
  /// Header text accumulated so far (later the complete header block).
  this._headerData = '';
  /// Becomes true once emitter.startMessage has been called.
  this._triggeredCall = false;
  /// Split handling: the callback and the regex that triggers it.
  this._handleSplit = undefined;
  this._splitRegex = undefined;
  /// Subparsing: the part number handed to the subparser, and the subparser.
  this._subPartNum = undefined;
  this._subparser = undefined;
  /// Data that _convertData has not consumed yet.
  this._savedBuffer = '';
  /// Optional data conversion function.
  this._convertData = undefined;
  /// String decoder.
  this._decoder = undefined;
};
/**
* Deliver a buffer of data to the parser.
*
* @param buffer {BinaryString} The raw data to add to the message.
*/
MimeParser.prototype.deliverData = function (buffer) {
  // Ideally the whole message would be parsed in one go, but in practice the
  // data arrives in packets. To keep saved state small, packets are
  // conditioned so that the internal deliveries carry basic guarantees. The
  // model is a variation on line-buffering (several lines may be handled at
  // once), which the format of MIME messages makes hard to avoid:
  //  1. A `\r' may belong to `\r\n' or be an old Mac line ending; that is not
  //     known until the next character is seen, so a final `\r' is withheld.
  //  2. Every packet must end on a newline, so anything after the last line
  //     ending is withheld until the rest of its line comes through.
  //     [A message with bare `\r' line endings fed through a line-buffered
  //     input will therefore have most of its data withheld until the next
  //     buffer. `\r' line endings are rare enough nowadays that this is
  //     acceptable lossage.]
  //  3. Empty packets are eliminated.

  // Put back whatever was withheld from the previous packet.
  if (this._holdData) {
    buffer = this._holdData + buffer;
    this._holdData = '';
  }

  // Condition the input to get the multiline buffering described above: the
  // first piece is passed on, the second is withheld.
  if (buffer.length > 0) {
    let pieces = conditionToEndOnCRLF(buffer);
    buffer = pieces[0];
    this._holdData = pieces[1];
  }

  // Nothing to pass on after conditioning.
  if (buffer.length == 0)
    return;

  // Announce the start of the message the first time there is data.
  if (!this._triggeredCall) {
    this._callEmitter("startMessage");
    this._triggeredCall = true;
  }

  // Finally, hand the conditioned data to the internal parser.
  this._dispatchData("", buffer, true);
};
/**
* Ensure that a set of data always ends in an end-of-line character.
*
* @param buffer {BinaryString} The data with no guarantees about where it ends.
* @returns {BinaryString[]} An array of 2 binary strings where the first string
* ends in a newline and the last string contains the
* text in buffer following the first string.
*/
function conditionToEndOnCRLF(buffer) {
  // Find the last occurrence of '\r' or '\n' to split the string. However, we
  // don't want to consider '\r' if it is the very last character, as we need
  // the next packet to tell if the '\r' is the beginning of a CRLF or a line
  // ending by itself.
  //
  // lastIndexOf treats a negative start position as 0, so for a buffer of one
  // character the search must be skipped entirely; otherwise a lone "\r"
  // would be found at index 0 and passed on as a complete line.
  let lastCR = -1;
  if (buffer.length > 1)
    lastCR = buffer.lastIndexOf('\r', buffer.length - 2);
  let lastLF = buffer.lastIndexOf('\n');

  // Split just after whichever line ending comes last. With no line ending
  // at all (end == -1) everything is withheld.
  let end = Math.max(lastCR, lastLF);
  return [buffer.substring(0, end + 1), buffer.substring(end + 1)];
}
/**
* Tell the parser that all of the data has been delivered.
*
* This will flush all of the internal state of the parser.
*/
MimeParser.prototype.deliverEOF = function () {
  // If no data was ever passed on, startMessage has not been sent yet; send
  // it now so that it still precedes every other callback.
  if (!this._triggeredCall) {
    this._triggeredCall = true;
    this._callEmitter("startMessage");
  }

  // Push any withheld input through the parser, then signal end-of-input to
  // the internal machinery.
  let withheld = this._holdData;
  if (withheld) {
    this._dispatchData("", withheld, true);
  }
  this._dispatchEOF("");

  // Finally, tell the emitter that the message is complete.
  this._callEmitter("endMessage");
};
/**
* Calls a method on the emitter safely.
*
* This method ensures that errors in the emitter call won't cause the parser
* to exit with an error, unless the user wants it to.
*
* @param funcname {String} The function name to call on the emitter.
* @param args... Extra arguments to pass into the emitter callback.
*/
MimeParser.prototype._callEmitter = function (funcname) {
  // Every emitter callback is optional; do nothing when there is no emitter
  // or it lacks this callback.
  let emitter = this._emitter;
  if (!emitter || !(funcname in emitter))
    return;

  // Everything after funcname is forwarded to the callback.
  let args = Array.prototype.slice.call(arguments, 1);

  // partNum is always the first forwarded argument, so check that it
  // satisfies the pruneat requirement before calling out.
  if (args.length > 0 && this._willIgnorePart(args[0]))
    return;

  try {
    emitter[funcname].apply(emitter, args);
  } catch (e) {
    // The constructor verified that onerror is a function, so this call is
    // always safe.
    this._options.onerror(e);
  }
};
/**
* Helper function to decide if a part's output will never be seen.
*
* @param part {String} The number of the part.
* @returns {Boolean} True if the emitter is not interested in this part.
*/
MimeParser.prototype._willIgnorePart = function (part) {
  // Without a pruneat option nothing is ever ignored.
  let wanted = this._options["pruneat"];
  if (!wanted)
    return false;

  // The part number has to begin with the pruneat value...
  if (part.substr(0, wanted.length) != wanted)
    return true;

  // ...and, when it is longer, continue with a new-part indicator (i.e., 10
  // must not match 1, but 1.1 and 1$ do).
  if (!(wanted.length < part.length))
    return false;
  return "$.".indexOf(part[wanted.length]) == -1;
};
//////////////////////
// MIME parser core //
//////////////////////
// This MIME parser is a stateful parser; handling of the MIME tree is mostly
// done by creating new parsers and feeding data to them manually. In parallel
// to the externally-visible deliverData and deliverEOF, the two methods
// _dispatchData and _dispatchEOF are the internal counterparts that do the
// main work of moving data to where it needs to go; helper functions are used
// to handle translation.
//
// The overall flow of the parser is this. First, it buffers all of the data
// until the dual-CRLF pattern is noticed. Once that is found, it parses the
// entire header chunk at once. As a result of header parsing, the parser enters
// one of three modes for handling data, and uses a special regex to change
// modes and handle state changes. Specific details about the states the parser
// can be in are as follows:
// PARSING_HEADERS: The input buffer is concatenated to the currently-received
// text, which is then searched for the CRLFCRLF pattern. If found, the data
// is split at this boundary; the first chunk is parsed using _parseHeaders,
// and the second chunk will fall through to buffer processing. After
// splitting, the headers are delivered via the emitter, and _startBody is
// called to set up state for the parser.
// SEND_TO_BLACK_HOLE: All data in the input is ignored.
// SEND_TO_EMITTER: All data is passed into the emitter, if it is desired.
// Data can be optionally converted with this._convertData.
// SEND_TO_SUBPARSER: All data is passed into the subparser's _dispatchData
// method, using _subPartNum as the part number and _subparser as the object
// to call. Data can be optionally converted first with this._convertData.
//
// Additional state modifications can be done using a regex in _splitRegex and
// the callback method this._handleSplit(partNum, regexResult). The _handleSplit
// callback is free to do any modification to the current parser, including
// modifying the _splitRegex value. Packet conditioning guarantees that every
// buffer string passed into _dispatchData will have started immediately after a
// newline character in the fully assembled message.
//
// The this._convertData method, if present, is expected to return an array of
// two values, [{typedarray, string} decoded_buffer, string unused_buffer], and
// has as its arguments (string buffer, bool moreToCome).
//
// The header parsing by itself does very little parsing, only parsing as if all
// headers were unstructured fields. Values are munged so that embedded newlines
// are stripped and the result is also trimmed. Headers themselves are
// canonicalized into lower-case.
// Parser states. See the large comment above.
// Incoming data is accumulated as header text until the end of the header
// block is found.
var PARSING_HEADERS = 1;
// Body data is discarded.
var SEND_TO_BLACK_HOLE = 2;
// Body data is delivered to the emitter.
var SEND_TO_EMITTER = 3;
// Body data is forwarded to a subparser.
var SEND_TO_SUBPARSER = 4;
/**
* Main dispatch for incoming packet data.
*
* The incoming data needs to have been sanitized so that each packet begins on
* a newline boundary. The part number for the current parser also needs to be
* passed in. The checkSplit parameter controls whether or not the data in
* buffer needs to be checked against _splitRegex; this is used internally for
* the mechanics of splitting and should otherwise always be true.
*
* @param partNum {String} The part number being currently parsed.
* @param buffer {BinaryString} The text (conditioned as mentioned above) to
* pass to the parser.
* @param checkSplit {Boolean} If true, split the text using _splitRegex.
* This is set to false internally to handle
* low-level splitting details.
*/
MimeParser.prototype._dispatchData = function (partNum, buffer, checkSplit) {
  // Header phase: keep accumulating text until the end of the headers shows
  // up, which is either a line ending at the very start (no headers at all)
  // or the same line ending twice in a row.
  if (this._state == PARSING_HEADERS) {
    this._headerData += buffer;
    let headerEnd =
      /(?:^(?:\r\n|[\r\n]))|(\r\n|[\r\n])\1/.exec(this._headerData);
    if (headerEnd == null)
      return;

    // Everything after the blank line is body data and continues through
    // the rest of this function; everything before it is the header block.
    let bodyOffset = headerEnd.index + headerEnd[0].length;
    buffer = this._headerData.substring(bodyOffset);
    this._headerData = this._headerData.substr(0, headerEnd.index);
    this._headers = this._parseHeaders();
    this._callEmitter("startPart", partNum, this._headers);
    this._startBody(partNum);
  }

  // Body phase. First see whether the split regex matches, in which case the
  // buffer is handled piecewise.
  let splitMatch = null;
  if (checkSplit && this._splitRegex)
    splitMatch = this._splitRegex.exec(buffer);
  if (splitMatch) {
    let before = splitMatch.index;
    let after = before + splitMatch[0].length;

    // Text in front of the split still belongs to the current state.
    if (before > 0)
      this._dispatchData(partNum, buffer.substr(0, before), false);

    // Let the handler react to the split. Note that it may change anything
    // on `this', including the split regex itself.
    this._handleSplit(partNum, splitMatch);

    // Whatever follows is dispatched afresh, since it may contain further
    // splits.
    let remainder = buffer.substring(after);
    if (remainder.length > 0)
      this._dispatchData(partNum, remainder, true);
    return;
  }

  // No split: route the data according to the current state. Data for the
  // black hole is dropped, so only the other two states need any work.
  let state = this._state;
  if (state == SEND_TO_BLACK_HOLE)
    return;

  if (state == SEND_TO_EMITTER) {
    // No body data is passed on when the body format is "none" or when the
    // part is being ignored.
    if (this._options["bodyformat"] == "none" ||
        this._willIgnorePart(partNum))
      return;
    let converted =
      this._applyDataConversion(buffer, this._options["strformat"]);
    if (converted.length > 0)
      this._callEmitter("deliverPartData", partNum, converted);
  } else if (state == SEND_TO_SUBPARSER) {
    // The subparser is always fed binary strings.
    let converted = this._applyDataConversion(buffer, "binarystring");
    if (converted.length > 0)
      this._subparser._dispatchData(this._subPartNum, converted, true);
  }
};
/**
 * Run a chunk of body data through the active content converter (if any) and
 * coerce the result into the requested output format. Data the converter could
 * not consume yet is kept in this._savedBuffer and prepended to the next chunk.
 *
 * @param buf  {BinaryString} The data to be sent to the output.
 * @param type {String}       The type of the data to output. Valid values are
 *                            the same as the strformat option.
 * @returns Coerced and converted data that can be sent to the emitter or
 *          subparser.
 */
MimeParser.prototype._applyDataConversion = function (buf, type) {
  let payload = buf;
  if (this._convertData) {
    // Leftovers from the previous call go in front of the new data; the
    // converter hands back the converted text and the new leftovers.
    let pending = this._savedBuffer + payload;
    [payload, this._savedBuffer] = this._convertData(pending, true);
  }
  return this._coerceData(payload, type, true);
};
/**
 * Coerce the input buffer into the given output type.
 *
 * @param buffer {BinaryString|Uint8Array} The data to be converted.
 * @param type   {String}  The type to convert the data to: "binarystring",
 *                         "unicode" or "typedarray".
 * @param more   {boolean} Forwarded as the `stream` option of the charset
 *                         decoder: true while further data may still follow,
 *                         false for the final chunk.
 * @returns {BinaryString|String|Uint8Array} The desired output format.
 * @throws {Error} If a typed array is given with an unrecognized type.
 */
MimeParser.prototype._coerceData = function (buffer, type, more) {
  if (typeof buffer == "string") {
    // string -> binarystring is a nop
    if (type == "binarystring")
      return buffer;
    // Either we're going to array or unicode. Both need the array.
    var typedarray = mimeutils.stringToTypedArray(buffer);
    // If it's unicode, do the coercion from the array.
    // If it's typedarray, just return the synthesized one.
    return type == "unicode" ? this._coerceData(typedarray, "unicode", more)
                             : typedarray;
  } else if (type == "binarystring") {
    // Doing array -> binarystring
    return mimeutils.typedArrayToString(buffer);
  } else if (type == "typedarray") {
    // array -> typedarray is a nop, mirroring string -> binarystring above.
    return buffer;
  } else if (type == "unicode") {
    // Doing array -> unicode: use the decoder set up earlier to convert.
    if (this._decoder)
      return this._decoder.decode(buffer, {stream: more});
    // If there is no charset, just return the typed array instead.
    return buffer;
  }
  throw new Error("Invalid type: " + type);
};
/**
 * Tell this parser that the current part has no more data. Anything still
 * pending is flushed to its destination, and the emitter is told that the part
 * has ended.
 *
 * @param partNum {String} The part number being currently parsed.
 */
MimeParser.prototype._dispatchEOF = function (partNum) {
  if (this._state == PARSING_HEADERS) {
    // The input ended inside the header block: parse what we have and announce
    // the part so that the endPart below has a matching startPart.
    this._headers = this._parseHeaders();
    this._callEmitter("startPart", partNum, this._headers);
  } else if (this._state == SEND_TO_SUBPARSER) {
    // Hand any withheld data to the subparser before closing it.
    if (this._convertData && this._savedBuffer) {
      let flushed = this._convertData(this._savedBuffer, false)[0];
      this._subparser._dispatchData(this._subPartNum, flushed, true);
    }
    this._subparser._dispatchEOF(this._subPartNum);
    // The subparser is no longer needed.
    this._subparser = null;
  } else if (this._convertData && this._savedBuffer) {
    // Convert whatever the converter was still holding on to and deliver it.
    let [tail] = this._convertData(this._savedBuffer, false);
    let output = this._coerceData(tail, this._options["strformat"], false);
    if (output.length > 0)
      this._callEmitter("deliverPartData", partNum, output);
  }
  // In every case, the part is now finished.
  this._callEmitter("endPart", partNum);
};
/**
 * Build the structured header object for the header text collected so far,
 * and work out the content type and charset that apply to this part.
 *
 * @returns {StructuredHeaders} The structured header objects for the header
 *                              block.
 */
MimeParser.prototype._parseHeaders = function () {
  let headers = new StructuredHeaders(this._headerData, this._options);

  // Make sure headers.contentType always yields a usable value.
  let contentType = headers.get('Content-Type');
  if (typeof contentType !== "undefined") {
    // A Content-Type header is present; just lock the property down.
    Object.defineProperty(headers, "contentType", { configurable: false });
  } else {
    // No Content-Type header: expose the default type for this context.
    contentType = headerparser.parseStructuredHeader('Content-Type',
      this._defaultContentType || 'text/plain');
    Object.defineProperty(headers, "contentType", {
      get: function () { return contentType; }
    });
  }

  // Pick the charset for this part. A forced charset always wins; otherwise
  // the one declared on the content type is used, and the charset option is
  // the fallback when none is declared.
  let useDeclared = !this._options["force-charset"] &&
                    contentType.has("charset");
  let charset = useDeclared ? contentType.get("charset")
                            : this._options["charset"];
  headers.charset = charset;
  // Remember our own copy, so that later changes to headers.charset by users
  // do not affect how body parts get decoded.
  this._charset = charset;
  return headers;
};
/**
 * Initialize the parser state for the body of this message.
 *
 * Looks at the content type of the headers parsed so far (this._headers) and
 * decides where the body data goes: to the emitter, into a subparser, or
 * nowhere. Where applicable it also installs the Content-Transfer-Encoding
 * decoder (this._convertData) and the charset decoder (this._decoder).
 *
 * @param partNum {String} The part number being currently parsed.
 */
MimeParser.prototype._startBody = function Parser_startBody(partNum) {
  let contentType = this._headers.contentType;

  // Install the decoder for this part's Content-Transfer-Encoding, if we have
  // one. The header value comes from the message itself, so only own
  // properties of ContentDecoders may match: a plain `in' test would also
  // accept names inherited from Object.prototype (such as "constructor") and
  // install something that is not a decoder at all.
  let installTransferDecoder = () => {
    let cte = this._extractHeader('content-transfer-encoding', '');
    if (Object.prototype.hasOwnProperty.call(ContentDecoders, cte))
      this._convertData = ContentDecoders[cte];
  };

  // Should the bodyformat be raw, we just want to pass through all data without
  // trying to interpret it.
  if (this._options["bodyformat"] == "raw" &&
      partNum == this._options["pruneat"]) {
    this._state = SEND_TO_EMITTER;
    return;
  }

  // The output depends on the content-type. Basic rule of thumb:
  // 1. Discrete media types (text, video, audio, image, application) are passed
  //    through with no alterations beyond Content-Transfer-Encoding unpacking.
  // 2. Everything with a media type of multipart is treated the same.
  // 3. Any message/* type that acts like a mail message (rfc822, news, global)
  //    is parsed as a header/body pair again. Most of the other message/* types
  //    have similar structures, but they don't have cascading child subparts,
  //    so it's better to pass their entire contents to the emitter and let the
  //    consumer deal with them.
  // 4. For untyped data, there needs to be no Content-Type header. This helps
  //    avoid false positives.
  if (contentType.mediatype == 'multipart') {
    // If there's no boundary type, everything will be part of the prologue of
    // the multipart message, so just feed everything into a black hole.
    if (!contentType.has('boundary')) {
      this._state = SEND_TO_BLACK_HOLE;
      return;
    }
    // The boundary of a multipart message needs to start with -- and be at the
    // beginning of the line. If -- is after the boundary, it represents the
    // terminator of the multipart. After the line, there may be only whitespace
    // and then the CRLF at the end. Since the CRLFs in here are necessary for
    // distinguishing the parts, they are not included in the subparts, so we
    // need to capture them in the regex as well to prevent them leaking out.
    this._splitRegex = new RegExp('(\r\n|[\r\n]|^)--' +
      contentType.get('boundary').replace(/[\\^$*+?.()|{}[\]]/g, '\\$&') +
      '(--)?[ \t]*(?:\r\n|[\r\n]|$)');
    this._handleSplit = this._whenMultipart;
    this._subparser = new MimeParser(this._emitter, this._options);
    // multipart/digest defaults to message/rfc822 instead of text/plain
    if (contentType.subtype == "digest")
      this._subparser._defaultContentType = "message/rfc822";
    // All text before the first boundary and after the closing boundary are
    // supposed to be ignored ("must be ignored", according to RFC 2046 §5.1.1);
    // in accordance with these wishes, ensure they don't get passed to any
    // deliverPartData.
    this._state = SEND_TO_BLACK_HOLE;
    // Multipart MIME messages stipulate that the final CRLF before the boundary
    // delimiter is not matched. When the packet ends on a CRLF, we don't know
    // if the next text could be the boundary. Therefore, we need to withhold
    // the last line of text to be sure of what's going on. The _convertData is
    // how we do this, even though we're not really converting any data.
    this._convertData = function mpart_no_leak_crlf(buffer, more) {
      let splitPoint = buffer.length;
      if (more) {
        if (buffer.charAt(splitPoint - 1) == '\n')
          splitPoint--;
        if (splitPoint >= 0 && buffer.charAt(splitPoint - 1) == '\r')
          splitPoint--;
      }
      let res = conditionToEndOnCRLF(buffer.substring(0, splitPoint));
      let preLF = res[0];
      let rest = res[1];
      return [preLF, rest + buffer.substring(splitPoint)];
    }
  } else if (contentType.type == 'message/rfc822' ||
             contentType.type == 'message/global' ||
             contentType.type == 'message/news') {
    // The subpart is just another header/body pair that goes to EOF, so just
    // return the parse from that blob
    this._state = SEND_TO_SUBPARSER;
    this._subPartNum = partNum + "$";
    this._subparser = new MimeParser(this._emitter, this._options);
    // So, RFC 6532 happily allows message/global types to have CTE applied.
    // This means that subparts would need to be decoded to determine their
    // contents properly. There seems to be some evidence that message/rfc822
    // that is illegally-encoded exists in the wild, so be lenient and decode
    // for any message/* type that gets here.
    installTransferDecoder();
  } else {
    // Okay, we just have to feed the data into the output
    this._state = SEND_TO_EMITTER;
    // If we wish to decode, look it up in one of our decoders.
    if (this._options["bodyformat"] == "decode")
      installTransferDecoder();
  }

  // Set up the encoder for charset conversions; only do this for text parts.
  // Other parts are almost certainly binary, so no translation should be
  // applied to them.
  if (this._options["strformat"] == "unicode" &&
      contentType.mediatype == "text") {
    // If the charset is nonempty, initialize the decoder
    if (this._charset !== "") {
      this._decoder = new TextDecoder(this._charset);
    } else {
      // There's no charset we can use for decoding, so pass through as an
      // identity encoder or otherwise this._coerceData will complain.
      this._decoder = {
        decode: function identity_decoder(buffer) {
          return MimeParser.prototype._coerceData(buffer, "binarystring", true);
        }
      };
    }
  } else {
    this._decoder = null;
  }
};
// Internal split handling for multipart messages.
/**
 * Called for every multipart boundary line that is found. It closes the
 * subpart that was being parsed (if any) and either opens the next one or, on
 * the terminating boundary, stops looking for further boundaries. This is meant
 * to be used as a value for this._handleSplit.
 *
 * @param partNum    {String} The part number being currently parsed.
 * @param lastResult {Array}  The result of the regular expression match.
 */
MimeParser.prototype._whenMultipart = function (partNum, lastResult) {
  // Child part numbers are "<parent>.<n>", except at the top level where the
  // parent is '' and the child is just "<n>".
  let prefix = partNum != "" ? partNum + "." : partNum;

  if (this._subPartNum) {
    // A subpart is open, so this boundary ends it. If the match did not include
    // the line break that precedes the boundary, that line break is still at
    // the end of the saved buffer and has to be removed from there. (None of
    // this matters for the prologue, which is ignored anyways.)
    if (this._savedBuffer != '' && lastResult[1] === '') {
      let saved = this._savedBuffer;
      let keep = saved.length;
      if (saved[keep - 1] == '\n')
        keep--;
      if (keep >= 1 && saved[keep - 1] == '\r')
        keep--;
      this._savedBuffer = saved.substring(0, keep);
    }
    // Whatever is left of the saved data belongs to the subpart.
    if (this._savedBuffer != '')
      this._subparser._dispatchData(this._subPartNum, this._savedBuffer, true);
    // The subpart has now seen all of its data.
    this._subparser._dispatchEOF(this._subPartNum);
  } else {
    // First boundary of this multipart: start numbering the subparts.
    this._count = 1;
  }
  this._savedBuffer = '';

  // The second capture group of the split regex is the optional trailing "--"
  // of the terminating boundary.
  if (lastResult[2] == undefined) {
    // Ordinary boundary: get the subparser ready for the next subpart.
    this._subparser.resetParser();
    this._state = SEND_TO_SUBPARSER;
    this._subPartNum = prefix + this._count;
    this._count += 1;
  } else {
    // Terminating boundary: everything after it is epilogue and is ignored.
    this._splitRegex = null;
    this._state = SEND_TO_BLACK_HOLE;
  }
};
/**
 * Look up a header in the current header block, in its structured form. When
 * the header is absent, the given default is parsed as that header and
 * returned instead.
 *
 * @param name {String} The header name to get.
 * @param dflt {String} The default MIME value of the header.
 * @returns The structured representation of the header.
 */
MimeParser.prototype._extractHeader = function (name, dflt) {
  // Header lookups are done with the lower-cased name.
  let key = name.toLowerCase();
  if (this._headers.has(key))
    return this._headers.get(key);
  return headerparser.parseStructuredHeader(key, [dflt]);
};
// Registry of Content-Transfer-Encoding decoders, keyed by the encoding name.
// _startBody looks the part's content-transfer-encoding value up in here and
// installs the matching function as this._convertData.
// NOTE(review): this is a plain object literal, so it also inherits the
// property names of Object.prototype -- keep that in mind when looking up
// values that come from message headers.
var ContentDecoders = {};
ContentDecoders['quoted-printable'] = mimeutils.decode_qp;
ContentDecoders['base64'] = mimeutils.decode_base64;
return MimeParser;
});
def('headeremitter', function(require) {
/**
* This module implements the code for emitting structured representations of
* MIME headers into their encoded forms. The code here is a companion to,
* but completely independent of, jsmime.headerparser: the structured
* representations that are used as input to the functions in this file are the
* same forms that would be parsed.
*/
"use strict";
var mimeutils = require('./mimeutils');
// Get the default structured encoders and add them to the map
var structuredHeaders = require('./structuredHeaders');
// Map consulted by addStructuredHeader: lower-cased header name -> function
// that emits the structured value (it is called with the emitter as `this').
var encoders = new Map();
// Preferred capitalization of header names; addStructuredHeader looks names up
// by their lower-cased form.
var preferredSpellings = structuredHeaders.spellings;
// Register every default encoder. addStructuredEncoder is defined elsewhere in
// this module (presumably it fills `encoders' -- confirm there).
for (let [header, encoder] of structuredHeaders.encoders) {
addStructuredEncoder(header, encoder);
}
/// Restrict value to the range [min, max]. An undefined value yields def.
function clamp(value, min, max, def) {
  return value === undefined ? def
       : value < min ? min
       : value > max ? max
       : value;
}
/**
 * An object that can assemble structured header representations into their MIME
 * representation.
 *
 * The character-counting portion of this class operates using individual JS
 * characters as its representation of logical character, which is not the same
 * as the number of octets used as UTF-8. If non-ASCII characters are to be
 * included in headers without some form of encoding, then care should be taken
 * to set the maximum line length to account for the mismatch between character
 * counts and octet counts: the maximum line is 998 octets, which could be as
 * few as 332 JS characters (non-BMP characters, although they take up 4 octets
 * in UTF-8, count as 2 in JS strings).
 *
 * This code takes care to only insert line breaks at the higher-level breaking
 * points in a header (as recommended by RFC 5322), but it may need to resort to
 * including them more aggressively if this is not possible. If even aggressive
 * line-breaking cannot allow a header to be emitted without violating line
 * length restrictions, the methods will throw an exception to indicate this
 * situation.
 *
 * In general, this code does not attempt to modify its input; for example, it
 * does not attempt to change the case of any input characters, apply any
 * Unicode normalization algorithms, or convert email addresses to ACE where
 * applicable. The biggest exception to this rule is that most whitespace is
 * collapsed to a single space, even in unstructured headers, while most leading
 * and trailing whitespace is trimmed from inputs.
 *
 * @param {StreamHandler} handler The handler to which all output is sent.
 *   @param {Function(String)} handler.deliverData Receives encoded data.
 *   @param {Function()} handler.deliverEOF Sent when all text is sent.
 * @param {Object} [options] Options for the emitter. Every option has a
 *   default, so the object may be omitted altogether.
 *   @param [options.softMargin=78] {30 <= Integer <= 900}
 *     The ideal maximum number of logical characters to include in a line, not
 *     including the final CRLF pair. Lines may exceed this margin if parameters
 *     are excessively long.
 *   @param [options.hardMargin=332] {softMargin <= Integer <= 998}
 *     The maximum number of logical characters that can be included in a line,
 *     not including the final CRLF pair. If this count would be exceeded, then
 *     an error will be thrown and encoding will not be possible.
 *   @param [options.useASCII=true] {Boolean}
 *     If true, then RFC 2047 and RFC 2231 encoding of headers will be performed
 *     as needed to retain headers as ASCII.
 */
function HeaderEmitter(handler, options) {
  // A missing options object means "use all the defaults".
  let opts = options || {};

  /// The inferred value of options.useASCII
  this._useASCII = opts.useASCII === undefined ? true : opts.useASCII;
  /// The handler to use.
  this._handler = handler;
  /**
   * The current line being built; note that we may insert a line break in the
   * middle to keep under the maximum line length.
   *
   * @type String
   * @private
   */
  this._currentLine = "";

  // Our bounds for soft and hard margins are not completely arbitrary. The
  // minimum amount we need to encode is 20 characters, which can encode a
  // single non-BMP character with RFC 2047. The value of 30 is chosen to give
  // some breathing room for delimiters or other unbreakable characters. The
  // maximum length is 998 octets, per RFC 5322; soft margins are slightly lower
  // to allow for breathing room as well. The default of 78 for the soft margin
  // is recommended by RFC 5322; the default of 332 for the hard margin ensures
  // that UTF-8 encoding the output never violates the 998 octet limit.
  this._softMargin = clamp(opts.softMargin, 30, 900, 78);
  this._hardMargin = clamp(opts.hardMargin, this._softMargin, 998, 332);

  /**
   * The index of the last preferred breakable position in the current line.
   *
   * @type Integer
   * @private
   */
  this._preferredBreakpoint = 0;
}
///////////////////////
// Low-level methods //
///////////////////////
// Explanation of the emitter internals:
// RFC 5322 requires that we wrap our lines, ideally at 78 characters and at
// least by 998 octets. We can't wrap in arbitrary places, but wherever CFWS is
// valid... and ideally wherever clients are likely to expect it. In theory, we
// can break between every token (this is how RFC 822 operates), but, in RFC
// 5322, many of those breaks are relegated to obsolete productions, mostly
// because it is common to not properly handle breaks in those locations.
//
// So how do we do line breaking? The algorithm we implement is greedy, to
// simplify implementation. There are two margins: the soft margin, which we
// want to keep within, and the hard margin, which we absolutely have to keep
// within. There are also two kinds of break points: preferred and emergency.
// As long as we keep the line within the hard margin, we will only break at
// preferred breakpoints; emergency breakpoints are only used if we would
// otherwise exceed the hard margin.
//
// For illustration, here is an example header and where these break points are
// located:
//
// To: John "The Rock" Smith <jsmith@a.long.domain.invalid>
// Preferred: ^ ^ ^
// Emergency: ^ ^ ^ ^^ ^ ^ ^ ^ ^
//
// Preferred breakpoints are indicated by setting the mayBreakAfter parameter of
// addText to true, while emergency breakpoints are set after every token passed
// into addText. This is handled implicitly by only adding text to _currentLine
// if it ends in an emergency breakpoint.
//
// Internally, the code keeps track of margins by use of two variables, both
// set up from the initial options parameter: _softMargin is the line length we
// try to stay within, and _hardMargin is the length that must never be
// exceeded. Breaking happens when _currentLine.length approaches these values,
// as mentioned above.
/**
 * Send a header line consisting of the first N characters to the handler.
 *
 * If the count parameter is missing, then we presume that the current header
 * value being emitted is done and therefore we should not send a continuation
 * space. Otherwise, we presume that we're still working, so we will send the
 * continuation space.
 *
 * @private
 * @param [count] {Integer} The number of characters in the current line to
 *                          include before wrapping.
 */
HeaderEmitter.prototype._commitLine = function (count) {
  let isContinuing = typeof count !== "undefined";

  // Split at the point, and lop off whitespace immediately before and after.
  let firstN, lastN;
  if (isContinuing) {
    firstN = this._currentLine.slice(0, count).trimRight();
    lastN = this._currentLine.slice(count).trimLeft();
  } else {
    firstN = this._currentLine.trimRight();
    lastN = "";
  }

  // Send the line plus the final CRLF.
  this._handler.deliverData(firstN + '\r\n');

  // Start the next line with the text that was not sent. A continuation line
  // has to begin with whitespace, so add a single space in that case.
  this._currentLine = isContinuing ? ' ' + lastN : lastN;

  // We will always break at a point at or after the _preferredBreakpoint, if it
  // exists, so this always gets reset to 0.
  this._preferredBreakpoint = 0;
};
/**
 * Make sure that the current line has room for length more characters,
 * breaking the line if that is what it takes.
 *
 * No check is made that length itself is within the wrap margins. This lets
 * callers use this method simply to force a line break at a higher level than
 * the normal preferred breakpoints; the text added afterwards may end up being
 * broken further and not need all of the reserved space.
 *
 * @private
 * @param length {Integer} The number of characters to reserve space for.
 * @return {Boolean} Whether or not there is enough space for length characters.
 */
HeaderEmitter.prototype._reserveTokenSpace = function (length) {
  // Would the line, with length more characters, stay within the margin?
  let fitsWithin = (margin) => this._currentLine.length + length <= margin;

  // Plenty of room: nothing to do.
  if (fitsWithin(this._softMargin))
    return true;

  // Try breaking at the preferred breakpoint, if there is one; that may free
  // up enough room.
  if (this._preferredBreakpoint > 0) {
    this._commitLine(this._preferredBreakpoint);
    if (fitsWithin(this._softMargin))
      return true;
  }

  // The soft margin is lost. Staying within the hard margin is still fine.
  if (fitsWithin(this._hardMargin))
    return true;

  // Even the hard margin would be exceeded, so break at the last emergency
  // breakpoint, i.e., the end of the current line.
  if (this._currentLine.length > 0)
    this._commitLine(this._currentLine.length);

  // If there is still not enough room, nothing more can be done here.
  return fitsWithin(this._hardMargin);
};
/**
 * Append a block of text to the header being built, breaking the line first if
 * that is needed to make room. When mayBreakAfter is true, the position right
 * after the text is recorded as a preferred breakpoint, and a single space is
 * appended unless the text already ends in one.
 *
 * @protected
 * @param {String} text           The text to add to the output.
 * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
 *                                breakpoint.
 * @throws {Error} If the text cannot be added without violating the line
 *                 length restrictions.
 */
HeaderEmitter.prototype.addText = function (text, mayBreakAfter) {
  // Without room for the whole token there is nothing we can do.
  let hasRoom = this._reserveTokenSpace(text.length);
  if (!hasRoom)
    throw new Error("Cannot encode " + text + " due to length.");

  this._currentLine += text;
  if (!mayBreakAfter)
    return;

  // The breakpoint sits directly after the text; the separating space, if one
  // has to be added, comes after it.
  this._preferredBreakpoint = this._currentLine.length;
  let lastChar = text[text.length - 1];
  if (lastChar != ' ')
    this._currentLine += ' ';
};
/**
 * Append a block of text, wrapping it in double quotes first if it contains
 * any of the characters in qchars. Text that already starts and ends with a
 * double quote is taken to be quoted and is added as is. Empty text adds
 * nothing at all.
 *
 * @protected
 * @param {String} text           The text to add to the output.
 * @param {String} qchars         The set of characters that cannot appear
 *                                outside of a quoted string.
 * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
 *                                breakpoint.
 * @throws {Error} If the text cannot be added without violating the maximum
 *                 line length (raised by addText).
 */
HeaderEmitter.prototype.addQuotable = function (text, qchars, mayBreakAfter) {
  // Nothing to add means nothing to quote.
  if (text.length == 0)
    return;

  let alreadyQuoted = text[0] == '"' && text[text.length - 1] == '"';
  // Quoting is needed as soon as one character of the text is in qchars.
  let mustQuote = !alreadyQuoted && qchars != '' &&
    text.split('').some(function (ch) { return qchars.includes(ch); });

  // Inside the quotes, double quotes and backslashes get a backslash escape.
  let output = mustQuote ? '"' + text.replace(/["\\]/g, "\\$&") + '"' : text;
  this.addText(output, mayBreakAfter);
};
/**
 * Append text that corresponds to the phrase production of RFC 5322, i.e., a
 * sequence of atoms, quoted-strings, or RFC 2047 encoded-words. Every run of
 * whitespace in the input is first collapsed to a single space.
 *
 * @protected
 * @param {String} text           The text to add to the output.
 * @param {String} qchars         The set of characters that cannot appear
 *                                outside of a quoted string.
 * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
 *                                breakpoint.
 * @throws {Error} If the text cannot be added without violating the maximum
 *                 line length.
 */
HeaderEmitter.prototype.addPhrase = function (text, qchars, mayBreakAfter) {
  // Normalize whitespace: any span of it becomes one space.
  let phrase = text.replace(/[ \t\r\n]+/g, " ");

  // Text that needs encoding goes down the RFC 2047 route when the output has
  // to be ASCII.
  if (this._useASCII && nonAsciiRe.test(phrase)) {
    this.encodeRFC2047Phrase(phrase, mayBreakAfter);
    return;
  }

  // First choice: add the phrase in one piece. This is only attempted when the
  // unquoted phrase would stay under the soft margin, which is a loose check.
  let mightFit = (this._currentLine.length + phrase.length) < this._softMargin;
  if (mightFit) {
    try {
      this.addQuotable(phrase, qchars, mayBreakAfter);
      // When no breakpoint is known and the phrase went out as atoms (so the
      // line does not end in a quote), the last space in the line becomes the
      // breakpoint, whatever mayBreakAfter says.
      let endsInQuote =
        this._currentLine[this._currentLine.length - 1] == '"';
      if (this._preferredBreakpoint == 0 && phrase.includes(" ") &&
          !endsInQuote)
        this._preferredBreakpoint = this._currentLine.lastIndexOf(" ");
      return;
    } catch (e) {
      // The phrase did not fit in one piece after all; carry on with the
      // word-by-word approach below.
    }
  }

  // Fallback: add the phrase one word at a time, allowing a break after every
  // word except possibly the last one. If that fails too, we are out of
  // options and the error propagates.
  let words = phrase.split(' ');
  let lastIndex = words.length - 1;
  words.forEach((word, index) => {
    this.addQuotable(word, qchars, index == lastIndex ? mayBreakAfter : true);
  });
};
/// A regular expression for characters that need to be encoded: it matches
/// anything outside of U+0020 through U+007E, so control characters count as
/// well as non-ASCII ones.
var nonAsciiRe = /[^\x20-\x7e]/;
/// The beginnings of RFC 2047 encoded-word, for the base64 ("B") and the
/// quoted-printable ("Q") forms; both declare the charset as UTF-8.
var b64Prelude = "=?UTF-8?B?", qpPrelude = "=?UTF-8?Q?";
/// A list of ASCII characters forbidden in RFC 2047 encoded-words; in the "Q"
/// form these are written as =XX escapes.
var qpForbidden = "=?_()\",";
/// The digits used for the two hex characters of an =XX escape.
var hexString = "0123456789abcdef";
/**
 * Emit the given octets as exactly one RFC 2047 encoded-word. No attempt is
 * made to split the word if it turns out to be too long.
 *
 * @private
 * @param {Uint8Array} encodedText The octets to encode.
 * @param {Boolean} useQP          If true, use quoted-printable; if false,
 *                                 use base64.
 * @param {Boolean} mayBreakAfter  If true, the end of this text is a
 *                                 preferred breakpoint.
 */
HeaderEmitter.prototype._addRFC2047Word = function (encodedText, useQP,
    mayBreakAfter) {
  let binaryString = mimeutils.typedArrayToString(encodedText);
  let word;
  if (useQP) {
    word = qpPrelude;
    for (let idx = 0; idx < encodedText.length; idx++) {
      let octet = encodedText[idx];
      let ch = binaryString[idx];
      // Octets outside of printable ASCII and the forbidden characters are
      // written as =XX; a space is written as an underscore.
      let mustEscape = octet < 0x20 || octet >= 0x7F ||
                       qpForbidden.includes(ch);
      if (mustEscape)
        word += "=" + hexString[(octet & 0xf0) >> 4] + hexString[octet & 0x0f];
      else if (ch == " ")
        word += "_";
      else
        word += ch;
    }
    word += "?=";
  } else {
    word = b64Prelude + btoa(binaryString) + "?=";
  }
  this.addText(word, mayBreakAfter);
};
/**
* Add a block of text as potentially several RFC 2047 encoded-word tokens.
*
* The text is converted to UTF-8 and cut into chunks; each chunk is emitted
* through _addRFC2047Word as one encoded-word. All chunks but the last are
* followed by a preferred breakpoint; the last one uses mayBreakAfter.
*
* @protected
* @param {String} text The text to add to the output.
* @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
* breakpoint.
*/
HeaderEmitter.prototype.encodeRFC2047Phrase = function (text, mayBreakAfter) {
// Start by encoding the text into UTF-8 directly.
let encodedText = new TextEncoder("UTF-8").encode(text);
// Make sure there's enough room for a single token.
let minLineLen = b64Prelude.length + 10; // Eight base64 characters plus ?=
if (!this._reserveTokenSpace(minLineLen)) {
this._commitLine(this._currentLine.length);
}
// Try to encode as much UTF-8 text as possible in each go.
// b64Len and qpLen track how long the payload of the current chunk would be
// in base64 and in quoted-printable; start is the index of the first octet of
// the current chunk. maxChars is the room left for the payload within the
// soft margin, after the prelude and the closing "?=".
// NOTE(review): _reserveTokenSpace can return true on the strength of the
// hard margin alone, in which case maxChars may be zero or negative here and
// the first pass through the loop would emit an encoded-word with an empty
// payload -- confirm whether that can happen in practice.
let b64Len = 0, qpLen = 0, start = 0;
let maxChars = (this._softMargin - this._currentLine.length) -
(b64Prelude.length + 2);
for (let i = 0; i < encodedText.length; i++) {
let b64Inc = 0, qpInc = 0;
// The length we need for base64 is ceil(length / 3) * 4...
// ... so the cost goes up by 4 whenever a new group of 3 octets is started.
if ((i - start) % 3 == 0)
b64Inc += 4;
// The length for quoted-printable is 3 chars only if encoded
if (encodedText[i] < 0x20 || encodedText[i] >= 0x7f ||
qpForbidden.includes(String.fromCharCode(encodedText[i]))) {
qpInc = 3;
} else {
qpInc = 1;
}
// The chunk is full only when this octet fits in neither encoding.
if (b64Len + b64Inc > maxChars && qpLen + qpInc > maxChars) {
// Oops, we have too many characters! We need to encode everything through
// the current character. However, we can't split in the middle of a
// multibyte character. In UTF-8, characters that start with 10xx xxxx are
// the middle of multibyte characters, so backtrack until the start
// character is legal.
while ((encodedText[i] & 0xC0) == 0x80)
--i;
// Add this part of the word and then make a continuation.
// The second argument is useQP: quoted-printable is chosen whenever it is
// no longer than base64.
this._addRFC2047Word(encodedText.subarray(start, i), b64Len >= qpLen,
true);
// Reset the array for parsing.
start = i;
--i; // Reparse this character as well
b64Len = qpLen = 0;
// From here on the budget is computed as for a fresh line (presumably the
// extra 1 accounts for the leading continuation space -- confirm).
maxChars = this._softMargin - b64Prelude.length - 3;
} else {
// Add the counts for the current variable to the count to encode.
b64Len += b64Inc;
qpLen += qpInc;
}
}
// Add the entire array at this point.
this._addRFC2047Word(encodedText.subarray(start), b64Len >= qpLen,
mayBreakAfter);
};
////////////////////////
// High-level methods //
////////////////////////
/**
 * Start a new header: finish the line of the previous header, if there is one,
 * and then add the header name followed by a colon and a space.
 *
 * @public
 * @param {String} name The name of the header.
 */
HeaderEmitter.prototype.addHeaderName = function (name) {
  // Trailing whitespace of the previous header is not worth keeping, and a
  // line with nothing else on it does not need to be sent.
  let pending = this._currentLine.trimRight();
  this._currentLine = pending;
  if (pending.length > 0)
    this._commitLine();
  this.addText(name + ": ", false);
};
/**
 * Add a header and its structured value to the output.
 *
 * The name is matched case-insensitively against the known structured headers;
 * for a known header, the preferred spelling of the name is what gets written,
 * not the spelling passed in. For an unknown header with a string value, the
 * header is treated as unstructured: the first letter of each dash-separated
 * word of the name is capitalized and the value is added as if
 * {@link addUnstructured} were called.
 *
 * @public
 * @param {String} name The name of the header.
 * @param value         The structured value of the header.
 * @throws {Error} If the header is unknown and the value is not a string.
 */
HeaderEmitter.prototype.addStructuredHeader = function (name, value) {
  let key = name.toLowerCase();

  // Known structured header: let its encoder write the value.
  if (encoders.has(key)) {
    this.addHeaderName(preferredSpellings.get(key));
    let encode = encoders.get(key);
    encode.call(this, value);
    return;
  }

  // Without an encoder, only plain strings can be written.
  if (typeof value !== "string")
    throw new Error("Unknown header " + name);

  // All-lower-case names are ugly, so capitalize first letters.
  let prettyName = name.replace(/(^|-)[a-z]/g, function (match) {
    return match.toUpperCase();
  });
  this.addHeaderName(prettyName);
  this.addUnstructured(value);
};
/**
 * Add a single address to the header. The address is an object consisting of a
 * possibly-empty display name and an email address.
 *
 * With a display name, the output has the form `name <email>`; without one,
 * only the bare email is written. A missing email is treated like an empty one,
 * in which case only the display name (if any) is written.
 *
 * @public
 * @param Address addr        The address to be added.
 * @param {String} addr.name  The (possibly-empty) name of the address to add.
 * @param {String} addr.email The email of the address to add.
 * @see headerparser.parseAddressingHeader
 */
HeaderEmitter.prototype.addAddress = function (addr) {
  // Normalize a missing email up front, so that measuring and splitting it
  // below cannot fail before the "no email" case is even looked at.
  let email = addr.email || "";

  // If we have a display name, add that first.
  if (addr.name) {
    // This is a simple estimate that keeps names on one line if possible.
    this._reserveTokenSpace(addr.name.length + email.length + 3);
    this.addPhrase(addr.name, ",()<>[]:;@.\"", true);

    // If we don't have an email address, don't write out the angle brackets for
    // the address. It's already an abnormal situation should this appear, and
    // this has better round-tripping properties.
    if (!email)
      return;

    this.addText("<", false);
  }

  // Find the local-part and domain of the address, since the local-part may
  // need to be quoted separately. Note that the @ goes to the domain, so that
  // the local-part may be quoted if it needs to be.
  let at = email.lastIndexOf("@");
  let localpart = "", domain = "";
  if (at == -1) {
    localpart = email;
  } else {
    localpart = email.slice(0, at);
    domain = email.slice(at);
  }

  this.addQuotable(localpart, "()<>[]:;@\\,\" !", false);
  this.addText(domain + (addr.name ? ">" : ""), false);
};
/**
 * Add a list of addresses and groups to the output, separated by ", ". Such a
 * list may be found as the output of
 * {@link headerparser.parseAddressingHeader}.
 *
 * An element that has an `email` property is written with {@link addAddress}.
 * Any other element is treated as a group and written as
 * `name: member, member;`, with its members emitted by a recursive call.
 *
 * @public
 * @param {(Address|Group)[]} addresses A collection of addresses to add.
 * @param {String} addresses[i].name    The (possibly-empty) name of the
 *                                      address or the group to add.
 * @param {String} [addresses[i].email] The email of the address to add.
 * @param {Address[]} [addresses[i].group] A list of email addresses in the
 *                                      group.
 * @see HeaderEmitter.addAddress
 * @see headerparser.parseAddressingHeader
 */
HeaderEmitter.prototype.addAddresses = function (addresses) {
  let isFirst = true;
  for (let entry of addresses) {
    // Every element except the first is preceded by a separator.
    if (!isFirst) {
      this.addText(", ", true);
    }
    isFirst = false;

    if (!("email" in entry)) {
      // A group has format name: member, member;
      // The separator for the element after the group is still added by the
      // check at the top of the loop.
      this.addPhrase(entry.name, ",()<>[]:;@.\"", false);
      this.addText(":", true);
      this.addAddresses(entry.group);
      this.addText(";", true);
      continue;
    }

    this.addAddress(entry);
  }
};
/**
 * Add an unstructured header value to the output. This effectively means only
 * inserting line breaks where necessary, and using RFC 2047 encoding where
 * necessary. Empty text produces no output at all.
 *
 * @public
 * @param {String} text The text to add to the output.
 */
HeaderEmitter.prototype.addUnstructured = function (text) {
  if (text.length !== 0) {
    // Unstructured text is basically a phrase that can't be quoted, so it is
    // emitted as a phrase with an empty set of quote-triggering characters.
    this.addPhrase(text, "", false);
  }
};
/**
 * RFC 822 labels for days of the week, indexed by the value returned from
 * Date.prototype.getDay().
 */
var kDaysOfWeek = [
  "Sun", // 0
  "Mon", // 1
  "Tue", // 2
  "Wed", // 3
  "Thu", // 4
  "Fri", // 5
  "Sat"  // 6
];
/**
 * Formatting helper that renders a number as a string, prefixing a "0" when
 * the number is below 10 (so 0-9 become "00"-"09").
 *
 * @param {Number} num The number to format.
 * @returns {String} The number as text, zero-prefixed if it is below 10.
 */
function padTo2Digits(num) {
  if (num < 10) {
    return "0" + num;
  }
  return num.toString();
}
/**
 * Add a date/time field to the output, using the JS date object as the time
 * representation. The value will be output using the timezone offset of the
 * date object, which is usually the timezone of the user (modulo timezone and
 * DST changes).
 *
 * The text has the form `Sun, 5 Jan 2014 07:08:09 -0500` and is added as a
 * single unbreakable token.
 *
 * Note that if the date is an invalid date (its internal date parameter is a
 * NaN value), this method throws an error instead of generating an invalid
 * string.
 *
 * @public
 * @param {Date} date The date to be added to the output string.
 * @throws {Error} If the date is invalid, or if its year lies outside the
 *                 range 1900-9999.
 */
HeaderEmitter.prototype.addDate = function (date) {
  // Rather than make a header plastered with NaN values, throw an error on
  // specific invalid dates.
  if (isNaN(date.getTime()))
    throw new Error("Cannot encode an invalid date");

  // RFC 5322 says years can't be before 1900. The after 9999 is a bit that
  // derives from the specification saying that years have 4 digits.
  let year = date.getFullYear();
  if (year < 1900 || year > 9999)
    throw new Error("Date year is out of encodable range");

  // Start by computing the timezone offset for a day. We lack a good format, so
  // the 0-padding is done by hand. Note that the tzoffset we output is in the
  // form ±hhmm, so we need to separate the offset (in minutes) into an hour
  // and minute pair.
  //
  // The offset is rounded to whole minutes first: engines may report a
  // fractional number of minutes for dates that fall into a historical
  // local-mean-time period, and the zone has no room for seconds. Rounding
  // before the split also keeps the hour/minute pair consistent when the
  // value rounds up across an hour boundary.
  let tzOffset = Math.round(date.getTimezoneOffset());
  let tzOffAbs = Math.abs(tzOffset);
  let tzOffHours = Math.trunc(tzOffAbs / 60);
  let tzOffMinutes = tzOffAbs % 60;
  // getTimezoneOffset() is positive for zones behind UTC, hence the inversion.
  let tzOffsetStr = (tzOffset > 0 ? "-" : "+") +
    padTo2Digits(tzOffHours) + padTo2Digits(tzOffMinutes);

  // Convert the day-time figure into a single value to avoid unwanted line
  // breaks in the middle.
  let dayTime = [
    kDaysOfWeek[date.getDay()] + ",",
    date.getDate(),
    mimeutils.kMonthNames[date.getMonth()],
    year,
    padTo2Digits(date.getHours()) + ":" +
      padTo2Digits(date.getMinutes()) + ":" +
      padTo2Digits(date.getSeconds()),
    tzOffsetStr
  ].join(" ");
  this.addText(dayTime, false);
};
/**
 * Signal that the current header has been finished encoding. The line being
 * built is committed, and the handler is optionally told that the stream has
 * ended.
 *
 * @public
 * @param {Boolean} deliverEOF If true, signal to the handler that no more text
 *                             will be arriving.
 */
HeaderEmitter.prototype.finish = function (deliverEOF) {
  this._commitLine();
  if (!deliverEOF) {
    return;
  }
  this._handler.deliverEOF();
};
/**
 * Create a header emitter that streams its output to the given handler.
 *
 * @param {StreamHandler} handler The handler to consume output.
 * @param                 options Options to pass into the HeaderEmitter
 *                                constructor.
 * @returns {HeaderEmitter} A header emitter constructed with the given options.
 */
function makeStreamingEmitter(handler, options) {
  let emitter = new HeaderEmitter(handler, options);
  return emitter;
}
/**
 * A minimal output handler that collects everything delivered to it into a
 * single string, available as the `value` property.
 */
function StringHandler() {
  // Everything delivered so far.
  this.value = "";

  // Append the next chunk of output.
  this.deliverData = function (chunk) {
    this.value = this.value + chunk;
  };

  // End of output needs no action: the text is already in `value`.
  this.deliverEOF = function () {
  };
}
/**
 * Encode one header, given by name and structured value, and return the
 * resulting MIME-encoded text. The trailing CRLF for the header is included.
 *
 * @param {String} name    The name of the structured header.
 * @param          value   The value of the structured header.
 * @param          options Options for the HeaderEmitter constructor.
 * @returns {String} A MIME-encoded representation of the structured header.
 * @see HeaderEmitter.addStructuredHeader
 */
function emitStructuredHeader(name, value, options) {
  // Collect the emitter's output in an in-memory string.
  let sink = new StringHandler();
  let emitter = new HeaderEmitter(sink, options);

  emitter.addStructuredHeader(name, value);
  emitter.finish(true);

  return sink.value;
}
/**
 * Encode a whole collection of headers and return the combined MIME-encoded
 * text.
 *
 * This method is designed to be able to emit header values given the headerData
 * values produced by MIME parsing. Thus, the values of the map are arrays
 * corresponding to header multiplicity: every array element is emitted as its
 * own header under the map key's name.
 *
 * @param {Map(String->Object[])} headerValues A map of header names to arrays
 *                                             of their structured values.
 * @param                         options      Options for the HeaderEmitter
 *                                             constructor.
 * @returns {String} A MIME-encoded representation of the structured headers.
 * @see HeaderEmitter.addStructuredHeader
 */
function emitStructuredHeaders(headerValues, options) {
  let sink = new StringHandler();
  let emitter = new HeaderEmitter(sink, options);

  // Each map entry is a [name, values] pair.
  for (let entry of headerValues) {
    let headerName = entry[0];
    entry[1].forEach(function (headerValue) {
      emitter.addStructuredHeader(headerName, headerValue);
    });
  }

  emitter.finish(true);
  return sink.value;
}
/**
 * Register a custom structured MIME encoder. These encoders are used by
 * {@link emitStructuredHeader} and similar functions to encode richer, more
 * structured values instead of relying on string representations everywhere.
 *
 * Structured encoders are functions which take in a single parameter
 * representing their structured value. The this parameter is set to be an
 * instance of {@link HeaderEmitter}, and it is intended that the several public
 * or protected methods on that class are useful for encoding values.
 *
 * The encoder is stored under the lower-cased header name and replaces any
 * encoder previously registered for it. The spelling passed here becomes the
 * preferred spelling only if the header does not have one yet.
 *
 * There is a large set of structured encoders built-in to the jsmime library
 * already.
 *
 * @param {String}          header  The header name (in its preferred case) for
 *                                  which the encoder will be used.
 * @param {Function(Value)} encoder The structured encoder function.
 */
function addStructuredEncoder(header, encoder) {
  let key = header.toLowerCase();
  encoders.set(key, encoder);

  // An existing preferred spelling always wins over the one given here.
  if (preferredSpellings.has(key)) {
    return;
  }
  preferredSpellings.set(key, header);
}
return Object.freeze({
addStructuredEncoder: addStructuredEncoder,
emitStructuredHeader: emitStructuredHeader,
emitStructuredHeaders: emitStructuredHeaders,
makeStreamingEmitter: makeStreamingEmitter
});
});
// Top-level 'jsmime' module: bundles the parser, the header parser and the
// header emitter submodules into a single object.
def('jsmime', function(require) {
  let api = {};
  api.MimeParser = require('./mimeparser');
  api.headerparser = require('./headerparser');
  api.headeremitter = require('./headeremitter');
  return api;
});
return mods['jsmime'];
}));
|