summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt A. Tobin <email@mattatobin.com>2019-11-11 01:48:09 -0500
committerMatt A. Tobin <email@mattatobin.com>2019-11-11 01:48:09 -0500
commit01d03f41f10197e4344e66a0fb973e61ffabe5de (patch)
tree54489aab685d3bf24f90a30001f4e60a66ae5a37
parent113058fcaf8526adbfaf02fe8691f4c280bc4b37 (diff)
downloadaura-central-01d03f41f10197e4344e66a0fb973e61ffabe5de.tar.gz
Bugs 1437282, 1438590, 1506587, and 1498795
* strip unnecessary ==== padding in base64 text. * Extension of the characters to be encoded (quoted-printable) according to RFC2047. * Remove unwanted characters from headers. Compact extraneous white space in display name to avoid sender address spoofing. * Be more tolerant of spaces in base64-encoded RFC 2047 tokens. Tag mcp-graveyard/UXP%1273
-rw-r--r--mailnews/mime/jsmime/jsmime.js58
1 files changed, 53 insertions, 5 deletions
diff --git a/mailnews/mime/jsmime/jsmime.js b/mailnews/mime/jsmime/jsmime.js
index 253b5da0f..9e0682428 100644
--- a/mailnews/mime/jsmime/jsmime.js
+++ b/mailnews/mime/jsmime/jsmime.js
@@ -68,6 +68,8 @@ function decode_base64(buffer, more) {
else
buffer = '';
sanitize = sanitize.substring(0, sanitize.length - excess);
+ // Delete all unnecessary '====' in padding.
+ sanitize = sanitize.replace(/(====)+$/g, '');
// Use the atob function we (ought to) have in global scope.
return [atob(sanitize), buffer];
}
@@ -379,8 +381,49 @@ function getHeaderTokens(value, delimiters, opts) {
// converted to not be one.
let tokenList = [];
- /// Represents a non-delimiter token
+ // Represents a non-delimiter token.
function Token(token) {
+ // Replace problematic characters so we don't get unexpected behavior
+ // down the line. These fall into a few categories:
+ // A) "Separator, space" (Zs),
+ // B) "Mark, Nonspacing" (Mn)
+ // C) "Other, Control" (Cc)
+ // D) "Other, Format" (Cf)
+ // Unfortuantely, no support for the needed regexp Unicode property escapes
+ // in our engine. So we need to hand-roll it. Used the regexpu tool for
+ // that: https://mothereff.in/regexpu.
+ // This should be updated regularly, to take into account new additions
+ // to the unicode standard. Last updated July 2019.
+ // For a full list of categories, see http://unicode.org/Public//5.0.0/ucd/UCD.html.
+
+ // -- case A: /\p{Zs}/u
+ // https://www.fileformat.info/info/unicode/category/Zs/list.htm
+ // https://mothereff.in/regexpu#input=/\p{Zs}/u&unicodePropertyEscape=1
+ token = token.replace(/[\xA0\u1680\u2000-\u200A\u202F\u205F\u3000]/g, " ");
+
+ // -- case B: /\p{Mn}/u
+ // https://www.fileformat.info/info/unicode/category/Mn/list.htm
+ // https://mothereff.in/regexpu#input=/\p{Mn}/u&unicodePropertyEscape=1
+ // This is a bit more complicated as some of them could be "real", so we'll
+ // only remove the ones that are known to show as blank.
+ token = token.replace(/[\u034F\u17B4\u17B5\u180B-\u180D\uFE00-\uFE0F]/g, "");
+ // \uE0100-\uE01EF need to be written using their surrogate code point pairs
+ // until extended Unicode escapes are supported in regexps.
+ // https://www.fileformat.info/info/unicode/char/e0100/index.htm says \uDB40\uDD00.
+ // https://www.fileformat.info/info/unicode/char/e01ef/index.htm says \uDB40\uDDEF.
+ token = token.replace(/\uDB40[\uDD00-\uDDEF]/g, "");
+
+ // -- case C: /\p{Cc}/u, except Tab/LF/CR
+ // https://www.fileformat.info/info/unicode/category/Cc/list.htm
+ // https://mothereff.in/regexpu#input=/\p{Cc}/u&unicodePropertyEscape=1
+ // eslint-disable-next-line no-control-regex
+ token = token.replace(/(?![\t\n\r])[\0-\x1F\x7F-\x9F]/g, "");
+
+ // -- case D: /\p{Cf}/u
+ // https://www.fileformat.info/info/unicode/category/Cf/list.htm
+ // https://mothereff.in/regexpu#input=/\p{Cf}/u&unicodePropertyEscape=1
+ // Remove all of these except for \u0600-\u0605.
+ token = token.replace(/(?:[\xAD\u061C\u06DD\u070F\u08E2\u180E\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u206F\uFEFF\uFFF9-\uFFFB]|\uD804[\uDCBD\uDCCD]|\uD80D[\uDC30-\uDC38]|\uD82F[\uDCA0-\uDCA3]|\uD834[\uDD73-\uDD7A]|\uDB40[\uDC01\uDC20-\uDC7F])/g, "");
// Unescape all quoted pairs. Any trailing \ is deleted.
this.token = token.replace(/\\(.?)/g, "$1");
}
@@ -634,7 +677,7 @@ function decodeRFC2047Words(headerValue) {
if (encoding == 'B' || encoding == 'b') {
// Decode base64. If there's any non-base64 data, treat the string as
// an illegal token.
- if (/[^A-Za-z0-9+\/=]/.exec(text))
+ if (/[^ A-Za-z0-9+\/=]/.exec(text))
return false;
// Decode the string
@@ -819,13 +862,18 @@ function parseAddressingHeader(header, doRFC2047) {
addrSpec.substring(addrSpec.lastIndexOf("@"));
}
+ // Replace all whitespace characters with a single whitespace,
+ // to avoid consecutive whitespace and also to normalize tabs and newlines.
+ displayName = displayName.replace(/\s+/g, " ").trim();
+
if (displayName === '' && lastComment !== '') {
// Take last comment content as the display-name.
let offset = lastComment[0] === ' ' ? 2 : 1;
displayName = lastComment.substr(offset, lastComment.length - offset - 1);
}
- if (displayName !== '' || addrSpec !== '')
+ if (displayName !== '' || addrSpec !== '') {
addrlist.push({name: displayName, email: addrSpec});
+ }
// Clear pending flags and variables.
name = localPart = address = lastComment = '';
inAngle = inComment = needsSpace = false;
@@ -921,7 +969,7 @@ function parseAddressingHeader(header, doRFC2047) {
// Ignore the needs space if we're a "close" delimiter token.
let spacedToken = token;
- if (needsSpace && token.toString()[0] != '.')
+ if (needsSpace && (token.toString().length > 0) && token.toString()[0] != ".")
spacedToken = ' ' + spacedToken;
// Which field do we add this data to?
@@ -2871,7 +2919,7 @@ var nonAsciiRe = /[^\x20-\x7e]/;
var b64Prelude = "=?UTF-8?B?", qpPrelude = "=?UTF-8?Q?";
/// A list of ASCII characters forbidden in RFC 2047 encoded-words
-var qpForbidden = "=?_()\",";
+var qpForbidden = "\"#$%&'(),.:;<=>?@[\\]^_`{|}~";
var hexString = "0123456789abcdef";