summary | refs | log | tree | commit | diff
path: root/xpcom
diff options
context:
space:
mode:
author Moonchild <moonchild@palemoon.org> 2021-05-14 12:25:57 +0000
committer Moonchild <moonchild@palemoon.org> 2021-05-14 12:25:57 +0000
commit c921ad59d5acfe973199bf742a852db71ebe2b5c (patch)
tree 804e618fd084b1782f77b4ba07031b064b3b8656 /xpcom
parent 349346d0b76aec02354149da0f860d5bf7ec7b83 (diff)
download uxp-c921ad59d5acfe973199bf742a852db71ebe2b5c.tar.gz
Issue #1772 - Stop using legacy code page conversion for file paths on Linux.
OS.File already supports only UTF-8 paths on non-Windows systems, so this change makes our different ways of accessing file paths consistent with each other. This should prevent unexpected crashes in glibc code that expects UTF-8. This resolves #1772.
Diffstat (limited to 'xpcom')
-rw-r--r-- xpcom/build/XPCOMInit.cpp 7
-rw-r--r-- xpcom/io/nsNativeCharsetUtils.cpp 875
-rw-r--r-- xpcom/io/nsNativeCharsetUtils.h 24
3 files changed, 15 insertions, 891 deletions
diff --git a/xpcom/build/XPCOMInit.cpp b/xpcom/build/XPCOMInit.cpp
index 18aed6528a..e96314a1c5 100644
--- a/xpcom/build/XPCOMInit.cpp
+++ b/xpcom/build/XPCOMInit.cpp
@@ -574,10 +574,6 @@ NS_InitXPCOM2(nsIServiceManager** aResult,
setlocale(LC_ALL, "");
}
-#if defined(XP_UNIX)
- NS_StartupNativeCharsetUtils();
-#endif
-
NS_StartupLocalFile();
nsDirectoryService::RealInit();
@@ -1020,9 +1016,6 @@ ShutdownXPCOM(nsIServiceManager* aServMgr)
// Shutdown nsLocalFile string conversion
NS_ShutdownLocalFile();
-#ifdef XP_UNIX
- NS_ShutdownNativeCharsetUtils();
-#endif
// Shutdown xpcom. This will release all loaders and cause others holding
// a refcount to the component manager to release it.
diff --git a/xpcom/io/nsNativeCharsetUtils.cpp b/xpcom/io/nsNativeCharsetUtils.cpp
index 927e8cd591..3ed20d7f04 100644
--- a/xpcom/io/nsNativeCharsetUtils.cpp
+++ b/xpcom/io/nsNativeCharsetUtils.cpp
@@ -6,864 +6,9 @@
#include "xpcom-private.h"
//-----------------------------------------------------------------------------
-// XP_UNIX
+// Windows
//-----------------------------------------------------------------------------
-#if defined(XP_UNIX)
-
-#include <stdlib.h> // mbtowc, wctomb
-#include <locale.h> // setlocale
-#include "mozilla/Mutex.h"
-#include "nscore.h"
-#include "nsAString.h"
-#include "nsReadableUtils.h"
-
-using namespace mozilla;
-
-//
-// choose a conversion library. we used to use mbrtowc/wcrtomb under Linux,
-// but that doesn't work for non-BMP characters whether we use '-fshort-wchar'
-// or not (see bug 206811 and
-// news://news.mozilla.org:119/bajml3$fvr1@ripley.netscape.com). we now use
-// iconv for all platforms where nltypes.h and nllanginfo.h are present
-// along with iconv.
-//
-#if defined(HAVE_ICONV) && defined(HAVE_NL_TYPES_H) && defined(HAVE_LANGINFO_CODESET)
-#define USE_ICONV 1
-#else
-#define USE_STDCONV 1
-#endif
-
-static void
-isolatin1_to_utf16(const char** aInput, uint32_t* aInputLeft,
- char16_t** aOutput, uint32_t* aOutputLeft)
-{
- while (*aInputLeft && *aOutputLeft) {
- **aOutput = (unsigned char)** aInput;
- (*aInput)++;
- (*aInputLeft)--;
- (*aOutput)++;
- (*aOutputLeft)--;
- }
-}
-
-static void
-utf16_to_isolatin1(const char16_t** aInput, uint32_t* aInputLeft,
- char** aOutput, uint32_t* aOutputLeft)
-{
- while (*aInputLeft && *aOutputLeft) {
- **aOutput = (unsigned char)**aInput;
- (*aInput)++;
- (*aInputLeft)--;
- (*aOutput)++;
- (*aOutputLeft)--;
- }
-}
-
-//-----------------------------------------------------------------------------
-// conversion using iconv
-//-----------------------------------------------------------------------------
-#if defined(USE_ICONV)
-#include <nl_types.h> // CODESET
-#include <langinfo.h> // nl_langinfo
-#include <iconv.h> // iconv_open, iconv, iconv_close
-#include <errno.h>
-#include "plstr.h"
-
-#if defined(HAVE_ICONV_WITH_CONST_INPUT)
-#define ICONV_INPUT(x) (x)
-#else
-#define ICONV_INPUT(x) ((char **)x)
-#endif
-
-// solaris definitely needs this, but we'll enable it by default
-// just in case... but we know for sure that iconv(3) in glibc
-// doesn't need this.
-#if !defined(__GLIBC__)
-#define ENABLE_UTF8_FALLBACK_SUPPORT
-#endif
-
-#define INVALID_ICONV_T ((iconv_t)-1)
-
-static inline size_t
-xp_iconv(iconv_t converter,
- const char** aInput, size_t* aInputLeft,
- char** aOutput, size_t* aOutputLeft)
-{
- size_t res, outputAvail = *aOutputLeft;
- res = iconv(converter, ICONV_INPUT(aInput), aInputLeft, aOutput, aOutputLeft);
- if (res == (size_t)-1) {
- // on some platforms (e.g., linux) iconv will fail with
- // E2BIG if it cannot convert _all_ of its input. it'll
- // still adjust all of the in/out params correctly, so we
- // can ignore this error. the assumption is that we will
- // be called again to complete the conversion.
- if ((errno == E2BIG) && (*aOutputLeft < outputAvail)) {
- res = 0;
- }
- }
- return res;
-}
-
-static inline void
-xp_iconv_reset(iconv_t converter)
-{
- // NOTE: the man pages on Solaris claim that you can pass nullptr
- // for all parameter to reset the converter, but beware the
- // evil Solaris crash if you go down this route >:-)
-
- const char* zero_char_in_ptr = nullptr;
- char* zero_char_out_ptr = nullptr;
- size_t zero_size_in = 0;
- size_t zero_size_out = 0;
-
- xp_iconv(converter,
- &zero_char_in_ptr,
- &zero_size_in,
- &zero_char_out_ptr,
- &zero_size_out);
-}
-
-static inline iconv_t
-xp_iconv_open(const char** to_list, const char** from_list)
-{
- iconv_t res;
- const char** from_name;
- const char** to_name;
-
- // try all possible combinations to locate a converter.
- to_name = to_list;
- while (*to_name) {
- if (**to_name) {
- from_name = from_list;
- while (*from_name) {
- if (**from_name) {
- res = iconv_open(*to_name, *from_name);
- if (res != INVALID_ICONV_T) {
- return res;
- }
- }
- from_name++;
- }
- }
- to_name++;
- }
-
- return INVALID_ICONV_T;
-}
-
-/*
- * char16_t[] is NOT a UCS-2 array BUT a UTF-16 string. Therefore, we
- * have to use UTF-16 with iconv(3) on platforms where it's supported.
- * However, the way UTF-16 and UCS-2 are interpreted varies across platforms
- * and implementations of iconv(3). On Tru64, it also depends on the environment
- * variable. To avoid the trouble arising from byte-swapping
- * (bug 208809), we have to try UTF-16LE/BE and UCS-2LE/BE before falling
- * back to UTF-16 and UCS-2 and variants. We assume that UTF-16 and UCS-2
- * on systems without UTF-16LE/BE and UCS-2LE/BE have the native endianness,
- * which isn't the case of glibc 2.1.x, for which we use 'UNICODELITTLE'
- * and 'UNICODEBIG'. It's also not true of Tru64 V4 when the environment
- * variable ICONV_BYTEORDER is set to 'big-endian', about which not much
- * can be done other than adding a note in the release notes. (bug 206811)
- */
-static const char* UTF_16_NAMES[] = {
-#if defined(IS_LITTLE_ENDIAN)
- "UTF-16LE",
-#if defined(__GLIBC__)
- "UNICODELITTLE",
-#endif
- "UCS-2LE",
-#else
- "UTF-16BE",
-#if defined(__GLIBC__)
- "UNICODEBIG",
-#endif
- "UCS-2BE",
-#endif
- "UTF-16",
- "UCS-2",
- "UCS2",
- "UCS_2",
- "ucs-2",
- "ucs2",
- "ucs_2",
- nullptr
-};
-
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
-static const char* UTF_8_NAMES[] = {
- "UTF-8",
- "UTF8",
- "UTF_8",
- "utf-8",
- "utf8",
- "utf_8",
- nullptr
-};
-#endif
-
-static const char* ISO_8859_1_NAMES[] = {
- "ISO-8859-1",
-#if !defined(__GLIBC__)
- "ISO8859-1",
- "ISO88591",
- "ISO_8859_1",
- "ISO8859_1",
- "iso-8859-1",
- "iso8859-1",
- "iso88591",
- "iso_8859_1",
- "iso8859_1",
-#endif
- nullptr
-};
-
-class nsNativeCharsetConverter
-{
-public:
- nsNativeCharsetConverter();
- ~nsNativeCharsetConverter();
-
- nsresult NativeToUnicode(const char** aInput, uint32_t* aInputLeft,
- char16_t** aOutput, uint32_t* aOutputLeft);
- nsresult UnicodeToNative(const char16_t** aInput, uint32_t* aInputLeft,
- char** aOutput, uint32_t* aOutputLeft);
-
- static void GlobalInit();
- static void GlobalShutdown();
- static bool IsNativeUTF8();
-
-private:
- static iconv_t gNativeToUnicode;
- static iconv_t gUnicodeToNative;
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
- static iconv_t gNativeToUTF8;
- static iconv_t gUTF8ToNative;
- static iconv_t gUnicodeToUTF8;
- static iconv_t gUTF8ToUnicode;
-#endif
- static Mutex* gLock;
- static bool gInitialized;
- static bool gIsNativeUTF8;
-
- static void LazyInit();
-
- static void Lock()
- {
- if (gLock) {
- gLock->Lock();
- }
- }
- static void Unlock()
- {
- if (gLock) {
- gLock->Unlock();
- }
- }
-};
-
-iconv_t nsNativeCharsetConverter::gNativeToUnicode = INVALID_ICONV_T;
-iconv_t nsNativeCharsetConverter::gUnicodeToNative = INVALID_ICONV_T;
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
-iconv_t nsNativeCharsetConverter::gNativeToUTF8 = INVALID_ICONV_T;
-iconv_t nsNativeCharsetConverter::gUTF8ToNative = INVALID_ICONV_T;
-iconv_t nsNativeCharsetConverter::gUnicodeToUTF8 = INVALID_ICONV_T;
-iconv_t nsNativeCharsetConverter::gUTF8ToUnicode = INVALID_ICONV_T;
-#endif
-Mutex* nsNativeCharsetConverter::gLock = nullptr;
-bool nsNativeCharsetConverter::gInitialized = false;
-bool nsNativeCharsetConverter::gIsNativeUTF8 = false;
-
-void
-nsNativeCharsetConverter::LazyInit()
-{
- // LazyInit may be called before NS_StartupNativeCharsetUtils, but
- // the setlocale it does has to be called before nl_langinfo. Like in
- // NS_StartupNativeCharsetUtils, assume we are called early enough that
- // we are the first to care about the locale's charset.
- if (!gLock) {
- setlocale(LC_CTYPE, "");
- }
- const char* blank_list[] = { "", nullptr };
- const char** native_charset_list = blank_list;
- const char* native_charset = nl_langinfo(CODESET);
- if (!native_charset) {
- NS_ERROR("native charset is unknown");
- // fallback to ISO-8859-1
- native_charset_list = ISO_8859_1_NAMES;
- } else {
- native_charset_list[0] = native_charset;
- }
-
- // Most, if not all, Unixen supporting UTF-8 and nl_langinfo(CODESET)
- // return 'UTF-8' (or 'utf-8')
- if (!PL_strcasecmp(native_charset, "UTF-8")) {
- gIsNativeUTF8 = true;
- }
-
- gNativeToUnicode = xp_iconv_open(UTF_16_NAMES, native_charset_list);
- gUnicodeToNative = xp_iconv_open(native_charset_list, UTF_16_NAMES);
-
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
- if (gNativeToUnicode == INVALID_ICONV_T) {
- gNativeToUTF8 = xp_iconv_open(UTF_8_NAMES, native_charset_list);
- gUTF8ToUnicode = xp_iconv_open(UTF_16_NAMES, UTF_8_NAMES);
- NS_ASSERTION(gNativeToUTF8 != INVALID_ICONV_T, "no native to utf-8 converter");
- NS_ASSERTION(gUTF8ToUnicode != INVALID_ICONV_T, "no utf-8 to utf-16 converter");
- }
- if (gUnicodeToNative == INVALID_ICONV_T) {
- gUnicodeToUTF8 = xp_iconv_open(UTF_8_NAMES, UTF_16_NAMES);
- gUTF8ToNative = xp_iconv_open(native_charset_list, UTF_8_NAMES);
- NS_ASSERTION(gUnicodeToUTF8 != INVALID_ICONV_T, "no utf-16 to utf-8 converter");
- NS_ASSERTION(gUTF8ToNative != INVALID_ICONV_T, "no utf-8 to native converter");
- }
-#else
- NS_ASSERTION(gNativeToUnicode != INVALID_ICONV_T, "no native to utf-16 converter");
- NS_ASSERTION(gUnicodeToNative != INVALID_ICONV_T, "no utf-16 to native converter");
-#endif
-
- /*
- * On Solaris 8 (and newer?), the iconv modules converting to UCS-2
- * prepend a byte order mark unicode character (BOM, u+FEFF) during
- * the first use of the iconv converter. The same is the case of
- * glibc 2.2.9x and Tru64 V5 (see bug 208809) when 'UTF-16' is used.
- * However, we use 'UTF-16LE/BE' in both cases, instead so that we
- * should be safe. But just in case...
- *
- * This dummy conversion gets rid of the BOMs and fixes bug 153562.
- */
- char dummy_input[1] = { ' ' };
- char dummy_output[4];
-
- if (gNativeToUnicode != INVALID_ICONV_T) {
- const char* input = dummy_input;
- size_t input_left = sizeof(dummy_input);
- char* output = dummy_output;
- size_t output_left = sizeof(dummy_output);
-
- xp_iconv(gNativeToUnicode, &input, &input_left, &output, &output_left);
- }
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
- if (gUTF8ToUnicode != INVALID_ICONV_T) {
- const char* input = dummy_input;
- size_t input_left = sizeof(dummy_input);
- char* output = dummy_output;
- size_t output_left = sizeof(dummy_output);
-
- xp_iconv(gUTF8ToUnicode, &input, &input_left, &output, &output_left);
- }
-#endif
-
- gInitialized = true;
-}
-
-void
-nsNativeCharsetConverter::GlobalInit()
-{
- gLock = new Mutex("nsNativeCharsetConverter.gLock");
-}
-
-void
-nsNativeCharsetConverter::GlobalShutdown()
-{
- delete gLock;
- gLock = nullptr;
-
- if (gNativeToUnicode != INVALID_ICONV_T) {
- iconv_close(gNativeToUnicode);
- gNativeToUnicode = INVALID_ICONV_T;
- }
-
- if (gUnicodeToNative != INVALID_ICONV_T) {
- iconv_close(gUnicodeToNative);
- gUnicodeToNative = INVALID_ICONV_T;
- }
-
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
- if (gNativeToUTF8 != INVALID_ICONV_T) {
- iconv_close(gNativeToUTF8);
- gNativeToUTF8 = INVALID_ICONV_T;
- }
- if (gUTF8ToNative != INVALID_ICONV_T) {
- iconv_close(gUTF8ToNative);
- gUTF8ToNative = INVALID_ICONV_T;
- }
- if (gUnicodeToUTF8 != INVALID_ICONV_T) {
- iconv_close(gUnicodeToUTF8);
- gUnicodeToUTF8 = INVALID_ICONV_T;
- }
- if (gUTF8ToUnicode != INVALID_ICONV_T) {
- iconv_close(gUTF8ToUnicode);
- gUTF8ToUnicode = INVALID_ICONV_T;
- }
-#endif
-
- gInitialized = false;
-}
-
-nsNativeCharsetConverter::nsNativeCharsetConverter()
-{
- Lock();
- if (!gInitialized) {
- LazyInit();
- }
-}
-
-nsNativeCharsetConverter::~nsNativeCharsetConverter()
-{
- // reset converters for next time
- if (gNativeToUnicode != INVALID_ICONV_T) {
- xp_iconv_reset(gNativeToUnicode);
- }
- if (gUnicodeToNative != INVALID_ICONV_T) {
- xp_iconv_reset(gUnicodeToNative);
- }
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
- if (gNativeToUTF8 != INVALID_ICONV_T) {
- xp_iconv_reset(gNativeToUTF8);
- }
- if (gUTF8ToNative != INVALID_ICONV_T) {
- xp_iconv_reset(gUTF8ToNative);
- }
- if (gUnicodeToUTF8 != INVALID_ICONV_T) {
- xp_iconv_reset(gUnicodeToUTF8);
- }
- if (gUTF8ToUnicode != INVALID_ICONV_T) {
- xp_iconv_reset(gUTF8ToUnicode);
- }
-#endif
- Unlock();
-}
-
-nsresult
-nsNativeCharsetConverter::NativeToUnicode(const char** aInput,
- uint32_t* aInputLeft,
- char16_t** aOutput,
- uint32_t* aOutputLeft)
-{
- size_t res = 0;
- size_t inLeft = (size_t)*aInputLeft;
- size_t outLeft = (size_t)*aOutputLeft * 2;
-
- if (gNativeToUnicode != INVALID_ICONV_T) {
-
- res = xp_iconv(gNativeToUnicode, aInput, &inLeft, (char**)aOutput, &outLeft);
-
- *aInputLeft = inLeft;
- *aOutputLeft = outLeft / 2;
- if (res != (size_t)-1) {
- return NS_OK;
- }
-
- NS_WARNING("conversion from native to utf-16 failed");
-
- // reset converter
- xp_iconv_reset(gNativeToUnicode);
- }
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
- else if ((gNativeToUTF8 != INVALID_ICONV_T) &&
- (gUTF8ToUnicode != INVALID_ICONV_T)) {
- // convert first to UTF8, then from UTF8 to UCS2
- const char* in = *aInput;
-
- char ubuf[1024];
-
- // we assume we're always called with enough space in |aOutput|,
- // so convert many chars at a time...
- while (inLeft) {
- char* p = ubuf;
- size_t n = sizeof(ubuf);
- res = xp_iconv(gNativeToUTF8, &in, &inLeft, &p, &n);
- if (res == (size_t)-1) {
- NS_ERROR("conversion from native to utf-8 failed");
- break;
- }
- NS_ASSERTION(outLeft > 0, "bad assumption");
- p = ubuf;
- n = sizeof(ubuf) - n;
- res = xp_iconv(gUTF8ToUnicode, (const char**)&p, &n,
- (char**)aOutput, &outLeft);
- if (res == (size_t)-1) {
- NS_ERROR("conversion from utf-8 to utf-16 failed");
- break;
- }
- }
-
- (*aInput) += (*aInputLeft - inLeft);
- *aInputLeft = inLeft;
- *aOutputLeft = outLeft / 2;
-
- if (res != (size_t)-1) {
- return NS_OK;
- }
-
- // reset converters
- xp_iconv_reset(gNativeToUTF8);
- xp_iconv_reset(gUTF8ToUnicode);
- }
-#endif
-
- // fallback: zero-pad and hope for the best
- // XXX This is lame and we have to do better.
- isolatin1_to_utf16(aInput, aInputLeft, aOutput, aOutputLeft);
-
- return NS_OK;
-}
-
-nsresult
-nsNativeCharsetConverter::UnicodeToNative(const char16_t** aInput,
- uint32_t* aInputLeft,
- char** aOutput,
- uint32_t* aOutputLeft)
-{
- size_t res = 0;
- size_t inLeft = (size_t)*aInputLeft * 2;
- size_t outLeft = (size_t)*aOutputLeft;
-
- if (gUnicodeToNative != INVALID_ICONV_T) {
- res = xp_iconv(gUnicodeToNative, (const char**)aInput, &inLeft,
- aOutput, &outLeft);
-
- *aInputLeft = inLeft / 2;
- *aOutputLeft = outLeft;
- if (res != (size_t)-1) {
- return NS_OK;
- }
-
- NS_ERROR("iconv failed");
-
- // reset converter
- xp_iconv_reset(gUnicodeToNative);
- }
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
- else if ((gUnicodeToUTF8 != INVALID_ICONV_T) &&
- (gUTF8ToNative != INVALID_ICONV_T)) {
- const char* in = (const char*)*aInput;
-
- char ubuf[6]; // max utf-8 char length (really only needs to be 4 bytes)
-
- // convert one uchar at a time...
- while (inLeft && outLeft) {
- char* p = ubuf;
- size_t n = sizeof(ubuf), one_uchar = sizeof(char16_t);
- res = xp_iconv(gUnicodeToUTF8, &in, &one_uchar, &p, &n);
- if (res == (size_t)-1) {
- NS_ERROR("conversion from utf-16 to utf-8 failed");
- break;
- }
- p = ubuf;
- n = sizeof(ubuf) - n;
- res = xp_iconv(gUTF8ToNative, (const char**)&p, &n, aOutput, &outLeft);
- if (res == (size_t)-1) {
- if (errno == E2BIG) {
- // not enough room for last uchar... back up and return.
- in -= sizeof(char16_t);
- res = 0;
- } else {
- NS_ERROR("conversion from utf-8 to native failed");
- }
- break;
- }
- inLeft -= sizeof(char16_t);
- }
-
- (*aInput) += (*aInputLeft - inLeft / 2);
- *aInputLeft = inLeft / 2;
- *aOutputLeft = outLeft;
- if (res != (size_t)-1) {
- return NS_OK;
- }
-
- // reset converters
- xp_iconv_reset(gUnicodeToUTF8);
- xp_iconv_reset(gUTF8ToNative);
- }
-#endif
-
- // fallback: truncate and hope for the best
- // XXX This is lame and we have to do better.
- utf16_to_isolatin1(aInput, aInputLeft, aOutput, aOutputLeft);
-
- return NS_OK;
-}
-
-bool
-nsNativeCharsetConverter::IsNativeUTF8()
-{
- if (!gInitialized) {
- Lock();
- if (!gInitialized) {
- LazyInit();
- }
- Unlock();
- }
- return gIsNativeUTF8;
-}
-
-#endif // USE_ICONV
-
-//-----------------------------------------------------------------------------
-// conversion using mb[r]towc/wc[r]tomb
-//-----------------------------------------------------------------------------
-#if defined(USE_STDCONV)
-#if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
-#include <wchar.h> // mbrtowc, wcrtomb
-#endif
-
-class nsNativeCharsetConverter
-{
-public:
- nsNativeCharsetConverter();
-
- nsresult NativeToUnicode(const char** aInput, uint32_t* aInputLeft,
- char16_t** aOutput, uint32_t* aOutputLeft);
- nsresult UnicodeToNative(const char16_t** aInput, uint32_t* aInputLeft,
- char** aOutput, uint32_t* aOutputLeft);
-
- static void GlobalInit();
- static void GlobalShutdown() { }
- static bool IsNativeUTF8();
-
-private:
- static bool gWCharIsUnicode;
-
-#if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
- mbstate_t ps;
-#endif
-};
-
-bool nsNativeCharsetConverter::gWCharIsUnicode = false;
-
-nsNativeCharsetConverter::nsNativeCharsetConverter()
-{
-#if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
- memset(&ps, 0, sizeof(ps));
-#endif
-}
-
-void
-nsNativeCharsetConverter::GlobalInit()
-{
- // verify that wchar_t for the current locale is actually unicode.
- // if it is not, then we should avoid calling mbtowc/wctomb and
- // just fallback on zero-pad/truncation conversion.
- //
- // this test cannot be done at build time because the encoding of
- // wchar_t may depend on the runtime locale. sad, but true!!
- //
- // so, if wchar_t is unicode then converting an ASCII character
- // to wchar_t should not change its numeric value. we'll just
- // check what happens with the ASCII 'a' character.
- //
- // this test is not perfect... obviously, it could yield false
- // positives, but then at least ASCII text would be converted
- // properly (or maybe just the 'a' character) -- oh well :(
-
- char a = 'a';
- unsigned int w = 0;
-
- int res = mbtowc((wchar_t*)&w, &a, 1);
-
- gWCharIsUnicode = (res != -1 && w == 'a');
-
-#ifdef DEBUG
- if (!gWCharIsUnicode) {
- NS_WARNING("wchar_t is not unicode (unicode conversion will be lossy)");
- }
-#endif
-}
-
-nsresult
-nsNativeCharsetConverter::NativeToUnicode(const char** aInput,
- uint32_t* aInputLeft,
- char16_t** aOutput,
- uint32_t* aOutputLeft)
-{
- if (gWCharIsUnicode) {
- int incr;
-
- // cannot use wchar_t here since it may have been redefined (e.g.,
- // via -fshort-wchar). hopefully, sizeof(tmp) is sufficient XP.
- unsigned int tmp = 0;
- while (*aInputLeft && *aOutputLeft) {
-#ifdef HAVE_MBRTOWC
- incr = (int)mbrtowc((wchar_t*)&tmp, *aInput, *aInputLeft, &ps);
-#else
- // XXX is this thread-safe?
- incr = (int)mbtowc((wchar_t*)&tmp, *aInput, *aInputLeft);
-#endif
- if (incr < 0) {
- NS_WARNING("mbtowc failed: possible charset mismatch");
- // zero-pad and hope for the best
- tmp = (unsigned char)**aInput;
- incr = 1;
- }
- ** aOutput = (char16_t)tmp;
- (*aInput) += incr;
- (*aInputLeft) -= incr;
- (*aOutput)++;
- (*aOutputLeft)--;
- }
- } else {
- // wchar_t isn't unicode, so the best we can do is treat the
- // input as if it is isolatin1 :(
- isolatin1_to_utf16(aInput, aInputLeft, aOutput, aOutputLeft);
- }
-
- return NS_OK;
-}
-
-nsresult
-nsNativeCharsetConverter::UnicodeToNative(const char16_t** aInput,
- uint32_t* aInputLeft,
- char** aOutput,
- uint32_t* aOutputLeft)
-{
- if (gWCharIsUnicode) {
- int incr;
-
- while (*aInputLeft && *aOutputLeft >= MB_CUR_MAX) {
-#ifdef HAVE_WCRTOMB
- incr = (int)wcrtomb(*aOutput, (wchar_t)**aInput, &ps);
-#else
- // XXX is this thread-safe?
- incr = (int)wctomb(*aOutput, (wchar_t)**aInput);
-#endif
- if (incr < 0) {
- NS_WARNING("mbtowc failed: possible charset mismatch");
- ** aOutput = (unsigned char)**aInput; // truncate
- incr = 1;
- }
- // most likely we're dead anyways if this assertion should fire
- NS_ASSERTION(uint32_t(incr) <= *aOutputLeft, "wrote beyond end of string");
- (*aOutput) += incr;
- (*aOutputLeft) -= incr;
- (*aInput)++;
- (*aInputLeft)--;
- }
- } else {
- // wchar_t isn't unicode, so the best we can do is treat the
- // input as if it is isolatin1 :(
- utf16_to_isolatin1(aInput, aInputLeft, aOutput, aOutputLeft);
- }
-
- return NS_OK;
-}
-
-// XXX : for now, return false
-bool
-nsNativeCharsetConverter::IsNativeUTF8()
-{
- return false;
-}
-
-#endif // USE_STDCONV
-
-//-----------------------------------------------------------------------------
-// API implementation
-//-----------------------------------------------------------------------------
-
-nsresult
-NS_CopyNativeToUnicode(const nsACString& aInput, nsAString& aOutput)
-{
- aOutput.Truncate();
-
- uint32_t inputLen = aInput.Length();
-
- nsACString::const_iterator iter;
- aInput.BeginReading(iter);
-
- //
- // OPTIMIZATION: preallocate space for largest possible result; convert
- // directly into the result buffer to avoid intermediate buffer copy.
- //
- // this will generally result in a larger allocation, but that seems
- // better than an extra buffer copy.
- //
- if (!aOutput.SetLength(inputLen, fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- nsAString::iterator out_iter;
- aOutput.BeginWriting(out_iter);
-
- char16_t* result = out_iter.get();
- uint32_t resultLeft = inputLen;
-
- const char* buf = iter.get();
- uint32_t bufLeft = inputLen;
-
- nsNativeCharsetConverter conv;
- nsresult rv = conv.NativeToUnicode(&buf, &bufLeft, &result, &resultLeft);
- if (NS_SUCCEEDED(rv)) {
- NS_ASSERTION(bufLeft == 0, "did not consume entire input buffer");
- aOutput.SetLength(inputLen - resultLeft);
- }
- return rv;
-}
-
-nsresult
-NS_CopyUnicodeToNative(const nsAString& aInput, nsACString& aOutput)
-{
- aOutput.Truncate();
-
- nsAString::const_iterator iter, end;
- aInput.BeginReading(iter);
- aInput.EndReading(end);
-
- // cannot easily avoid intermediate buffer copy.
- char temp[4096];
-
- nsNativeCharsetConverter conv;
-
- const char16_t* buf = iter.get();
- uint32_t bufLeft = Distance(iter, end);
- while (bufLeft) {
- char* p = temp;
- uint32_t tempLeft = sizeof(temp);
-
- nsresult rv = conv.UnicodeToNative(&buf, &bufLeft, &p, &tempLeft);
- if (NS_FAILED(rv)) {
- return rv;
- }
-
- if (tempLeft < sizeof(temp)) {
- aOutput.Append(temp, sizeof(temp) - tempLeft);
- }
- }
- return NS_OK;
-}
-
-bool
-NS_IsNativeUTF8()
-{
- return nsNativeCharsetConverter::IsNativeUTF8();
-}
-
-void
-NS_StartupNativeCharsetUtils()
-{
- //
- // need to initialize the locale or else charset conversion will fail.
- // better not delay this in case some other component alters the locale
- // settings.
- //
- // XXX we assume that we are called early enough that we should
- // always be the first to care about the locale's charset.
- //
- setlocale(LC_CTYPE, "");
-
- nsNativeCharsetConverter::GlobalInit();
-}
-
-void
-NS_ShutdownNativeCharsetUtils()
-{
- nsNativeCharsetConverter::GlobalShutdown();
-}
-
-//-----------------------------------------------------------------------------
-// XP_WIN
-//-----------------------------------------------------------------------------
-#elif defined(XP_WIN)
+#if defined(XP_WIN)
#include <windows.h>
#include "nsString.h"
@@ -980,30 +125,22 @@ NS_ConvertWtoA(const char16_t* aStrInW, int aBufferSizeOut,
#else
+// Non-windows will always use UTF-8 conversion.
+
#include "nsReadableUtils.h"
nsresult
NS_CopyNativeToUnicode(const nsACString& aInput, nsAString& aOutput)
{
- CopyASCIItoUTF16(aInput, aOutput);
+ CopyUTF8toUTF16(aInput, aOutput);
return NS_OK;
}
nsresult
NS_CopyUnicodeToNative(const nsAString& aInput, nsACString& aOutput)
{
- LossyCopyUTF16toASCII(aInput, aOutput);
+ CopyUTF16toUTF8(aInput, aOutput);
return NS_OK;
}
-void
-NS_StartupNativeCharsetUtils()
-{
-}
-
-void
-NS_ShutdownNativeCharsetUtils()
-{
-}
-
#endif
diff --git a/xpcom/io/nsNativeCharsetUtils.h b/xpcom/io/nsNativeCharsetUtils.h
index bee533cbef..572995aad5 100644
--- a/xpcom/io/nsNativeCharsetUtils.h
+++ b/xpcom/io/nsNativeCharsetUtils.h
@@ -14,9 +14,13 @@
* *** THESE ARE NOT GENERAL PURPOSE CONVERTERS *** *
* *
* NS_CopyNativeToUnicode / NS_CopyUnicodeToNative should only be used *
- * for converting *FILENAMES* between native and unicode. They are not *
+ * for converting *FILENAMES* between bytes and UTF-16. They are not *
* designed or tested for general encoding converter use. *
* *
+ * On Windows, these functions convert to and from the system's legacy *
+ * code page, which cannot represent all of Unicode. Elsewhere, these *
+ * convert to and from UTF-8. *
+ * *
\*****************************************************************************/
/**
@@ -33,25 +37,15 @@ nsresult NS_CopyUnicodeToNative(const nsAString& aInput, nsACString& aOutput);
* name in UTF-8 out of nsIFile, we can just use |GetNativeLeafName| rather
* than using |GetLeafName| and converting the result to UTF-8 if the file
* system encoding is UTF-8.
- * On Unix, it depends on the locale and is not known in advance (at the
- * compilation time) so that this function needs to be a real function.
- * On Windows and other platforms (e.g. OS2), it's never UTF-8.
*/
-#if defined(XP_UNIX)
-bool NS_IsNativeUTF8();
-#else
inline bool
NS_IsNativeUTF8()
{
+#ifdef XP_WIN
return false;
-}
+#else
+ return true;
#endif
-
-
-/**
- * internal
- */
-void NS_StartupNativeCharsetUtils();
-void NS_ShutdownNativeCharsetUtils();
+}
#endif // nsNativeCharsetUtils_h__