diff options
Diffstat (limited to 'xpcom/io/nsUnicharInputStream.cpp')
-rw-r--r-- | xpcom/io/nsUnicharInputStream.cpp | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/xpcom/io/nsUnicharInputStream.cpp b/xpcom/io/nsUnicharInputStream.cpp new file mode 100644 index 0000000000..27c074c092 --- /dev/null +++ b/xpcom/io/nsUnicharInputStream.cpp @@ -0,0 +1,398 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsUnicharInputStream.h" +#include "nsIInputStream.h" +#include "nsIServiceManager.h" +#include "nsString.h" +#include "nsTArray.h" +#include "nsAutoPtr.h" +#include "nsCRT.h" +#include "nsStreamUtils.h" +#include "nsUTF8Utils.h" +#include "mozilla/Attributes.h" +#include <fcntl.h> +#if defined(XP_WIN) +#include <io.h> +#else +#include <unistd.h> +#endif + +#define STRING_BUFFER_SIZE 8192 + +class StringUnicharInputStream final : public nsIUnicharInputStream +{ +public: + explicit StringUnicharInputStream(const nsAString& aString) : + mString(aString), mPos(0), mLen(aString.Length()) { } + + NS_DECL_ISUPPORTS + NS_DECL_NSIUNICHARINPUTSTREAM + + nsString mString; + uint32_t mPos; + uint32_t mLen; + +private: + ~StringUnicharInputStream() { } +}; + +NS_IMETHODIMP +StringUnicharInputStream::Read(char16_t* aBuf, + uint32_t aCount, + uint32_t* aReadCount) +{ + if (mPos >= mLen) { + *aReadCount = 0; + return NS_OK; + } + nsAString::const_iterator iter; + mString.BeginReading(iter); + const char16_t* us = iter.get(); + uint32_t amount = mLen - mPos; + if (amount > aCount) { + amount = aCount; + } + memcpy(aBuf, us + mPos, sizeof(char16_t) * amount); + mPos += amount; + *aReadCount = amount; + return NS_OK; +} + +NS_IMETHODIMP +StringUnicharInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, + void* aClosure, + uint32_t aCount, uint32_t* aReadCount) +{ + uint32_t bytesWritten; + uint32_t totalBytesWritten = 0; + + nsresult rv; + aCount = XPCOM_MIN(mString.Length() - mPos, aCount); + + nsAString::const_iterator iter; + mString.BeginReading(iter); + + while (aCount) { + rv = aWriter(this, aClosure, iter.get() + mPos, + totalBytesWritten, aCount, &bytesWritten); + + if (NS_FAILED(rv)) { + // don't propagate errors to the caller + break; + } + + aCount -= bytesWritten; + totalBytesWritten += bytesWritten; + mPos += bytesWritten; + } + + *aReadCount = totalBytesWritten; + + return NS_OK; +} + +NS_IMETHODIMP +StringUnicharInputStream::ReadString(uint32_t aCount, nsAString& aString, + uint32_t* aReadCount) +{ + if (mPos >= mLen) { + *aReadCount = 0; + return NS_OK; + } + uint32_t amount = mLen - mPos; + if (amount > aCount) { + amount = aCount; + } + aString = Substring(mString, mPos, amount); + mPos += amount; + *aReadCount = amount; + return NS_OK; +} + +nsresult +StringUnicharInputStream::Close() +{ + mPos = mLen; + return NS_OK; +} + +NS_IMPL_ISUPPORTS(StringUnicharInputStream, nsIUnicharInputStream) + +//---------------------------------------------------------------------- + +class UTF8InputStream final : public nsIUnicharInputStream +{ +public: + UTF8InputStream(); + nsresult Init(nsIInputStream* aStream); + + NS_DECL_ISUPPORTS + NS_DECL_NSIUNICHARINPUTSTREAM + +private: + ~UTF8InputStream(); + +protected: + int32_t Fill(nsresult* aErrorCode); + + static void CountValidUTF8Bytes(const char* aBuf, uint32_t aMaxBytes, + uint32_t& aValidUTF8bytes, + uint32_t& aValidUTF16CodeUnits); + + nsCOMPtr<nsIInputStream> mInput; + FallibleTArray<char> mByteData; + FallibleTArray<char16_t> mUnicharData; + + uint32_t mByteDataOffset; + uint32_t mUnicharDataOffset; + uint32_t mUnicharDataLength; +}; + +UTF8InputStream::UTF8InputStream() : + mByteDataOffset(0), + mUnicharDataOffset(0), + mUnicharDataLength(0) +{ +} + +nsresult +UTF8InputStream::Init(nsIInputStream* aStream) +{ + if (!mByteData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible) || + !mUnicharData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + mInput = aStream; + + return NS_OK; +} + +NS_IMPL_ISUPPORTS(UTF8InputStream, nsIUnicharInputStream) + +UTF8InputStream::~UTF8InputStream() +{ + Close(); +} + +nsresult +UTF8InputStream::Close() +{ + mInput = nullptr; + mByteData.Clear(); + mUnicharData.Clear(); + return NS_OK; +} + +nsresult +UTF8InputStream::Read(char16_t* aBuf, uint32_t aCount, uint32_t* aReadCount) +{ + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; + nsresult errorCode; + if (0 == readCount) { + // Fill the unichar buffer + int32_t bytesRead = Fill(&errorCode); + if (bytesRead <= 0) { + *aReadCount = 0; + return errorCode; + } + readCount = bytesRead; + } + if (readCount > aCount) { + readCount = aCount; + } + memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, + readCount * sizeof(char16_t)); + mUnicharDataOffset += readCount; + *aReadCount = readCount; + return NS_OK; +} + +NS_IMETHODIMP +UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, + void* aClosure, + uint32_t aCount, uint32_t* aReadCount) +{ + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset; + nsresult rv = NS_OK; + if (0 == bytesToWrite) { + // Fill the unichar buffer + int32_t bytesRead = Fill(&rv); + if (bytesRead <= 0) { + *aReadCount = 0; + return rv; + } + bytesToWrite = bytesRead; + } + + if (bytesToWrite > aCount) { + bytesToWrite = aCount; + } + + uint32_t bytesWritten; + uint32_t totalBytesWritten = 0; + + while (bytesToWrite) { + rv = aWriter(this, aClosure, + mUnicharData.Elements() + mUnicharDataOffset, + totalBytesWritten, bytesToWrite, &bytesWritten); + + if (NS_FAILED(rv)) { + // don't propagate errors to the caller + break; + } + + bytesToWrite -= bytesWritten; + totalBytesWritten += bytesWritten; + mUnicharDataOffset += bytesWritten; + } + + *aReadCount = totalBytesWritten; + + return NS_OK; +} + +NS_IMETHODIMP +UTF8InputStream::ReadString(uint32_t aCount, nsAString& aString, + uint32_t* aReadCount) +{ + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; + nsresult errorCode; + if (0 == readCount) { + // Fill the unichar buffer + int32_t bytesRead = Fill(&errorCode); + if (bytesRead <= 0) { + *aReadCount = 0; + return errorCode; + } + readCount = bytesRead; + } + if (readCount > aCount) { + readCount = aCount; + } + const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; + aString.Assign(buf, readCount); + + mUnicharDataOffset += readCount; + *aReadCount = readCount; + return NS_OK; +} + +int32_t +UTF8InputStream::Fill(nsresult* aErrorCode) +{ + if (!mInput) { + // We already closed the stream! + *aErrorCode = NS_BASE_STREAM_CLOSED; + return -1; + } + + NS_ASSERTION(mByteData.Length() >= mByteDataOffset, "unsigned madness"); + uint32_t remainder = mByteData.Length() - mByteDataOffset; + mByteDataOffset = remainder; + uint32_t nb; + *aErrorCode = NS_FillArray(mByteData, mInput, remainder, &nb); + if (nb == 0) { + // Because we assume a many to one conversion, the lingering data + // in the byte buffer must be a partial conversion + // fragment. Because we know that we have received no more new + // data to add to it, we can't convert it. Therefore, we discard + // it. + return nb; + } + NS_ASSERTION(remainder + nb == mByteData.Length(), "bad nb"); + + // Now convert as much of the byte buffer to unicode as possible + uint32_t srcLen, dstLen; + CountValidUTF8Bytes(mByteData.Elements(), remainder + nb, srcLen, dstLen); + + // the number of UCS2 characters should always be <= the number of + // UTF8 chars + NS_ASSERTION(remainder + nb >= srcLen, "cannot be longer than out buffer"); + NS_ASSERTION(dstLen <= mUnicharData.Capacity(), + "Ouch. I would overflow my buffer if I wasn't so careful."); + if (dstLen > mUnicharData.Capacity()) { + return 0; + } + + ConvertUTF8toUTF16 converter(mUnicharData.Elements()); + + nsASingleFragmentCString::const_char_iterator start = mByteData.Elements(); + nsASingleFragmentCString::const_char_iterator end = mByteData.Elements() + srcLen; + + copy_string(start, end, converter); + if (converter.Length() != dstLen) { + *aErrorCode = NS_BASE_STREAM_BAD_CONVERSION; + return -1; + } + + mUnicharDataOffset = 0; + mUnicharDataLength = dstLen; + mByteDataOffset = srcLen; + + return dstLen; +} + +void +UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, uint32_t aMaxBytes, + uint32_t& aValidUTF8bytes, + uint32_t& aValidUTF16CodeUnits) +{ + const char* c = aBuffer; + const char* end = aBuffer + aMaxBytes; + const char* lastchar = c; // pre-initialize in case of 0-length buffer + uint32_t utf16length = 0; + while (c < end && *c) { + lastchar = c; + utf16length++; + + if (UTF8traits::isASCII(*c)) { + c++; + } else if (UTF8traits::is2byte(*c)) { + c += 2; + } else if (UTF8traits::is3byte(*c)) { + c += 3; + } else if (UTF8traits::is4byte(*c)) { + c += 4; + utf16length++; // add 1 more because this will be converted to a + // surrogate pair. + } else if (UTF8traits::is5byte(*c)) { + c += 5; + } else if (UTF8traits::is6byte(*c)) { + c += 6; + } else { + NS_WARNING("Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()"); + break; // Otherwise we go into an infinite loop. But what happens now? + } + } + if (c > end) { + c = lastchar; + utf16length--; + } + + aValidUTF8bytes = c - aBuffer; + aValidUTF16CodeUnits = utf16length; +} + +nsresult +NS_NewUnicharInputStream(nsIInputStream* aStreamToWrap, + nsIUnicharInputStream** aResult) +{ + *aResult = nullptr; + + // Create converter input stream + RefPtr<UTF8InputStream> it = new UTF8InputStream(); + nsresult rv = it->Init(aStreamToWrap); + if (NS_FAILED(rv)) { + return rv; + } + + it.forget(aResult); + return NS_OK; +} |