summaryrefslogtreecommitdiff
path: root/intl/unicharutil/nsUnicodeNormalizer.cpp
blob: 6f6c3d2dd5ceaadea6a48b5c049ad2d1c8b053aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsUnicodeNormalizer.h"
#include "ICUUtils.h"
#include "unicode/unorm2.h"
#include "unicode/utext.h"

// XPCOM boilerplate: declares that nsUnicodeNormalizer implements the
// nsIUnicodeNormalizer interface; the macro supplies the nsISupports
// member definitions for the class.
NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer)

// The constructor does no work of its own; it is still defined out of line
// here, but lets the compiler generate the (empty) body.
nsUnicodeNormalizer::nsUnicodeNormalizer() = default;

// The destructor does no work of its own; it is still defined out of line
// here, but lets the compiler generate the (empty) body.
nsUnicodeNormalizer::~nsUnicodeNormalizer() = default;

/**
 * Normalizes aSrc with the given ICU normalizer and stores the result in
 * aDest.
 *
 * The output buffer is first sized from a guess based on the input length.
 * If ICU reports U_BUFFER_OVERFLOW_ERROR, the buffer is resized to the length
 * ICU returned and the normalization is attempted exactly once more.
 *
 * @param aNorm  ICU normalizer instance to apply.
 * @param aSrc   Text to normalize.
 * @param aDest  Receives the normalized text on success. On failure it is
 *               truncated to empty so callers never see a partially written
 *               buffer.
 * @return       The final ICU status, converted with
 *               ICUUtils::UErrorToNsResult.
 */
static nsresult
DoNormalization(const UNormalizer2* aNorm, const nsAString& aSrc,
                nsAString& aDest)
{
  UErrorCode errorCode = U_ZERO_ERROR;
  const int32_t length = aSrc.Length();
  const UChar* src = reinterpret_cast<const UChar*>(aSrc.BeginReading());
  // Initial guess for a capacity that is likely to be enough for most cases.
  int32_t capacity = length + (length >> 8) + 8;
  // A genuine loop is required for the retry: `continue` inside a
  // `do { ... } while (false)` jumps to the (false) condition and leaves the
  // loop, so the second attempt would never run and the cleared error code
  // would be reported as success.
  bool retried = false;
  for (;;) {
    aDest.SetLength(capacity);
    UChar* dest = reinterpret_cast<UChar*>(aDest.BeginWriting());
    const int32_t len = unorm2_normalize(aNorm, src, length, dest, capacity,
                                         &errorCode);
    if (U_SUCCESS(errorCode)) {
      // Shrink to the number of code units actually produced.
      aDest.SetLength(len);
      break;
    }
    if (errorCode != U_BUFFER_OVERFLOW_ERROR || retried) {
      // Either an unrelated failure, or the buffer was still too small after
      // resizing to the reported size. Give up and report the error rather
      // than looping, and do not hand back a buffer with unspecified contents.
      aDest.Truncate();
      break;
    }
    // Buffer wasn't big enough; adjust to the reported size and try again.
    capacity = len;
    errorCode = U_ZERO_ERROR;
    retried = true;
  }
  return ICUUtils::UErrorToNsResult(errorCode);
}

// Writes the NFD normalization of aSrc into aDest.
nsresult
nsUnicodeNormalizer::NormalizeUnicodeNFD(const nsAString& aSrc,
                                         nsAString& aDest)
{
  // unorm2_getNFDInstance hands back a static singleton that must not be
  // deleted, so it is looked up once on first use and cached together with
  // the status of that lookup.
  static UErrorCode sStatus = U_ZERO_ERROR;
  static const UNormalizer2* sNormalizer = unorm2_getNFDInstance(&sStatus);
  if (!U_SUCCESS(sStatus)) {
    // The normalizer could not be obtained; report the cached error.
    return ICUUtils::UErrorToNsResult(sStatus);
  }
  return DoNormalization(sNormalizer, aSrc, aDest);
}

// Writes the NFC normalization of aSrc into aDest.
nsresult
nsUnicodeNormalizer::NormalizeUnicodeNFC(const nsAString& aSrc,
                                         nsAString& aDest)
{
  // The normalizer instance is looked up once on first use and cached
  // together with the status of that lookup.
  static UErrorCode sStatus = U_ZERO_ERROR;
  static const UNormalizer2* sNormalizer = unorm2_getNFCInstance(&sStatus);
  if (!U_SUCCESS(sStatus)) {
    // The normalizer could not be obtained; report the cached error.
    return ICUUtils::UErrorToNsResult(sStatus);
  }
  return DoNormalization(sNormalizer, aSrc, aDest);
}

// Writes the NFKD normalization of aSrc into aDest.
nsresult
nsUnicodeNormalizer::NormalizeUnicodeNFKD(const nsAString& aSrc,
                                          nsAString& aDest)
{
  // The normalizer instance is looked up once on first use and cached
  // together with the status of that lookup.
  static UErrorCode sStatus = U_ZERO_ERROR;
  static const UNormalizer2* sNormalizer = unorm2_getNFKDInstance(&sStatus);
  if (!U_SUCCESS(sStatus)) {
    // The normalizer could not be obtained; report the cached error.
    return ICUUtils::UErrorToNsResult(sStatus);
  }
  return DoNormalization(sNormalizer, aSrc, aDest);
}

// Writes the NFKC normalization of aSrc into aDest.
nsresult
nsUnicodeNormalizer::NormalizeUnicodeNFKC(const nsAString& aSrc,
                                          nsAString& aDest)
{
  // The normalizer instance is looked up once on first use and cached
  // together with the status of that lookup.
  static UErrorCode sStatus = U_ZERO_ERROR;
  static const UNormalizer2* sNormalizer = unorm2_getNFKCInstance(&sStatus);
  if (!U_SUCCESS(sStatus)) {
    // The normalizer could not be obtained; report the cached error.
    return ICUUtils::UErrorToNsResult(sStatus);
  }
  return DoNormalization(sNormalizer, aSrc, aDest);
}