summaryrefslogtreecommitdiff
path: root/dom/encoding/EncodingUtils.h
blob: 5bfb254998c1f9e19be3aeead5e9a68382833724 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef mozilla_dom_encodingutils_h_
#define mozilla_dom_encodingutils_h_

#include "nsDataHashtable.h"
#include "nsString.h"

class nsIUnicodeDecoder;
class nsIUnicodeEncoder;

namespace mozilla {
namespace dom {

class EncodingUtils
{
public:

  /**
   * Implements get an encoding algorithm from Encoding spec.
   * http://encoding.spec.whatwg.org/#concept-encoding-get
   * Given a label, this function returns the corresponding encoding or a
   * false.
   * The returned name may not be lowercased due to compatibility with
   * our internal implementations.
   *
   * @param      aLabel, incoming label describing charset to be decoded.
   * @param      aOutEncoding, returning corresponding encoding for label.
   * @return     false if no encoding was found for label.
   *             true if valid encoding found.
   */
  static bool FindEncodingForLabel(const nsACString& aLabel,
                                   nsACString& aOutEncoding);

  static bool FindEncodingForLabel(const nsAString& aLabel,
                                   nsACString& aOutEncoding)
  {
    return FindEncodingForLabel(NS_ConvertUTF16toUTF8(aLabel), aOutEncoding);
  }

  /**
   * Like FindEncodingForLabel() except labels that map to "replacement"
   * are treated as unknown.
   *
   * @param      aLabel, incoming label describing charset to be decoded.
   * @param      aOutEncoding, returning corresponding encoding for label.
   * @return     false if no encoding was found for label.
   *             true if valid encoding found.
   */
  static bool FindEncodingForLabelNoReplacement(const nsACString& aLabel,
                                                nsACString& aOutEncoding);

  static bool FindEncodingForLabelNoReplacement(const nsAString& aLabel,
                                                nsACString& aOutEncoding)
  {
    return FindEncodingForLabelNoReplacement(NS_ConvertUTF16toUTF8(aLabel),
                                             aOutEncoding);
  }

  /**
   * Remove any leading and trailing space characters, following the
   * definition of space characters from Encoding spec.
   * http://encoding.spec.whatwg.org/#terminology
   * Note that nsAString::StripWhitespace() doesn't exactly match the
   * definition. It also removes all matching chars in the string,
   * not just leading and trailing.
   *
   * @param      aString, string to be trimmed.
   */
  template<class T>
  static void TrimSpaceCharacters(T& aString)
  {
    aString.Trim(" \t\n\f\r");
  }

  /**
   * Check is the encoding is ASCII-compatible in the sense that Basic Latin
   * encodes to ASCII bytes. (The reverse may not be true!)
   *
   * @param aPreferredName a preferred encoding label
   * @return whether the encoding is ASCII-compatible
   */
  static bool IsAsciiCompatible(const nsACString& aPreferredName);

  /**
   * Instantiates a decoder for an encoding. The input must be a
   * Gecko-canonical encoding name.
   * @param aEncoding a Gecko-canonical encoding name
   * @return a decoder
   */
  static already_AddRefed<nsIUnicodeDecoder>
  DecoderForEncoding(const char* aEncoding)
  {
    nsDependentCString encoding(aEncoding);
    return DecoderForEncoding(encoding);
  }

  /**
   * Instantiates a decoder for an encoding. The input must be a
   * Gecko-canonical encoding name
   * @param aEncoding a Gecko-canonical encoding name
   * @return a decoder
   */
  static already_AddRefed<nsIUnicodeDecoder>
  DecoderForEncoding(const nsACString& aEncoding);

  /**
   * Instantiates an encoder for an encoding. The input must be a
   * Gecko-canonical encoding name.
   * @param aEncoding a Gecko-canonical encoding name
   * @return an encoder
   */
  static already_AddRefed<nsIUnicodeEncoder>
  EncoderForEncoding(const char* aEncoding)
  {
    nsDependentCString encoding(aEncoding);
    return EncoderForEncoding(encoding);
  }

  /**
   * Instantiates an encoder for an encoding. The input must be a
   * Gecko-canonical encoding name.
   * @param aEncoding a Gecko-canonical encoding name
   * @return an encoder
   */
  static already_AddRefed<nsIUnicodeEncoder>
  EncoderForEncoding(const nsACString& aEncoding);

  /**
   * Finds a Gecko language group string (e.g. x-western) for a Gecko-canonical
   * encoding name.
   *
   * @param      aEncoding, incoming label describing charset to be decoded.
   * @param      aOutGroup, returning corresponding language group.
   */
  static void LangGroupForEncoding(const nsACString& aEncoding,
                                   nsACString& aOutGroup);

private:
  EncodingUtils() = delete;
};

} // namespace dom
} // namespace mozilla

#endif // mozilla_dom_encodingutils_h_