summaryrefslogtreecommitdiff
path: root/dom/base/nsIDocumentEncoder.idl
blob: 4be2c108c8c919c6270279018708160cb28b55a6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsISupports.idl"

interface nsIDOMDocument;
interface nsIDOMRange;
interface nsISelection;
interface nsIDOMNode;
interface nsIOutputStream;

%{ C++
class nsINode;
class nsIDocument;
%}
[ptr] native nsINodePtr(nsINode);
[ptr] native nsIDocumentPtr(nsIDocument);

[scriptable, uuid(3d9371d8-a2ad-403e-8b0e-8885ad3562e3)]
interface nsIDocumentEncoderNodeFixup : nsISupports
{
  /**
   * Create a fixed up version of a node. This method is called before
   * each node in a document is about to be persisted. The implementor
   * may return a new node with fixed up attributes or null. If null is
   * returned the node should be used as-is.
   * @param aNode Node to fixup.
   * @param [OUT] aSerializeCloneKids True if the document encoder should
   * apply recursive serialization to the children of the fixed up node
   * instead of the children of the original node.
   * @return The resulting fixed up node.
   */
  nsIDOMNode fixupNode(in nsIDOMNode aNode, out boolean aSerializeCloneKids);
};

[scriptable, uuid(21f112df-d96f-47da-bfcb-5331273003d1)]
interface nsIDocumentEncoder : nsISupports
{
  // Output methods flag bits. There are a frightening number of these,
  // because everyone wants something a little bit different
   

  /** 
   * Output only the selection (as opposed to the whole document).
   */
  const unsigned long OutputSelectionOnly = (1 << 0);

  /** Plaintext output: Convert html to plaintext that looks like the html.
    * Implies wrap (except inside <pre>), since html wraps.
    * HTML, XHTML and XML output: do prettyprinting, ignoring existing formatting.
    * XML output : it doesn't implicitly wrap
    */
  const unsigned long OutputFormatted     = (1 << 1);

  /** Don't do prettyprinting. Don't do any wrapping that's not in the existing
   * HTML/XML source. This option overrides OutputFormatted if both are set.
   * HTML/XHTML output: If neither are set, there won't be prettyprinting too, but
   * long lines will be wrapped.
   * Supported also in XML and Plaintext output.
   * @note This option does not affect entity conversion.
   */
  const unsigned long OutputRaw           = (1 << 2);

  /** 
   * Do not print html head tags.
   * XHTML/HTML output only.
   */
  const unsigned long OutputBodyOnly      = (1 << 3);

  /**
   * Output as though the content is preformatted
   * (e.g. maybe it's wrapped in a PRE or PRE_WRAP style tag)
   * Plaintext output only.
   * XXXbz How does this interact with
   * OutputFormatted/OutputRaw/OutputPreformatted/OutputFormatFlowed?
   */
  const unsigned long OutputPreformatted  = (1 << 4);

  /**
   * Wrap even if we're not doing formatted output (e.g. for text fields).
   * Supported in XML, XHTML, HTML and Plaintext output.
   * Set implicitly in HTML/XHTML output when no OutputRaw.
   * Ignored when OutputRaw.
   * XXXLJ: set implicitly in HTML/XHTML output, to keep compatible behaviors
   *        for old callers of this interface
   * XXXbz How does this interact with OutputFormatFlowed?
   */
  const unsigned long OutputWrap          = (1 << 5);

  /**
   * Output for format flowed (RFC 2646). This is used when converting
   * to text for mail sending. This differs just slightly
   * but in an important way from normal formatted, and that is that
   * lines are space stuffed. This can't (correctly) be done later.
   * PlainText output only.
   * XXXbz How does this interact with
   * OutputFormatted/OutputRaw/OutputPreformatted/OutputWrap?
   */
  const unsigned long OutputFormatFlowed  = (1 << 6);

  /**
   * Convert links, image src, and script src to absolute URLs when possible.
   * XHTML/HTML output only.
   */
  const unsigned long OutputAbsoluteLinks = (1 << 7);

  /**
   * Attempt to encode entities standardized at W3C (HTML, MathML, etc).
   * This is a catch-all flag for documents with mixed contents. Beware of
   * interoperability issues. See below for other flags which might likely
   * do what you want.
   * HTML output only.
   */
  const unsigned long OutputEncodeW3CEntities = (1 << 8);

  /** 
   * LineBreak processing: if this flag is set than CR line breaks will
   * be written. If neither this nor OutputLFLineBreak is set, then we
   * will use platform line breaks. The combination of the two flags will
   * cause CRLF line breaks to be written.
   */
  const unsigned long OutputCRLineBreak = (1 << 9);

  /** 
   * LineBreak processing: if this flag is set than LF line breaks will
   * be written. If neither this nor OutputCRLineBreak is set, then we
   * will use platform line breaks. The combination of the two flags will
   * cause CRLF line breaks to be written.
   */
  const unsigned long OutputLFLineBreak = (1 << 10);

  /**
   * Output the content of noscript elements (only for serializing
   * to plaintext).
   */
  const unsigned long OutputNoScriptContent = (1 << 11);

  /**
   * Output the content of noframes elements (only for serializing
   * to plaintext). (Used only internally in the plain text serializer;
   * ignored if passed by the caller.)
   */
  const unsigned long OutputNoFramesContent = (1 << 12);

  /**
   * Don't allow any formatting nodes (e.g. <br>, <b>) inside a <pre>.
   * This is used primarily by mail. XHTML/HTML output only.
   */
  const unsigned long OutputNoFormattingInPre = (1 << 13);

  /**
   * Encode entities when outputting to a string.
   * E.g. If set, we'll output &nbsp; if clear, we'll output 0xa0.
   * The basic set is just &nbsp; &amp; &lt; &gt; &quot; for interoperability
   * with older products that don't support &alpha; and friends.
   * HTML output only.
   */
  const unsigned long OutputEncodeBasicEntities = (1 << 14);
    
  /**
   * Encode entities when outputting to a string.
   * The Latin1 entity set additionally includes 8bit accented letters
   * between 128 and 255.
   * HTML output only.
   */
  const unsigned long OutputEncodeLatin1Entities = (1 << 15);
  
  /**
   * Encode entities when outputting to a string.
   * The HTML entity set additionally includes accented letters, greek
   * letters, and other special markup symbols as defined in HTML4.
   * HTML output only.
   */
  const unsigned long OutputEncodeHTMLEntities = (1 << 16);

  /**
   * Normally &nbsp; is replaced with a space character when
   * encoding data as plain text, set this flag if that's
   * not desired.
   * Plaintext output only.
   */
  const unsigned long OutputPersistNBSP = (1 << 17);

  /**
   * Normally when serializing the whole document using the HTML or 
   * XHTML serializer, the encoding declaration is rewritten to match.
   * This flag suppresses that behavior.
   */
  const unsigned long OutputDontRewriteEncodingDeclaration = (1 << 18);
 
  /**
   * When using the HTML or XHTML serializer, skip elements that are not
   * visible when this flag is set.  Elements are not visible when they
   * have CSS style display:none or visibility:collapse, for example.
   */
  const unsigned long SkipInvisibleContent = (1 << 19);
  
  /**
   * Output for delsp=yes (RFC 3676). This is used with OutputFormatFlowed
   * when converting to text for mail sending.
   * PlainText output only.
   */
  const unsigned long OutputFormatDelSp  = (1 << 20);
 
  /**
   * Drop <br> elements considered "invisible" by the editor. OutputPreformatted
   * implies this flag.
   */
  const unsigned long OutputDropInvisibleBreak = (1 << 21);

  /**
   * Don't check for _moz_dirty attributes when deciding whether to
   * pretty-print if this flag is set (bug 599983).
   */
  const unsigned long OutputIgnoreMozDirty = (1 << 22);

  /**
   * Output the content of non-text elements as the placehodler character
   * U+FFFC (OBJECT REPLACEMENT CHARACTER, only for serializing to plaintext).
   */
  const unsigned long OutputNonTextContentAsPlaceholder = (1 << 23);

  /**
   * Don't Strip ending spaces from a line (only for serializing to plaintext).
   */
  const unsigned long OutputDontRemoveLineEndingSpaces = (1 << 24);

  /**
   * Serialize in a way that is suitable for copying a plaintext version of the
   * document to the clipboard.  This can for example cause line endings to be
   * injected at preformatted block element boundaries.
   */
  const unsigned long OutputForPlainTextClipboardCopy = (1 << 25);

  /**
   * Include ruby annotations and ruby parentheses in the output.
   * PlainText output only.
   */
  const unsigned long OutputRubyAnnotation = (1 << 26);

  /**
   * Disallow breaking of long character strings. This is important
   * for serializing e-mail which contains CJK strings. These must
   * not be broken just as "normal" longs strings aren't broken.
   */
  const unsigned long OutputDisallowLineBreaking = (1 << 27);

  /**
   * Initialize with a pointer to the document and the mime type.
   * @param aDocument Document to encode.
   * @param aMimeType MimeType to use. May also be set by SetMimeType.
   * @param aFlags Flags to use while encoding. May also be set by SetFlags.
   */
  void init(in nsIDOMDocument aDocument,
            in AString aMimeType,
            in unsigned long aFlags);
  [noscript] void nativeInit(in nsIDocumentPtr aDocument,
                             in AString aMimeType,
                             in unsigned long aFlags);

  /**
   *  If the selection is set to a non-null value, then the
   *  selection is used for encoding, otherwise the entire
   *  document is encoded.
   * @param aSelection The selection to encode.
   */
  void setSelection(in nsISelection aSelection);

  /**
   *  If the range is set to a non-null value, then the
   *  range is used for encoding, otherwise the entire
   *  document or selection is encoded.
   * @param aRange The range to encode.
   */
  void setRange(in nsIDOMRange aRange);

  /**
   *  If the node is set to a non-null value, then the
   *  node is used for encoding, otherwise the entire
   *  document or range or selection is encoded.
   * @param aNode The node to encode.
   */
  void setNode(in nsIDOMNode aNode);
  [noscript] void setNativeNode(in nsINodePtr aNode);

  /**
   *  If the container is set to a non-null value, then its
   *  child nodes are used for encoding, otherwise the entire
   *  document or range or selection or node is encoded.
   *  @param aContainer The node which child nodes will be encoded.
   */
  void setContainerNode(in nsIDOMNode aContainer);
  [noscript] void setNativeContainerNode(in nsINodePtr aContainer);

  /**
   *  Documents typically have an intrinsic character set,
   *  but if no intrinsic value is found, the platform character set
   *  is used. This function overrides both the intrinisc and platform
   *  charset.
   *  @param aCharset Overrides the both the intrinsic or platform
   *  character set when encoding the document.
   *
   *  Possible result codes: NS_ERROR_NO_CHARSET_CONVERTER
   */
  void setCharset(in ACString aCharset);

  /**
   *  Set a wrap column.  This may have no effect in some types of encoders.
   * @param aWrapColumn Column to which to wrap.
   */
  void setWrapColumn(in unsigned long aWrapColumn);

  /**
   *  The mime type preferred by the encoder.  This piece of api was
   *  added because the copy encoder may need to switch mime types on you
   *  if you ask it to copy html that really represents plaintext content.
   *  Call this AFTER Init() and SetSelection() have both been called.
   */
  readonly attribute AString mimeType;
  
  /**
   *  Encode the document and send the result to the nsIOutputStream.
   *
   *  Possible result codes are the stream errors which might have
   *  been encountered.
   * @param aStream Stream into which to encode.
   */
  void encodeToStream(in nsIOutputStream aStream);

  /**
   * Encode the document into a string.
   *
   * @return The document encoded into a string.
   */
  AString encodeToString();

  /**
   * Encode the document into a string. Stores the extra context information
   * into the two arguments.
   * @param [OUT] aContextString The string where the parent hierarchy
   *              information will be stored.
   * @param [OUT] aInfoString The string where extra context info will
   *              be stored.
   * @return The document encoded as a string.
   * 
   */
  AString encodeToStringWithContext( out AString aContextString,
                                     out AString aInfoString);

  /**
   * Encode the document into a string of limited size.
   * @param aMaxLength After aMaxLength characters, the encoder will stop
   *                   encoding new data.
   *                   Only values > 0 will be considered.
   *                   The returned string may be slightly larger than
   *                   aMaxLength because some serializers (eg. HTML)
   *                   may need to close some tags after they stop
   *                   encoding new data, or finish a line (72 columns
   *                   by default for the plain text serializer).
   *
   * @return The document encoded into a string.
   */
  AString encodeToStringWithMaxLength(in unsigned long aMaxLength);

  /**
   * Set the fixup object associated with node persistence.
   * @param aFixup The fixup object.
   */
  void setNodeFixup(in nsIDocumentEncoderNodeFixup aFixup);
};