1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef INCREMENTAL_TOKENIZER_H__
#define INCREMENTAL_TOKENIZER_H__
#include "mozilla/Tokenizer.h"
#include "nsError.h"
#include <functional>
class nsIInputStream;
namespace mozilla {
class IncrementalTokenizer : public TokenizerBase
{
public:
/**
* The consumer callback. The function is called for every single token
* as found in the input. Failure result returned by this callback stops
* the tokenization immediately and bubbles to result of Feed/FinishInput.
*
* Fragment()s of consumed tokens are ensured to remain valid until next call to
* Feed/FinishInput and are pointing to a single linear buffer. Hence, those can
* be safely used to accumulate the data for processing after Feed/FinishInput
* returned.
*/
typedef std::function<nsresult(Token const&, IncrementalTokenizer& i)> Consumer;
/**
* For aWhitespaces and aAdditionalWordChars arguments see TokenizerBase.
*
* @param aConsumer
* A mandatory non-null argument, a function that consumes the tokens as they
* come when the tokenizer is fed.
* @param aRawMinBuffered
* When we have buffered at least aRawMinBuffered data, but there was no custom
* token found so far because of too small incremental feed chunks, deliver
* the raw data to preserve streaming and to save memory. This only has effect
* in OnlyCustomTokenizing mode.
*/
explicit IncrementalTokenizer(Consumer aConsumer,
const char* aWhitespaces = nullptr,
const char* aAdditionalWordChars = nullptr,
uint32_t aRawMinBuffered = 1024);
/**
* Pushes the input to be tokenized. These directly call the Consumer callback
* on every found token. Result of the Consumer callback is returned here.
*
* The tokenizer must be initialized with a valid consumer prior call to these
* methods. It's not allowed to call Feed/FinishInput from inside the Consumer
* callback.
*/
nsresult FeedInput(const nsACString& aInput);
nsresult FeedInput(nsIInputStream* aInput, uint32_t aCount);
nsresult FinishInput();
/**
* Can only be called from inside the consumer callback.
*
* When there is still anything to read from the input, tokenize it, store
* the token type and value to aToken result and shift the cursor past this
* just parsed token. Each call to Next() reads another token from
* the input and shifts the cursor.
*
* Returns false if there is not enough data to deterministically recognize
* tokens or when the last returned token was EOF.
*/
[[nodiscard]] bool Next(Token& aToken);
/**
* Can only be called from inside the consumer callback.
*
* Tells the tokenizer to revert the cursor and stop the async parsing until
* next feed of the input. This is useful when more than one token is needed
* to decide on the syntax but there is not enough input to get a next token
* (Next() returned false.)
*/
void NeedMoreInput();
/**
* Can only be called from inside the consumer callback.
*
* This makes the consumer callback be called again while parsing
* the input at the previous cursor position again. This is useful when
* the tokenizer state (custom tokens, tokenization mode) has changed and
* we want to re-parse the input again.
*/
void Rollback();
private:
// Loops over the input with TokenizerBase::Parse and calls the Consumer callback.
nsresult Process();
#ifdef DEBUG
// True when inside the consumer callback, used only for assertions.
bool mConsuming;
#endif // DEBUG
// Modifyable only from the Consumer callback, tells the parser to break, rollback
// and wait for more input.
bool mNeedMoreInput;
// Modifyable only from the Consumer callback, tells the parser to rollback and
// parse the input again, with (if modified) new settings of the tokenizer.
bool mRollback;
// The input buffer. Updated with each call to Feed/FinishInput.
nsCString mInput;
// Numerical index pointing at the current cursor position. We don't keep direct
// reference to the string buffer since the buffer gets often reallocated.
nsCString::index_type mInputCursor;
// Refernce to the consumer function.
Consumer mConsumer;
};
} // mozilla
#endif
|