// storage/src/mozStorageSQLFunctions.cpp

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "mozilla/ArrayUtils.h"

#include "mozStorageSQLFunctions.h"
#include "nsUnicharUtils.h"
#include <algorithm>
#include <new>

namespace mozilla {
namespace storage {

////////////////////////////////////////////////////////////////////////////////
//// Local Helper Functions

namespace {

/**
 * Performs the LIKE comparison of a string against a pattern.  For more detail
 * see http://www.sqlite.org/lang_expr.html#like.
 *
 * Characters are compared case-insensitively (both sides are folded with
 * ToUpperCase).  An un-escaped '%' matches any run of zero or more characters
 * and an un-escaped '_' matches exactly one character.  A character that
 * immediately follows an un-escaped aEscapeChar is always compared literally.
 *
 * @param aPatternItr
 *        An iterator at the start of the pattern to check for.
 * @param aPatternEnd
 *        An iterator at the end of the pattern to check for.
 * @param aStringItr
 *        An iterator at the start of the string to check for the pattern.
 * @param aStringEnd
 *        An iterator at the end of the string to check for the pattern.
 * @param aEscapeChar
 *        The character to use for escaping symbols in the pattern.
 * @return 1 if the string matches the pattern, 0 otherwise.
 */
int
likeCompare(nsAString::const_iterator aPatternItr,
            nsAString::const_iterator aPatternEnd,
            nsAString::const_iterator aStringItr,
            nsAString::const_iterator aStringEnd,
            char16_t aEscapeChar)
{
  const char16_t MATCH_ALL('%');
  const char16_t MATCH_ONE('_');

  bool lastWasEscape = false;
  while (aPatternItr != aPatternEnd) {
    /**
     * What we do in here is take a look at each character from the input
     * pattern, and do something with it.  There are 4 possibilities:
     * 1) character is an un-escaped match-all character
     * 2) character is an un-escaped match-one character
     * 3) character is an un-escaped escape character
     * 4) character is not any of the above
     */
    if (!lastWasEscape && *aPatternItr == MATCH_ALL) {
      // CASE 1
      /**
       * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
       * MATCH_ALL character.  For each MATCH_ONE character, consume one
       * character of the string being tested.  The end-of-pattern check comes
       * first so that we never dereference the end iterator.
       */
      while (aPatternItr != aPatternEnd &&
             (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE)) {
        if (*aPatternItr == MATCH_ONE) {
          // If we've hit the end of the string we are testing, no match
          if (aStringItr == aStringEnd)
            return 0;
          aStringItr++;
        }
        aPatternItr++;
      }

      // If we've hit the end of the pattern string, match
      if (aPatternItr == aPatternEnd)
        return 1;

      // Try to match the rest of the pattern at every remaining position of
      // the string; the match-all absorbs whatever precedes that position.
      while (aStringItr != aStringEnd) {
        if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd,
                        aEscapeChar)) {
          // we've hit a match, so indicate this
          return 1;
        }
        aStringItr++;
      }

      // No match
      return 0;
    }
    else if (!lastWasEscape && *aPatternItr == MATCH_ONE) {
      // CASE 2
      if (aStringItr == aStringEnd) {
        // If we've hit the end of the string we are testing, no match
        return 0;
      }
      aStringItr++;
      lastWasEscape = false;
    }
    else if (!lastWasEscape && *aPatternItr == aEscapeChar) {
      // CASE 3
      lastWasEscape = true;
    }
    else {
      // CASE 4
      if (aStringItr == aStringEnd) {
        // The pattern still requires a literal character but the string is
        // exhausted; bail out before dereferencing the end iterator.
        return 0;
      }
      if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) {
        // If we've hit a point where the strings don't match, there is no match
        return 0;
      }
      aStringItr++;
      lastWasEscape = false;
    }

    aPatternItr++;
  }

  // The pattern is used up; it is a match only if the string is used up too.
  return aStringItr == aStringEnd;
}

/**
 * This class manages a dynamic array.  It can represent an array of any
 * reasonable size, but if the array is "N" elements or smaller, it will be
 * stored using fixed space inside the auto array itself.  If the auto array
 * is a local variable, this internal storage will be allocated cheaply on the
 * stack, similar to nsAutoString.  If a larger size is requested, the memory
 * will be dynamically allocated from the heap.  Since the destructor will
 * free any heap-allocated memory, client code doesn't need to care where the
 * memory came from.
 *
 * Instances cannot be copied: a copy would either share (and later doubly
 * free) the heap buffer, or point at the other object's internal storage.
 */
template <class T, size_t N> class AutoArray
{

public:

  /**
   * @param size
   *        The number of elements required.  Heap memory is requested only
   *        when this exceeds N; that request uses the non-throwing form of
   *        new[] so a failure is reported through get() returning nullptr.
   */
  explicit AutoArray(size_t size)
  : mBuffer(size <= N ? mAutoBuffer : new (std::nothrow) T[size])
  {
  }

  ~AutoArray()
  {
    // Only heap storage is ours to free; delete[] on nullptr is a no-op, so
    // a failed allocation needs no special handling here.
    if (mBuffer != mAutoBuffer)
      delete[] mBuffer;
  }

  AutoArray(const AutoArray &) = delete;
  AutoArray &operator=(const AutoArray &) = delete;

  /**
   * Return the pointer to the allocated array.
   * @note If the array allocation failed, get() will return nullptr!
   *
   * @return the pointer to the allocated array
   */
  T *get()
  {
    return mBuffer;
  }

private:
  T *mBuffer;           // Points to mAutoBuffer if we can use it, heap otherwise.
  T mAutoBuffer[N];     // The internal memory buffer that we use if we can.
};

/**
 * Calculates the Levenshtein edit distance between two strings, comparing
 * them one UTF-16 code unit at a time.
 *
 * @param aStringS
 *        the first string
 * @param aStringT
 *        the second string
 * @param _result
 *        outparam; receives the edit distance on success and is left at -1
 *        if the working memory could not be obtained
 * @return a Sqlite result code: SQLITE_OK on success, SQLITE_NOMEM if a row
 *         buffer is unavailable.
 */
int
levenshteinDistance(const nsAString &aStringS,
                    const nsAString &aStringT,
                    int *_result)
{
  // Start from an obviously invalid answer so that an early error return
  // never leaves a plausible distance behind.
  *_result = -1;

  const uint32_t sLen = aStringS.Length();
  const uint32_t tLen = aStringT.Length();

  // Turning an empty string into the other one takes exactly one insertion
  // per character of the other string.
  if (sLen == 0) {
    *_result = tLen;
    return SQLITE_OK;
  }
  if (tLen == 0) {
    *_result = sLen;
    return SQLITE_OK;
  }

  // Conceptually we fill in a (tLen + 1) x (sLen + 1) table.  For s = "span"
  // and t = "spam":
  //
  //               s   p   a   n      <-- characters of s (columns)
  //           0   1   2   3   4
  //       s   1   .   .   .   .
  //       p   2   .   .   .   .
  //       a   3   .   .   .   .
  //       m   4   .   .   .   .
  //       ^
  //       characters of t (rows)
  //
  // The top row and left column are known up front.  Every other cell
  // depends only on its left, upper, and upper-left neighbours, so keeping
  // two rows of sLen + 1 cells alive at a time is sufficient.

  // AutoArray keeps short rows in inline storage and releases any heap
  // memory automatically when we return.
  AutoArray<int, nsAutoString::kDefaultStorageSize> row1(sLen + 1);
  AutoArray<int, nsAutoString::kDefaultStorageSize> row2(sLen + 1);

  int *above = row1.get();
  NS_ENSURE_TRUE(above, SQLITE_NOMEM);
  int *current = row2.get();
  NS_ENSURE_TRUE(current, SQLITE_NOMEM);

  // Top row: distance from the empty prefix of t to each prefix of s.
  for (uint32_t col = 0; col <= sLen; col++) {
    above[col] = col;
  }

  const char16_t *s = aStringS.BeginReading();
  const char16_t *t = aStringT.BeginReading();

  // Fill the table one row (one character of t) at a time.
  for (uint32_t row = 0; row < tLen; row++) {
    const char16_t tch = t[row];

    // Left-most cell: distance from this prefix of t to the empty prefix of s.
    current[0] = row + 1;

    for (uint32_t col = 0; col < sLen; col++) {
      // Cell (col + 1) is the cheapest of the three possible final edits:
      //   - substitute (free when the characters agree): upper-left neighbour
      //   - step down from the upper neighbour:           one extra edit
      //   - step right from the left neighbour:           one extra edit
      const int substitution = above[col] + ((s[col] == tch) ? 0 : 1);
      const int fromAbove = above[col + 1] + 1;
      const int fromLeft = current[col] + 1;
      current[col + 1] = std::min(std::min(substitution, fromAbove), fromLeft);
    }

    // The row just finished becomes the reference row for the next pass.
    // The other buffer is recycled as-is because every cell in it will be
    // overwritten.
    int *finished = current;
    current = above;
    above = finished;
  }

  // After the final swap the last completed row is the one named "above";
  // its right-most cell is the distance between the full strings.
  *_result = above[sLen];
  return SQLITE_OK;
}

// This struct is used only by registerFunctions below, but ISO C++98 forbids
// instantiating a template dependent on a locally-defined type.  Boo-urns!
//
// Each instance describes one SQL function registration; registerFunctions
// forwards the fields, in declaration order, to sqlite3_create_function.
struct Functions {
  // Name under which the function is exposed to SQL.
  const char *zName;
  // Number of arguments the function is registered with.
  int nArg;
  // Text encoding constant for the registration (e.g. SQLITE_UTF8).
  int enc;
  // Opaque user-data pointer handed to sqlite3_create_function; caseFunction
  // reads it back with sqlite3_user_data to choose upper- vs. lower-casing.
  void *pContext;
  // The C callback that implements the function.
  void (*xFunc)(::sqlite3_context*, int, sqlite3_value**);
};

} // anonymous namespace

////////////////////////////////////////////////////////////////////////////////
//// Exposed Functions

/**
 * Installs the custom lower/upper, like, and levenshteinDistance SQL
 * functions on a database connection, once for each of the UTF-16 and UTF-8
 * text encodings.
 *
 * @param aDB
 *        The connection to register the functions on.
 * @return SQLITE_OK if every registration succeeded; otherwise the result
 *         code of the first sqlite3_create_function call that failed (no
 *         further registrations are attempted after a failure).
 */
int
registerFunctions(sqlite3 *aDB)
{
  // A non-null user-data pointer tells caseFunction to upper-case.
  void *const kLower = nullptr;
  void *const kUpper = reinterpret_cast<void *>(1);

  Functions table[] = {
    // name                   args  encoding      user data  implementation
    { "lower",                1,    SQLITE_UTF16, kLower,    caseFunction },
    { "lower",                1,    SQLITE_UTF8,  kLower,    caseFunction },
    { "upper",                1,    SQLITE_UTF16, kUpper,    caseFunction },
    { "upper",                1,    SQLITE_UTF8,  kUpper,    caseFunction },

    { "like",                 2,    SQLITE_UTF16, nullptr,   likeFunction },
    { "like",                 2,    SQLITE_UTF8,  nullptr,   likeFunction },
    { "like",                 3,    SQLITE_UTF16, nullptr,   likeFunction },
    { "like",                 3,    SQLITE_UTF8,  nullptr,   likeFunction },

    { "levenshteinDistance",  2,    SQLITE_UTF16, nullptr,
      levenshteinDistanceFunction },
    { "levenshteinDistance",  2,    SQLITE_UTF8,  nullptr,
      levenshteinDistanceFunction },
  };

  // Register the entries in table order, giving up at the first failure.
  Functions *const tableEnd = table + ArrayLength(table);
  for (Functions *entry = table; entry != tableEnd; ++entry) {
    const int rv = ::sqlite3_create_function(aDB, entry->zName, entry->nArg,
                                             entry->enc, entry->pContext,
                                             entry->xFunc, nullptr, nullptr);
    if (rv != SQLITE_OK) {
      return rv;
    }
  }

  return SQLITE_OK;
}

////////////////////////////////////////////////////////////////////////////////
//// SQL Functions

/**
 * Implements both the upper() and lower() SQL functions.  The direction is
 * chosen by the user-data pointer the function was registered with: non-null
 * means upper-case, null means lower-case.
 *
 * @param aCtx
 *        The SQLite function context; receives the converted UTF-16 text.
 * @param aArgc
 *        The number of arguments (expected to be 1).
 * @param aArgv
 *        The arguments; aArgv[0] is the text to convert.
 */
void
caseFunction(sqlite3_context *aCtx,
             int aArgc,
             sqlite3_value **aArgv)
{
  NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");

  // Take a private copy of the argument so it can be converted in place.
  const char16_t *input =
    static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]));
  nsAutoString converted(input);

  if (::sqlite3_user_data(aCtx) != nullptr) {
    ::ToUpperCase(converted);
  }
  else {
    ::ToLowerCase(converted);
  }

  // SQLITE_TRANSIENT makes SQLite copy the text before `converted` goes away.
  ::sqlite3_result_text16(aCtx, converted.get(), -1, SQLITE_TRANSIENT);
}

/**
 * This implements the like() SQL function.  This is used by the LIKE operator.
 * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
 * an escape character, say E, it is implemented as 'like(B, A, E)'.
 *
 * The result is the integer 1 or 0 as computed by likeCompare.  No result is
 * set (so the function yields SQL NULL) when the text of the pattern, the
 * string, or the escape argument cannot be obtained.  A pattern longer than
 * SQLITE_MAX_LIKE_PATTERN_LENGTH bytes produces an error instead.
 *
 * @param aCtx
 *        The SQLite function context that receives the result.
 * @param aArgc
 *        The number of arguments (2, or 3 when an escape character is given).
 * @param aArgv
 *        aArgv[0] is the pattern, aArgv[1] the string to test, and aArgv[2]
 *        (optional) a string whose first character is the escape character.
 */
void
likeFunction(sqlite3_context *aCtx,
             int aArgc,
             sqlite3_value **aArgv)
{
  NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!");

  if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) {
    // The last argument is the length of the message in bytes, not an error
    // code; -1 tells SQLite to use everything up to the terminating NUL.
    ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex", -1);
    return;
  }

  const char16_t *patternText =
    static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]));
  const char16_t *stringText =
    static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1]));
  if (!patternText || !stringText)
    return;

  nsDependentString A(stringText);
  nsDependentString B(patternText);
  NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!");

  char16_t E = 0;
  if (3 == aArgc) {
    const char16_t *escapeText =
      static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[2]));
    // No escape text is available (e.g. ESCAPE NULL): leave the result
    // unset rather than dereferencing a null pointer.
    if (!escapeText)
      return;
    E = escapeText[0];
  }

  nsAString::const_iterator itrString, endString;
  A.BeginReading(itrString);
  A.EndReading(endString);
  nsAString::const_iterator itrPattern, endPattern;
  B.BeginReading(itrPattern);
  B.EndReading(endPattern);
  ::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString,
                                         endString, E));
}

/**
 * This implements the levenshteinDistance() SQL function: the edit distance
 * between the UTF-16 text of its two arguments.
 *
 * The result is SQL NULL if either argument is SQL NULL, an integer distance
 * on success, and an error otherwise.
 *
 * @param aCtx
 *        The SQLite function context that receives the result.
 * @param aArgc
 *        The number of arguments (expected to be 2).
 * @param aArgv
 *        The two values to compare.
 */
void levenshteinDistanceFunction(sqlite3_context *aCtx,
                                 int aArgc,
                                 sqlite3_value **aArgv)
{
  NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");

  // If either argument is a SQL NULL, then return SQL NULL.
  if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
      ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
    ::sqlite3_result_null(aCtx);
    return;
  }

  // Fetch the text before asking for its size: the SQLite documentation
  // recommends calling sqlite3_value_text16() first and
  // sqlite3_value_bytes16() second, so that the reported size describes the
  // buffer that was actually returned.
  const char16_t *a = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]));
  const int aLen = a ? ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t) : 0;

  const char16_t *b = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1]));
  const int bLen = b ? ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t) : 0;

  // Neither value is SQL NULL, so a null text pointer means the conversion
  // to UTF-16 could not be carried out.
  if (!a || !b) {
    ::sqlite3_result_error_nomem(aCtx);
    return;
  }

  // Compute the Levenshtein Distance, and return the result (or error).
  int distance = -1;
  const nsDependentString A(a, aLen);
  const nsDependentString B(b, bLen);
  int status = levenshteinDistance(A, B, &distance);
  if (status == SQLITE_OK) {
    ::sqlite3_result_int(aCtx, distance);
  }
  else if (status == SQLITE_NOMEM) {
    ::sqlite3_result_error_nomem(aCtx);
  }
  else {
    ::sqlite3_result_error(aCtx, "User function returned error code", -1);
  }
}

} // namespace storage
} // namespace mozilla