1/*
2** 2012 April 10
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12**
13** This module implements the spellfix1 VIRTUAL TABLE that can be used
14** to search a large vocabulary for close matches. See separate
15** documentation (http://www.sqlite.org/spellfix1.html) for details.
16*/
17#include "sqlite3ext.h"
18SQLITE_EXTENSION_INIT1
19
20#ifndef SQLITE_AMALGAMATION
21# if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
22# define NDEBUG 1
23# endif
24# if defined(NDEBUG) && defined(SQLITE_DEBUG)
25# undef NDEBUG
26# endif
27# include <string.h>
28# include <stdio.h>
29# include <stdlib.h>
30# include <assert.h>
31# define ALWAYS(X) 1
32# define NEVER(X) 0
33 typedef unsigned char u8;
34 typedef unsigned short u16;
35#endif
36#include <ctype.h>
37
38#ifndef SQLITE_OMIT_VIRTUALTABLE
39
/*
** Character classes for ASCII characters.  Each class has a number (the
** CCLASS_* macros below) and a one-character symbol (see className[]):
**
**    0  '.'  Silent letters: H, and an apostrophe inside a word.  Silent
**            characters are never copied into a phonetic hash.
**    1  'A'  Any vowel: A E I O U (and Y when not at the start of a word)
**    2  'B'  A bilabial stop or fricative: B F P V W
**    3  'C'  Other fricatives or back stops: C G J K Q S X Z
**    4  'D'  Alveolar stops: D T
**    5  'H'  Letter H at the beginning of a word.  NOTE(review): neither
**            midClass[] nor initClass[] in this file assigns this class;
**            both map H to CCLASS_SILENT.
**    6  'L'  Glide: L
**    7  'R'  Semivowel: R
**    8  'M'  Nasals: M N
**    9  'Y'  Letter Y at the beginning of a word.
**   10  '9'  Digits: 0 1 2 3 4 5 6 7 8 9
**   11  ' '  White space
**   12  '?'  Other.
*/
#define CCLASS_SILENT 0
#define CCLASS_VOWEL 1
#define CCLASS_B 2
#define CCLASS_C 3
#define CCLASS_D 4
#define CCLASS_H 5
#define CCLASS_L 6
#define CCLASS_R 7
#define CCLASS_M 8
#define CCLASS_Y 9
#define CCLASS_DIGIT 10
#define CCLASS_SPACE 11
#define CCLASS_OTHER 12
70
/*
** The following table gives the character class for non-initial ASCII
** characters.
**
** The table has 128 entries and is indexed by (c & 0x7f).  Entries for
** control characters are labelled with their hexadecimal code.  Note that
** tab (0x09), form-feed (0x0c), carriage-return (0x0d) and space are
** CCLASS_SPACE, while line-feed (0x0a) is CCLASS_OTHER.
*/
static const unsigned char midClass[] = {
  /* 0x00 */ CCLASS_OTHER, /* 0x01 */ CCLASS_OTHER, /* 0x02 */ CCLASS_OTHER,
  /* 0x03 */ CCLASS_OTHER, /* 0x04 */ CCLASS_OTHER, /* 0x05 */ CCLASS_OTHER,
  /* 0x06 */ CCLASS_OTHER, /* 0x07 */ CCLASS_OTHER, /* 0x08 */ CCLASS_OTHER,
  /* 0x09 */ CCLASS_SPACE, /* 0x0a */ CCLASS_OTHER, /* 0x0b */ CCLASS_OTHER,
  /* 0x0c */ CCLASS_SPACE, /* 0x0d */ CCLASS_SPACE, /* 0x0e */ CCLASS_OTHER,
  /* 0x0f */ CCLASS_OTHER, /* 0x10 */ CCLASS_OTHER, /* 0x11 */ CCLASS_OTHER,
  /* 0x12 */ CCLASS_OTHER, /* 0x13 */ CCLASS_OTHER, /* 0x14 */ CCLASS_OTHER,
  /* 0x15 */ CCLASS_OTHER, /* 0x16 */ CCLASS_OTHER, /* 0x17 */ CCLASS_OTHER,
  /* 0x18 */ CCLASS_OTHER, /* 0x19 */ CCLASS_OTHER, /* 0x1a */ CCLASS_OTHER,
  /* 0x1b */ CCLASS_OTHER, /* 0x1c */ CCLASS_OTHER, /* 0x1d */ CCLASS_OTHER,
  /* 0x1e */ CCLASS_OTHER, /* 0x1f */ CCLASS_OTHER, /* ' ' */ CCLASS_SPACE,
  /* ! */ CCLASS_OTHER, /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER,
  /* $ */ CCLASS_OTHER, /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER,
  /* ' */ CCLASS_SILENT, /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER,
  /* * */ CCLASS_OTHER, /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER,
  /* - */ CCLASS_OTHER, /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER,
  /* 0 */ CCLASS_DIGIT, /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT,
  /* 3 */ CCLASS_DIGIT, /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT,
  /* 6 */ CCLASS_DIGIT, /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT,
  /* 9 */ CCLASS_DIGIT, /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER,
  /* < */ CCLASS_OTHER, /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER,
  /* ? */ CCLASS_OTHER, /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL,
  /* B */ CCLASS_B, /* C */ CCLASS_C, /* D */ CCLASS_D,
  /* E */ CCLASS_VOWEL, /* F */ CCLASS_B, /* G */ CCLASS_C,
  /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C,
  /* K */ CCLASS_C, /* L */ CCLASS_L, /* M */ CCLASS_M,
  /* N */ CCLASS_M, /* O */ CCLASS_VOWEL, /* P */ CCLASS_B,
  /* Q */ CCLASS_C, /* R */ CCLASS_R, /* S */ CCLASS_C,
  /* T */ CCLASS_D, /* U */ CCLASS_VOWEL, /* V */ CCLASS_B,
  /* W */ CCLASS_B, /* X */ CCLASS_C, /* Y */ CCLASS_VOWEL,
  /* Z */ CCLASS_C, /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER,
  /* ] */ CCLASS_OTHER, /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER,
  /* ` */ CCLASS_OTHER, /* a */ CCLASS_VOWEL, /* b */ CCLASS_B,
  /* c */ CCLASS_C, /* d */ CCLASS_D, /* e */ CCLASS_VOWEL,
  /* f */ CCLASS_B, /* g */ CCLASS_C, /* h */ CCLASS_SILENT,
  /* i */ CCLASS_VOWEL, /* j */ CCLASS_C, /* k */ CCLASS_C,
  /* l */ CCLASS_L, /* m */ CCLASS_M, /* n */ CCLASS_M,
  /* o */ CCLASS_VOWEL, /* p */ CCLASS_B, /* q */ CCLASS_C,
  /* r */ CCLASS_R, /* s */ CCLASS_C, /* t */ CCLASS_D,
  /* u */ CCLASS_VOWEL, /* v */ CCLASS_B, /* w */ CCLASS_B,
  /* x */ CCLASS_C, /* y */ CCLASS_VOWEL, /* z */ CCLASS_C,
  /* { */ CCLASS_OTHER, /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER,
  /* ~ */ CCLASS_OTHER, /* 0x7f */ CCLASS_OTHER,
};
/*
** This table gives the character class for ASCII characters that form the
** initial character of a word.  It has 128 entries and is indexed by
** (c & 0x7f).
**
** The entries that differ from midClass[] are:
**
**    '    (apostrophe)  CCLASS_OTHER here, CCLASS_SILENT in midClass[]
**    Y y                CCLASS_Y here,     CCLASS_VOWEL  in midClass[]
**
** H/h (CCLASS_SILENT) and W/w (CCLASS_B) have the same class in both tables.
*/
static const unsigned char initClass[] = {
  /* 0x00 */ CCLASS_OTHER, /* 0x01 */ CCLASS_OTHER, /* 0x02 */ CCLASS_OTHER,
  /* 0x03 */ CCLASS_OTHER, /* 0x04 */ CCLASS_OTHER, /* 0x05 */ CCLASS_OTHER,
  /* 0x06 */ CCLASS_OTHER, /* 0x07 */ CCLASS_OTHER, /* 0x08 */ CCLASS_OTHER,
  /* 0x09 */ CCLASS_SPACE, /* 0x0a */ CCLASS_OTHER, /* 0x0b */ CCLASS_OTHER,
  /* 0x0c */ CCLASS_SPACE, /* 0x0d */ CCLASS_SPACE, /* 0x0e */ CCLASS_OTHER,
  /* 0x0f */ CCLASS_OTHER, /* 0x10 */ CCLASS_OTHER, /* 0x11 */ CCLASS_OTHER,
  /* 0x12 */ CCLASS_OTHER, /* 0x13 */ CCLASS_OTHER, /* 0x14 */ CCLASS_OTHER,
  /* 0x15 */ CCLASS_OTHER, /* 0x16 */ CCLASS_OTHER, /* 0x17 */ CCLASS_OTHER,
  /* 0x18 */ CCLASS_OTHER, /* 0x19 */ CCLASS_OTHER, /* 0x1a */ CCLASS_OTHER,
  /* 0x1b */ CCLASS_OTHER, /* 0x1c */ CCLASS_OTHER, /* 0x1d */ CCLASS_OTHER,
  /* 0x1e */ CCLASS_OTHER, /* 0x1f */ CCLASS_OTHER, /* ' ' */ CCLASS_SPACE,
  /* ! */ CCLASS_OTHER, /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER,
  /* $ */ CCLASS_OTHER, /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER,
  /* ' */ CCLASS_OTHER, /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER,
  /* * */ CCLASS_OTHER, /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER,
  /* - */ CCLASS_OTHER, /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER,
  /* 0 */ CCLASS_DIGIT, /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT,
  /* 3 */ CCLASS_DIGIT, /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT,
  /* 6 */ CCLASS_DIGIT, /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT,
  /* 9 */ CCLASS_DIGIT, /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER,
  /* < */ CCLASS_OTHER, /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER,
  /* ? */ CCLASS_OTHER, /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL,
  /* B */ CCLASS_B, /* C */ CCLASS_C, /* D */ CCLASS_D,
  /* E */ CCLASS_VOWEL, /* F */ CCLASS_B, /* G */ CCLASS_C,
  /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C,
  /* K */ CCLASS_C, /* L */ CCLASS_L, /* M */ CCLASS_M,
  /* N */ CCLASS_M, /* O */ CCLASS_VOWEL, /* P */ CCLASS_B,
  /* Q */ CCLASS_C, /* R */ CCLASS_R, /* S */ CCLASS_C,
  /* T */ CCLASS_D, /* U */ CCLASS_VOWEL, /* V */ CCLASS_B,
  /* W */ CCLASS_B, /* X */ CCLASS_C, /* Y */ CCLASS_Y,
  /* Z */ CCLASS_C, /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER,
  /* ] */ CCLASS_OTHER, /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER,
  /* ` */ CCLASS_OTHER, /* a */ CCLASS_VOWEL, /* b */ CCLASS_B,
  /* c */ CCLASS_C, /* d */ CCLASS_D, /* e */ CCLASS_VOWEL,
  /* f */ CCLASS_B, /* g */ CCLASS_C, /* h */ CCLASS_SILENT,
  /* i */ CCLASS_VOWEL, /* j */ CCLASS_C, /* k */ CCLASS_C,
  /* l */ CCLASS_L, /* m */ CCLASS_M, /* n */ CCLASS_M,
  /* o */ CCLASS_VOWEL, /* p */ CCLASS_B, /* q */ CCLASS_C,
  /* r */ CCLASS_R, /* s */ CCLASS_C, /* t */ CCLASS_D,
  /* u */ CCLASS_VOWEL, /* v */ CCLASS_B, /* w */ CCLASS_B,
  /* x */ CCLASS_C, /* y */ CCLASS_Y, /* z */ CCLASS_C,
  /* { */ CCLASS_OTHER, /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER,
  /* ~ */ CCLASS_OTHER, /* 0x7f */ CCLASS_OTHER,
};
170
/*
** Mapping from the character class number (0-12) to the one-character
** symbol for that class, as written into a phonetic hash.
**
** For most symbols, initClass[] maps the symbol back into its class
** number.  The exceptions visible in this file are '.' (initClass[] gives
** CCLASS_OTHER rather than CCLASS_SILENT) and 'H' (initClass[] gives
** CCLASS_SILENT rather than CCLASS_H).
*/
static const unsigned char className[] = ".ABCDHLRMY9 ?";
177
178/*
179** Generate a "phonetic hash" from a string of ASCII characters
180** in zIn[0..nIn-1].
181**
182** * Map characters by character class as defined above.
183** * Omit double-letters
184** * Omit vowels beside R and L
185** * Omit T when followed by CH
186** * Omit W when followed by R
187** * Omit D when followed by J or G
188** * Omit K in KN or G in GN at the beginning of a word
189**
190** Space to hold the result is obtained from sqlite3_malloc()
191**
192** Return NULL if memory allocation fails.
193*/
194static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){
195 unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
196 int i;
197 int nOut = 0;
198 char cPrev = 0x77;
199 char cPrevX = 0x77;
200 const unsigned char *aClass = initClass;
201
202 if( zOut==0 ) return 0;
203 if( nIn>2 ){
204 switch( zIn[0] ){
205 case 'g':
206 case 'k': {
207 if( zIn[1]=='n' ){ zIn++; nIn--; }
208 break;
209 }
210 }
211 }
212 for(i=0; i<nIn; i++){
213 unsigned char c = zIn[i];
214 if( i+1<nIn ){
215 if( c=='w' && zIn[i+1]=='r' ) continue;
216 if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue;
217 if( i+2<nIn ){
218 if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
219 }
220 }
221 c = aClass[c&0x7f];
222 if( c==CCLASS_SPACE ) continue;
223 if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
224 aClass = midClass;
225 if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
226 continue; /* No vowels beside L or R */
227 }
228 if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
229 nOut--; /* No vowels beside L or R */
230 }
231 cPrev = c;
232 if( c==CCLASS_SILENT ) continue;
233 cPrevX = c;
234 c = className[c];
235 assert( nOut>=0 );
236 if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c;
237 }
238 zOut[nOut] = 0;
239 return zOut;
240}
241
242/*
243** This is an SQL function wrapper around phoneticHash(). See
244** the description of phoneticHash() for additional information.
245*/
246static void phoneticHashSqlFunc(
247 sqlite3_context *context,
248 int argc,
249 sqlite3_value **argv
250){
251 const unsigned char *zIn;
252 unsigned char *zOut;
253
254 zIn = sqlite3_value_text(argv[0]);
255 if( zIn==0 ) return;
256 zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0]));
257 if( zOut==0 ){
258 sqlite3_result_error_nomem(context);
259 }else{
260 sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
261 }
262}
263
264/*
265** Return the character class number for a character given its
266** context.
267*/
268static char characterClass(char cPrev, char c){
269 return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f];
270}
271
272/*
273** Return the cost of inserting or deleting character c immediately
274** following character cPrev. If cPrev==0, that means c is the first
275** character of the word.
276*/
277static int insertOrDeleteCost(char cPrev, char c, char cNext){
278 char classC = characterClass(cPrev, c);
279 char classCprev;
280
281 if( classC==CCLASS_SILENT ){
282 /* Insert or delete "silent" characters such as H or W */
283 return 1;
284 }
285 if( cPrev==c ){
286 /* Repeated characters, or miss a repeat */
287 return 10;
288 }
289 if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){
290 return 20; /* Insert a vowel before or after 'r' */
291 }
292 classCprev = characterClass(cPrev, cPrev);
293 if( classC==classCprev ){
294 if( classC==CCLASS_VOWEL ){
295 /* Remove or add a new vowel to a vowel cluster */
296 return 15;
297 }else{
298 /* Remove or add a consonant not in the same class */
299 return 50;
300 }
301 }
302
303 /* any other character insertion or deletion */
304 return 100;
305}
306
/*
** Divide the insertion cost by this factor when appending to the
** end of the word.  editdist1() applies it to insertions made once the
** last character of zA has been reached, and to every insertion when
** nothing of zA remains after the common prefix.
*/
#define FINAL_INS_COST_DIV 4
312
313/*
314** Return the cost of substituting cTo in place of cFrom assuming
315** the previous character is cPrev. If cPrev==0 then cTo is the first
316** character of the word.
317*/
318static int substituteCost(char cPrev, char cFrom, char cTo){
319 char classFrom, classTo;
320 if( cFrom==cTo ){
321 /* Exact match */
322 return 0;
323 }
324 if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){
325 /* differ only in case */
326 return 0;
327 }
328 classFrom = characterClass(cPrev, cFrom);
329 classTo = characterClass(cPrev, cTo);
330 if( classFrom==classTo ){
331 /* Same character class */
332 return 40;
333 }
334 if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y
335 && classTo>=CCLASS_B && classTo<=CCLASS_Y ){
336 /* Convert from one consonant to another, but in a different class */
337 return 75;
338 }
339 /* Any other subsitution */
340 return 100;
341}
342
343/*
344** Given two strings zA and zB which are pure ASCII, return the cost
345** of transforming zA into zB. If zA ends with '*' assume that it is
346** a prefix of zB and give only minimal penalty for extra characters
347** on the end of zB.
348**
349** Smaller numbers mean a closer match.
350**
351** Negative values indicate an error:
352** -1 One of the inputs is NULL
353** -2 Non-ASCII characters on input
354** -3 Unable to allocate memory
355**
356** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
357** of zB that matched the pattern in zA. If zA does not end with a '*',
358** then this value is always the number of bytes in zB (i.e. strlen(zB)).
359** If zA does end in a '*', then it is the number of bytes in the prefix
360** of zB that was deemed to match zA.
361*/
362static int editdist1(const char *zA, const char *zB, int *pnMatch){
363 int nA, nB; /* Number of characters in zA[] and zB[] */
364 int xA, xB; /* Loop counters for zA[] and zB[] */
365 char cA = 0, cB; /* Current character of zA and zB */
366 char cAprev, cBprev; /* Previous character of zA and zB */
367 char cAnext, cBnext; /* Next character in zA and zB */
368 int d; /* North-west cost value */
369 int dc = 0; /* North-west character value */
370 int res; /* Final result */
371 int *m; /* The cost matrix */
372 char *cx; /* Corresponding character values */
373 int *toFree = 0; /* Malloced space */
374 int nMatch = 0;
375 int mStack[60+15]; /* Stack space to use if not too much is needed */
376
377 /* Early out if either input is NULL */
378 if( zA==0 || zB==0 ) return -1;
379
380 /* Skip any common prefix */
381 while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
382 if( pnMatch ) *pnMatch = nMatch;
383 if( zA[0]==0 && zB[0]==0 ) return 0;
384
385#if 0
386 printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
387#endif
388
389 /* Verify input strings and measure their lengths */
390 for(nA=0; zA[nA]; nA++){
391 if( zA[nA]&0x80 ) return -2;
392 }
393 for(nB=0; zB[nB]; nB++){
394 if( zB[nB]&0x80 ) return -2;
395 }
396
397 /* Special processing if either string is empty */
398 if( nA==0 ){
399 cBprev = (char)dc;
400 for(xB=res=0; (cB = zB[xB])!=0; xB++){
401 res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
402 cBprev = cB;
403 }
404 return res;
405 }
406 if( nB==0 ){
407 cAprev = (char)dc;
408 for(xA=res=0; (cA = zA[xA])!=0; xA++){
409 res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
410 cAprev = cA;
411 }
412 return res;
413 }
414
415 /* A is a prefix of B */
416 if( zA[0]=='*' && zA[1]==0 ) return 0;
417
418 /* Allocate and initialize the Wagner matrix */
419 if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
420 m = mStack;
421 }else{
422 m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
423 if( m==0 ) return -3;
424 }
425 cx = (char*)&m[nB+1];
426
427 /* Compute the Wagner edit distance */
428 m[0] = 0;
429 cx[0] = (char)dc;
430 cBprev = (char)dc;
431 for(xB=1; xB<=nB; xB++){
432 cBnext = zB[xB];
433 cB = zB[xB-1];
434 cx[xB] = cB;
435 m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
436 cBprev = cB;
437 }
438 cAprev = (char)dc;
439 for(xA=1; xA<=nA; xA++){
440 int lastA = (xA==nA);
441 cA = zA[xA-1];
442 cAnext = zA[xA];
443 if( cA=='*' && lastA ) break;
444 d = m[0];
445 dc = cx[0];
446 m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
447 cBprev = 0;
448 for(xB=1; xB<=nB; xB++){
449 int totalCost, insCost, delCost, subCost, ncx;
450 cB = zB[xB-1];
451 cBnext = zB[xB];
452
453 /* Cost to insert cB */
454 insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
455 if( lastA ) insCost /= FINAL_INS_COST_DIV;
456
457 /* Cost to delete cA */
458 delCost = insertOrDeleteCost(cx[xB], cA, cBnext);
459
460 /* Cost to substitute cA->cB */
461 subCost = substituteCost(cx[xB-1], cA, cB);
462
463 /* Best cost */
464 totalCost = insCost + m[xB-1];
465 ncx = cB;
466 if( (delCost + m[xB])<totalCost ){
467 totalCost = delCost + m[xB];
468 ncx = cA;
469 }
470 if( (subCost + d)<totalCost ){
471 totalCost = subCost + d;
472 }
473
474#if 0
475 printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
476 " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
477 xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
478 insCost, delCost, subCost, totalCost, ncx?ncx:' ');
479#endif
480
481 /* Update the matrix */
482 d = m[xB];
483 dc = cx[xB];
484 m[xB] = totalCost;
485 cx[xB] = (char)ncx;
486 cBprev = cB;
487 }
488 cAprev = cA;
489 }
490
491 /* Free the wagner matrix and return the result */
492 if( cA=='*' ){
493 res = m[1];
494 for(xB=1; xB<=nB; xB++){
495 if( m[xB]<res ){
496 res = m[xB];
497 if( pnMatch ) *pnMatch = xB+nMatch;
498 }
499 }
500 }else{
501 res = m[nB];
502 /* In the current implementation, pnMatch is always NULL if zA does
503 ** not end in "*" */
504 assert( pnMatch==0 );
505 }
506 sqlite3_free(toFree);
507 return res;
508}
509
510/*
511** Function: editdist(A,B)
512**
513** Return the cost of transforming string A into string B. Both strings
514** must be pure ASCII text. If A ends with '*' then it is assumed to be
515** a prefix of B and extra characters on the end of B have minimal additional
516** cost.
517*/
518static void editdistSqlFunc(
519 sqlite3_context *context,
520 int argc,
521 sqlite3_value **argv
522){
523 int res = editdist1(
524 (const char*)sqlite3_value_text(argv[0]),
525 (const char*)sqlite3_value_text(argv[1]),
526 0);
527 if( res<0 ){
528 if( res==(-3) ){
529 sqlite3_result_error_nomem(context);
530 }else if( res==(-2) ){
531 sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
532 }else{
533 sqlite3_result_error(context, "NULL input to editdist()", -1);
534 }
535 }else{
536 sqlite3_result_int(context, res);
537 }
538}
539
540/* End of the fixed-cost edit distance implementation
541******************************************************************************
542*****************************************************************************
543** Begin: Configurable cost unicode edit distance routines
544*/
545/* Forward declaration of structures */
546typedef struct EditDist3Cost EditDist3Cost;
547typedef struct EditDist3Config EditDist3Config;
548typedef struct EditDist3Point EditDist3Point;
549typedef struct EditDist3From EditDist3From;
550typedef struct EditDist3FromString EditDist3FromString;
551typedef struct EditDist3To EditDist3To;
552typedef struct EditDist3ToString EditDist3ToString;
553typedef struct EditDist3Lang EditDist3Lang;
554
555
/*
** An entry in the edit cost table.
**
** Entries are chained into a singly linked list through pNext.  The FROM
** bytes are stored at the start of a[], immediately followed by the TO
** bytes.  When nFrom+nTo exceeds 4, editDist3ConfigLoad() allocates the
** object with extra trailing space so that the bytes continue past a[4].
*/
struct EditDist3Cost {
  EditDist3Cost *pNext; /* Next cost element */
  u8 nFrom; /* Number of bytes in the FROM string, a[0..nFrom-1] */
  u8 nTo; /* Number of bytes in the TO string, a[nFrom..nFrom+nTo-1] */
  u16 iCost; /* Cost of this transformation */
  char a[4] ; /* FROM string followed by TO string */
  /* Additional TO and FROM string bytes appended as necessary */
};
567
/*
** Edit costs for a particular language ID.
**
** The three default costs apply when no specific entry in pCost is used.
** editDist3ConfigLoad() initializes them to 100, 100 and 150 and lets
** rows that use '?' as a wildcard override them.
*/
struct EditDist3Lang {
  int iLang; /* Language ID */
  int iInsCost; /* Default insertion cost */
  int iDelCost; /* Default deletion cost */
  int iSubCost; /* Default substitution cost */
  EditDist3Cost *pCost; /* Linked list of specific costs */
};
578
579
/*
** The default EditDist3Lang object, with default costs: language ID 0,
** insertion cost 100, deletion cost 100, substitution cost 150, and no
** specific cost entries.
*/
static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
584
/*
** Complete configuration: the edit costs of every language ID that was
** loaded.  a[] is heap memory that editDist3ConfigLoad() grows by one
** element per distinct language ID and editDist3ConfigClear() releases.
*/
struct EditDist3Config {
  int nLang; /* Number of language IDs. Size of a[] */
  EditDist3Lang *a; /* One for each distinct language ID */
};
592
/*
** Extra information about each character in the FROM string.
**
** editDist3FromStringNew() fills in one of these for every byte offset of
** the FROM string.  apDel[] and apSubst[] point at the cost entries whose
** FROM text matches the string at that offset: entries with an empty TO
** go to apDel[], all others to apSubst[].  The two arrays are heap memory
** owned by this object; the cost entries they point to are not.
*/
struct EditDist3From {
  int nSubst; /* Number of substitution cost entries */
  int nDel; /* Number of deletion cost entries */
  int nByte; /* Number of bytes in this character */
  EditDist3Cost **apSubst; /* Array of substitution costs for this element */
  EditDist3Cost **apDel; /* Array of deletion cost entries */
};
603
/*
** A precompiled FROM string.
**
** In the common case we expect the FROM string to be reused multiple times.
** In other words, the common case will be to measure the edit distance
** from a single origin string to multiple target strings.
**
** editDist3FromStringNew() places this header, the a[] array and the
** text z[] in a single allocation.
*/
struct EditDist3FromString {
  char *z; /* The complete text of the FROM string */
  int n; /* Number of bytes in z[], not counting a trailing '*' */
  int isPrefix; /* True if ends with '*' character */
  EditDist3From *a; /* Extra info about each char of the FROM string */
};
617
/*
** Extra information about each character in the TO string.
*/
struct EditDist3To {
  int nIns; /* Number of insertion cost entries */
  int nByte; /* Number of bytes in this character */
  EditDist3Cost **apIns; /* Array of insertion cost entries */
};
626
/*
** A precompiled TO string
*/
struct EditDist3ToString {
  char *z; /* The complete text of the TO string */
  int n; /* Length of the TO string (NOTE(review): presumably bytes, as
         ** for EditDist3FromString.n -- confirm against users) */
  EditDist3To *a; /* Extra info about each char of the TO string */
};
635
636/*
637** Clear or delete an instance of the object that records all edit-distance
638** weights.
639*/
640static void editDist3ConfigClear(EditDist3Config *p){
641 int i;
642 if( p==0 ) return;
643 for(i=0; i<p->nLang; i++){
644 EditDist3Cost *pCost, *pNext;
645 pCost = p->a[i].pCost;
646 while( pCost ){
647 pNext = pCost->pNext;
648 sqlite3_free(pCost);
649 pCost = pNext;
650 }
651 }
652 sqlite3_free(p->a);
653 memset(p, 0, sizeof(*p));
654}
655static void editDist3ConfigDelete(void *pIn){
656 EditDist3Config *p = (EditDist3Config*)pIn;
657 editDist3ConfigClear(p);
658 sqlite3_free(p);
659}
660
661/* Compare the FROM values of two EditDist3Cost objects, for sorting.
662** Return negative, zero, or positive if the A is less than, equal to,
663** or greater than B.
664*/
665static int editDist3CostCompare(EditDist3Cost *pA, EditDist3Cost *pB){
666 int n = pA->nFrom;
667 int rc;
668 if( n>pB->nFrom ) n = pB->nFrom;
669 rc = strncmp(pA->a, pB->a, n);
670 if( rc==0 ) rc = pA->nFrom - pB->nFrom;
671 return rc;
672}
673
674/*
675** Merge together two sorted lists of EditDist3Cost objects, in order
676** of increasing FROM.
677*/
678static EditDist3Cost *editDist3CostMerge(
679 EditDist3Cost *pA,
680 EditDist3Cost *pB
681){
682 EditDist3Cost *pHead = 0;
683 EditDist3Cost **ppTail = &pHead;
684 EditDist3Cost *p;
685 while( pA && pB ){
686 if( editDist3CostCompare(pA,pB)<=0 ){
687 p = pA;
688 pA = pA->pNext;
689 }else{
690 p = pB;
691 pB = pB->pNext;
692 }
693 *ppTail = p;
694 ppTail = &p->pNext;
695 }
696 if( pA ){
697 *ppTail = pA;
698 }else{
699 *ppTail = pB;
700 }
701 return pHead;
702}
703
704/*
705** Sort a list of EditDist3Cost objects into order of increasing FROM
706*/
707static EditDist3Cost *editDist3CostSort(EditDist3Cost *pList){
708 EditDist3Cost *ap[60], *p;
709 int i;
710 int mx = 0;
711 ap[0] = 0;
712 ap[1] = 0;
713 while( pList ){
714 p = pList;
715 pList = p->pNext;
716 p->pNext = 0;
717 for(i=0; ap[i]; i++){
718 p = editDist3CostMerge(ap[i],p);
719 ap[i] = 0;
720 }
721 ap[i] = p;
722 if( i>mx ){
723 mx = i;
724 ap[i+1] = 0;
725 }
726 }
727 p = 0;
728 for(i=0; i<=mx; i++){
729 if( ap[i] ) p = editDist3CostMerge(p,ap[i]);
730 }
731 return p;
732}
733
734/*
735** Load all edit-distance weights from a table.
736*/
737static int editDist3ConfigLoad(
738 EditDist3Config *p, /* The edit distance configuration to load */
739 sqlite3 *db, /* Load from this database */
740 const char *zTable /* Name of the table from which to load */
741){
742 sqlite3_stmt *pStmt;
743 int rc, rc2;
744 char *zSql;
745 int iLangPrev = -9999;
746 EditDist3Lang *pLang = 0;
747
748 zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost"
749 " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable);
750 if( zSql==0 ) return SQLITE_NOMEM;
751 rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
752 sqlite3_free(zSql);
753 if( rc ) return rc;
754 editDist3ConfigClear(p);
755 while( sqlite3_step(pStmt)==SQLITE_ROW ){
756 int iLang = sqlite3_column_int(pStmt, 0);
757 const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1);
758 int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0;
759 const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
760 int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
761 int iCost = sqlite3_column_int(pStmt, 3);
762
763 assert( zFrom!=0 || nFrom==0 );
764 assert( zTo!=0 || nTo==0 );
765 if( nFrom>100 || nTo>100 ) continue;
766 if( iCost<0 ) continue;
767 if( iCost>=10000 ) continue; /* Costs above 10K are considered infinite */
768 if( pLang==0 || iLang!=iLangPrev ){
769 EditDist3Lang *pNew;
770 pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
771 if( pNew==0 ){ rc = SQLITE_NOMEM; break; }
772 p->a = pNew;
773 pLang = &p->a[p->nLang];
774 p->nLang++;
775 pLang->iLang = iLang;
776 pLang->iInsCost = 100;
777 pLang->iDelCost = 100;
778 pLang->iSubCost = 150;
779 pLang->pCost = 0;
780 iLangPrev = iLang;
781 }
782 if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){
783 pLang->iDelCost = iCost;
784 }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){
785 pLang->iInsCost = iCost;
786 }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){
787 pLang->iSubCost = iCost;
788 }else{
789 EditDist3Cost *pCost;
790 int nExtra = nFrom + nTo - 4;
791 if( nExtra<0 ) nExtra = 0;
792 pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
793 if( pCost==0 ){ rc = SQLITE_NOMEM; break; }
794 pCost->nFrom = (u8)nFrom;
795 pCost->nTo = (u8)nTo;
796 pCost->iCost = (u16)iCost;
797 memcpy(pCost->a, zFrom, nFrom);
798 memcpy(pCost->a + nFrom, zTo, nTo);
799 pCost->pNext = pLang->pCost;
800 pLang->pCost = pCost;
801 }
802 }
803 rc2 = sqlite3_finalize(pStmt);
804 if( rc==SQLITE_OK ) rc = rc2;
805 if( rc==SQLITE_OK ){
806 int iLang;
807 for(iLang=0; iLang<p->nLang; iLang++){
808 p->a[iLang].pCost = editDist3CostSort(p->a[iLang].pCost);
809 }
810 }
811 return rc;
812}
813
/*
** Return the length in bytes of the UTF-8 character whose first byte is
** c, but never more than N.
**
** Bytes up to 0x7f give 1, bytes of the form 110xxxxx give 2, bytes of
** the form 1110xxxx give 3, and every other byte gives 4.
*/
static int utf8Len(unsigned char c, int N){
  int nByte;
  if( c<=0x7f ){
    nByte = 1;
  }else if( (c&0xe0)==0xc0 ){
    nByte = 2;
  }else if( (c&0xf0)==0xe0 ){
    nByte = 3;
  }else{
    nByte = 4;
  }
  return nByte>N ? N : nByte;
}
832
833/*
834** Return TRUE (non-zero) if the To side of the given cost matches
835** the given string.
836*/
837static int matchTo(EditDist3Cost *p, const char *z, int n){
838 assert( n>0 );
839 if( p->a[p->nFrom]!=z[0] ) return 0;
840 if( p->nTo>n ) return 0;
841 if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0;
842 return 1;
843}
844
845/*
846** Return TRUE (non-zero) if the From side of the given cost matches
847** the given string.
848*/
849static int matchFrom(EditDist3Cost *p, const char *z, int n){
850 assert( p->nFrom<=n );
851 if( p->nFrom ){
852 if( p->a[0]!=z[0] ) return 0;
853 if( strncmp(p->a, z, p->nFrom)!=0 ) return 0;
854 }
855 return 1;
856}
857
858/*
859** Return TRUE (non-zero) of the next FROM character and the next TO
860** character are the same.
861*/
862static int matchFromTo(
863 EditDist3FromString *pStr, /* Left hand string */
864 int n1, /* Index of comparison character on the left */
865 const char *z2, /* Right-handl comparison character */
866 int n2 /* Bytes remaining in z2[] */
867){
868 int b1 = pStr->a[n1].nByte;
869 if( b1>n2 ) return 0;
870 assert( b1>0 );
871 if( pStr->z[n1]!=z2[0] ) return 0;
872 if( strncmp(pStr->z+n1, z2, b1)!=0 ) return 0;
873 return 1;
874}
875
876/*
877** Delete an EditDist3FromString objecct
878*/
879static void editDist3FromStringDelete(EditDist3FromString *p){
880 int i;
881 if( p ){
882 for(i=0; i<p->n; i++){
883 sqlite3_free(p->a[i].apDel);
884 sqlite3_free(p->a[i].apSubst);
885 }
886 sqlite3_free(p);
887 }
888}
889
890/*
891** Create a EditDist3FromString object.
892*/
893static EditDist3FromString *editDist3FromStringNew(
894 const EditDist3Lang *pLang,
895 const char *z,
896 int n
897){
898 EditDist3FromString *pStr;
899 EditDist3Cost *p;
900 int i;
901
902 if( z==0 ) return 0;
903 if( n<0 ) n = (int)strlen(z);
904 pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
905 if( pStr==0 ) return 0;
906 pStr->a = (EditDist3From*)&pStr[1];
907 memset(pStr->a, 0, sizeof(pStr->a[0])*n);
908 pStr->n = n;
909 pStr->z = (char*)&pStr->a[n];
910 memcpy(pStr->z, z, n+1);
911 if( n && z[n-1]=='*' ){
912 pStr->isPrefix = 1;
913 n--;
914 pStr->n--;
915 pStr->z[n] = 0;
916 }else{
917 pStr->isPrefix = 0;
918 }
919
920 for(i=0; i<n; i++){
921 EditDist3From *pFrom = &pStr->a[i];
922 memset(pFrom, 0, sizeof(*pFrom));
923 pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
924 for(p=pLang->pCost; p; p=p->pNext){
925 EditDist3Cost **apNew;
926 if( i+p->nFrom>n ) continue;
927 if( matchFrom(p, z+i, n-i)==0 ) continue;
928 if( p->nTo==0 ){
929 apNew = sqlite3_realloc64(pFrom->apDel,
930 sizeof(*apNew)*(pFrom->nDel+1));
931 if( apNew==0 ) break;
932 pFrom->apDel = apNew;
933 apNew[pFrom->nDel++] = p;
934 }else{
935 apNew = sqlite3_realloc64(pFrom->apSubst,
936 sizeof(*apNew)*(pFrom->nSubst+1));
937 if( apNew==0 ) break;
938 pFrom->apSubst = apNew;
939 apNew[pFrom->nSubst++] = p;
940 }
941 }
942 if( p ){
943 editDist3FromStringDelete(pStr);
944 pStr = 0;
945 break;
946 }
947 }
948 return pStr;
949}
950
/*
** Lower m[i] to m[j]+iCost if that sum is smaller than the current value
** of m[i]; otherwise leave m[i] alone.  iCost must be in the range
** 0..9999.
*/
static void updateCost(unsigned int *m, int i, int j, int iCost){
  unsigned int cand;
  assert( iCost>=0 );
  assert( iCost<10000 );
  cand = m[j] + iCost;
  if( m[i]>cand ) m[i] = cand;
}
967
/*
** How much stack space (in bytes) to use for the Wagner matrix in
** editDist3Core(). If more space than this is required, the entire
** matrix is taken from the heap. To reduce the load on the memory
** allocator, make this value as large as practical for the
** architecture in use.
**
** The #ifndef guard means a definition supplied before this point (for
** example on the compiler command line) takes precedence over the
** default of 1024.
*/
#ifndef SQLITE_SPELLFIX_STACKALLOC_SZ
# define SQLITE_SPELLFIX_STACKALLOC_SZ (1024)
#endif
978
/* Compute the edit distance between two strings.
**
** Parameters:
**   pFrom    The FROM string.  The structure is copied by value into a
**            local at entry and only that copy is used afterwards.
**   z2, n2   The TO string and its length in bytes.
**   pLang    Edit weights: the fixed iInsCost/iDelCost/iSubCost values plus
**            the pCost list, which is scanned here for insertion rules.
**   pnMatch  Optional output described below.  May be NULL.
**
** The return value is the computed edit distance.
**
** If an error occurs, return a negative number which is the error code.
** The only error raised directly by this routine is -1 (out of memory).
**
** If pnMatch is not NULL, then *pnMatch is set to the number of characters
** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
** not contain the pattern for a prefix-search, then this is always the number
** of characters in z2. If pFrom does contain a prefix search pattern, then
** it is the number of characters in the prefix of z2 that was deemed to
** match pFrom.
**
** Working storage is a matrix m[] of (n2+1) rows by (f.n+1) columns, stored
** row-major.  The row index is a byte offset into z2 and the column index
** is a byte offset into the FROM string.  Each cell holds the cheapest cost
** found so far for transforming that much of FROM into that much of z2.
*/
static int editDist3Core(
  EditDist3FromString *pFrom,  /* The FROM string */
  const char *z2,              /* The TO string */
  int n2,                      /* Length of the TO string */
  const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
  int *pnMatch                 /* OUT: Characters in matched prefix */
){
  int k, n;
  int i1, b1;
  int i2, b2;
  EditDist3FromString f = *pFrom;
  EditDist3To *a2;
  unsigned int *m;
  unsigned int *pToFree;
  int szRow;
  EditDist3Cost *p;
  int res;
  sqlite3_uint64 nByte;
  unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];

  /* Allocate the Wagner matrix and the a2[] array for the TO string as a
  ** single buffer: n matrix cells followed by n2 EditDist3To entries.  The
  ** on-stack buffer is used when everything fits, otherwise the heap.
  **
  ** NOTE(review): n is rounded up to an even number of cells, presumably so
  ** that a2[] (which holds pointers) begins on an 8-byte boundary - confirm.
  ** NOTE(review): the cell count is computed with int arithmetic and is not
  ** checked for overflow here; presumably callers bound the string lengths.
  */
  n = (f.n+1)*(n2+1);
  n = (n+1)&~1;
  nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
  if( nByte<=sizeof(stackSpace) ){
    m = stackSpace;
    pToFree = 0;
  }else{
    m = pToFree = sqlite3_malloc64( nByte );
    if( m==0 ) return -1;            /* Out of memory */
  }
  a2 = (EditDist3To*)&m[n];
  /* Zero a2[] so that every apIns pointer starts out cleared and the
  ** cleanup loop at editDist3Abort can free all entries unconditionally. */
  memset(a2, 0, sizeof(a2[0])*n2);

  /* Fill in the a2[] array for all bytes of the TO string: record the
  ** byte length of the character starting at each offset, and collect the
  ** insertion rules (nFrom==0) from pLang->pCost whose TO text matches at
  ** that offset.
  **
  ** NOTE(review): the two "break" tests below assume an ordering of the
  ** pCost list (insertion rules first, ascending a[0]) - confirm against
  ** the code that builds the list.
  */
  for(i2=0; i2<n2; i2++){
    a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
    for(p=pLang->pCost; p; p=p->pNext){
      EditDist3Cost **apNew;
      if( p->nFrom>0 ) break;
      if( i2+p->nTo>n2 ) continue;    /* Rule is longer than what remains */
      if( p->a[0]>z2[i2] ) break;
      if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
      a2[i2].nIns++;
      /* Grow via a temporary so the old array is still freed on failure */
      apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
      if( apNew==0 ){
        res = -1;  /* Out of memory */
        goto editDist3Abort;
      }
      a2[i2].apIns = apNew;
      a2[i2].apIns[a2[i2].nIns-1] = p;
    }
  }

  /* Prepare to compute the minimum edit distance.  Every byte of the
  ** matrix is set to 0x01, so each cell starts out holding a large value
  ** that any real cost will undercut.  Only the origin cell costs zero. */
  szRow = f.n+1;
  memset(m, 0x01, (n2+1)*szRow*sizeof(m[0]));
  m[0] = 0;

  /* First fill in the top-row of the matrix with FROM deletion costs */
  for(i1=0; i1<f.n; i1 += b1){
    b1 = f.a[i1].nByte;
    updateCost(m, i1+b1, i1, pLang->iDelCost);
    for(k=0; k<f.a[i1].nDel; k++){
      p = f.a[i1].apDel[k];
      updateCost(m, i1+p->nFrom, i1, p->iCost);
    }
  }

  /* Fill in all subsequent rows, top-to-bottom, left-to-right.  Both loops
  ** advance one whole character (b2 or b1 bytes) at a time. */
  for(i2=0; i2<n2; i2 += b2){
    int rx;      /* Starting index for current row */
    int rxp;     /* Starting index for previous row */
    b2 = a2[i2].nByte;
    rx = szRow*(i2+b2);
    rxp = szRow*i2;
    /* First column: only insertions can get here */
    updateCost(m, rx, rxp, pLang->iInsCost);
    for(k=0; k<a2[i2].nIns; k++){
      p = a2[i2].apIns[k];
      updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost);
    }
    for(i1=0; i1<f.n; i1+=b1){
      int cx;    /* Index of current cell */
      int cxp;   /* Index of cell immediately to the left */
      int cxd;   /* Index of cell to the left and one row above */
      int cxu;   /* Index of cell immediately above */
      b1 = f.a[i1].nByte;
      cxp = rx + i1;
      cx = cxp + b1;
      cxd = rxp + i1;
      cxu = cxd + b1;
      /* Deletion of the FROM character: default cost, then special rules */
      updateCost(m, cx, cxp, pLang->iDelCost);
      for(k=0; k<f.a[i1].nDel; k++){
        p = f.a[i1].apDel[k];
        updateCost(m, cxp+p->nFrom, cxp, p->iCost);
      }
      /* Insertion of the TO character */
      updateCost(m, cx, cxu, pLang->iInsCost);
      /* Exact match costs nothing */
      if( matchFromTo(&f, i1, z2+i2, n2-i2) ){
        updateCost(m, cx, cxd, 0);
      }
      /* Substitution: default cost, then special rules.  A rule may jump
      ** several columns (nFrom) and several rows (nTo) at once. */
      updateCost(m, cx, cxd, pLang->iSubCost);
      for(k=0; k<f.a[i1].nSubst; k++){
        p = f.a[i1].apSubst[k];
        if( matchTo(p, z2+i2, n2-i2) ){
          updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost);
        }
      }
    }
  }

#if 0  /* Enable for debugging */
  printf(" ^");
  for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff);
  printf("\n ^:");
  for(i1=0; i1<szRow; i1++){
    int v = m[i1];
    if( v>9999 ) printf(" ****");
    else printf(" %4d", v);
  }
  printf("\n");
  for(i2=0; i2<n2; i2++){
    printf("%c-%02x:", z2[i2], z2[i2]&0xff);
    for(i1=0; i1<szRow; i1++){
      int v = m[(i2+1)*szRow+i1];
      if( v>9999 ) printf(" ****");
      else printf(" %4d", v);
    }
    printf("\n");
  }
#endif

  /* Free memory allocations and return the result.  The default result is
  ** the bottom-right cell: all of FROM transformed into all of z2. */
  res = (int)m[szRow*(n2+1)-1];
  n = n2;
  if( f.isPrefix ){
    /* Prefix search: m[szRow*i2-1] is the last column of row i2-1, which is
    ** the cost of transforming all of FROM into the first i2-1 bytes of z2.
    ** Keep the cheapest such row.  Because "<=" is used and rows are scanned
    ** in increasing order, the last of several equally cheap rows wins, and
    ** such a row also replaces the full-string result on a tie. */
    for(i2=1; i2<=n2; i2++){
      int b = m[szRow*i2-1];
      if( b<=res ){
        res = b;
        n = i2 - 1;
      }
    }
  }
  if( pnMatch ){
    /* n is a byte count.  Subtract the UTF-8 continuation bytes (10xxxxxx)
    ** among the first n bytes of z2 to convert it to a character count. */
    int nExtra = 0;
    for(k=0; k<n; k++){
      if( (z2[k] & 0xc0)==0x80 ) nExtra++;
    }
    *pnMatch = n - nExtra;
  }

  /* Common exit, also reached on allocation failure with res==-1 */
editDist3Abort:
  for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
  sqlite3_free(pToFree);
  return res;
}
1146
1147/*
1148** Get an appropriate EditDist3Lang object.
1149*/
1150static const EditDist3Lang *editDist3FindLang(
1151 EditDist3Config *pConfig,
1152 int iLang
1153){
1154 int i;
1155 for(i=0; i<pConfig->nLang; i++){
1156 if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i];
1157 }
1158 return &editDist3Lang;
1159}
1160
1161/*
1162** Function: editdist3(A,B,iLang)
1163** editdist3(tablename)
1164**
1165** Return the cost of transforming string A into string B using edit
1166** weights for iLang.
1167**
1168** The second form loads edit weights into memory from a table.
1169*/
1170static void editDist3SqlFunc(
1171 sqlite3_context *context,
1172 int argc,
1173 sqlite3_value **argv
1174){
1175 EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context);
1176 sqlite3 *db = sqlite3_context_db_handle(context);
1177 int rc;
1178 if( argc==1 ){
1179 const char *zTable = (const char*)sqlite3_value_text(argv[0]);
1180 rc = editDist3ConfigLoad(pConfig, db, zTable);
1181 if( rc ) sqlite3_result_error_code(context, rc);
1182 }else{
1183 const char *zA = (const char*)sqlite3_value_text(argv[0]);
1184 const char *zB = (const char*)sqlite3_value_text(argv[1]);
1185 int nA = sqlite3_value_bytes(argv[0]);
1186 int nB = sqlite3_value_bytes(argv[1]);
1187 int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0;
1188 const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang);
1189 EditDist3FromString *pFrom;
1190 int dist;
1191
1192 pFrom = editDist3FromStringNew(pLang, zA, nA);
1193 if( pFrom==0 ){
1194 sqlite3_result_error_nomem(context);
1195 return;
1196 }
1197 dist = editDist3Core(pFrom, zB, nB, pLang, 0);
1198 editDist3FromStringDelete(pFrom);
1199 if( dist==(-1) ){
1200 sqlite3_result_error_nomem(context);
1201 }else{
1202 sqlite3_result_int(context, dist);
1203 }
1204 }
1205}
1206
1207/*
1208** Register the editDist3 function with SQLite
1209*/
1210static int editDist3Install(sqlite3 *db){
1211 int rc;
1212 EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
1213 if( pConfig==0 ) return SQLITE_NOMEM;
1214 memset(pConfig, 0, sizeof(*pConfig));
1215 rc = sqlite3_create_function_v2(db, "editdist3",
1216 2, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1217 editDist3SqlFunc, 0, 0, 0);
1218 if( rc==SQLITE_OK ){
1219 rc = sqlite3_create_function_v2(db, "editdist3",
1220 3, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1221 editDist3SqlFunc, 0, 0, 0);
1222 }
1223 if( rc==SQLITE_OK ){
1224 rc = sqlite3_create_function_v2(db, "editdist3",
1225 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1226 editDist3SqlFunc, 0, 0, editDist3ConfigDelete);
1227 }else{
1228 sqlite3_free(pConfig);
1229 }
1230 return rc;
1231}
1232/* End configurable cost unicode edit distance routines
1233******************************************************************************
1234******************************************************************************
1235** Begin transliterate unicode-to-ascii implementation
1236*/
1237
#if !SQLITE_AMALGAMATION
/*
** Lookup table used to help decode the first byte of a multi-byte UTF8
** character.  The table has 64 entries and is indexed by (byte - 0xc0).
** Each entry is the lead byte with its high-order marker bits removed,
** leaving only the bits that belong to the encoded value.
*/
static const unsigned char sqlite3Utf8Trans1[] = {
  /* 0xc0..0xdf: low five bits are kept */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,

  /* 0xe0..0xef: low four bits are kept */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,

  /* 0xf0..0xf7: low three bits are kept */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,

  /* 0xf8..0xfb: low two bits are kept */
  0x00, 0x01, 0x02, 0x03,

  /* 0xfc..0xfd: low bit is kept */
  0x00, 0x01,

  /* 0xfe..0xff */
  0x00, 0x00,
};
#endif
1254
1255/*
1256** Return the value of the first UTF-8 character in the string.
1257*/
1258static int utf8Read(const unsigned char *z, int n, int *pSize){
1259 int c, i;
1260
1261 /* All callers to this routine (in the current implementation)
1262 ** always have n>0. */
1263 if( NEVER(n==0) ){
1264 c = i = 0;
1265 }else{
1266 c = z[0];
1267 i = 1;
1268 if( c>=0xc0 ){
1269 c = sqlite3Utf8Trans1[c-0xc0];
1270 while( i<n && (z[i] & 0xc0)==0x80 ){
1271 c = (c<<6) + (0x3f & z[i++]);
1272 }
1273 }
1274 }
1275 *pSize = i;
1276 return c;
1277}
1278
/*
** Count the characters in the utf-8 string held in the nIn byte buffer
** pointed to by zIn.  Each call to utf8Read() consumes exactly one
** character; only the consumed size is used, the decoded value is ignored.
*/
static int utf8Charlen(const char *zIn, int nIn){
  int nChar = 0;     /* Characters seen so far */
  int iByte = 0;     /* Offset of the next unread byte */
  while( iByte<nIn ){
    int nUsed;
    utf8Read((const unsigned char *)&zIn[iByte], nIn-iByte, &nUsed);
    iByte += nUsed;
    nChar++;
  }
  return nChar;
}
1293
/*
** One row of the transliteration table: a source code point and the ASCII
** bytes that replace it.  Replacement bytes are used in order starting
** with cTo0; a zero byte in cTo1 or later ends the replacement early.
*/
typedef struct Transliteration Transliteration;
struct Transliteration {
  unsigned short int cFrom;   /* Code point being replaced */
  unsigned char cTo0;         /* First replacement byte */
  unsigned char cTo1;         /* Second replacement byte, or 0 */
  unsigned char cTo2;         /* Third replacement byte, or 0 */
  unsigned char cTo3;         /* Fourth replacement byte, or 0 */
#ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
  unsigned char cTo4;         /* Fifth replacement byte, or 0 */
#endif
};
1302
/*
** Table of translations from unicode characters into ASCII.
**
** Each row gives a source code point (cFrom) followed by up to four ASCII
** replacement bytes; unused trailing slots are 0x00.
**
** The rows MUST remain sorted by ascending cFrom: transliterate() and
** translen_to_charlen() locate entries with a binary search over this
** table.
**
** NOTE(review): 0x042A maps to 0x61 (lower-case 'a') although it is the
** upper-case hard sign and its comment says "A" - confirm intended.
** NOTE(review): 0x03CE (omega with tonos) maps to 'i' whereas 0x03C9
** (omega) maps to 'o' - confirm intended.
*/
static const Transliteration translit[] = {
  { 0x00A0, 0x20, 0x00, 0x00, 0x00 }, /* NBSP to space */
  { 0x00B5, 0x75, 0x00, 0x00, 0x00 }, /* µ to u */
  { 0x00C0, 0x41, 0x00, 0x00, 0x00 }, /* À to A */
  { 0x00C1, 0x41, 0x00, 0x00, 0x00 }, /* Á to A */
  { 0x00C2, 0x41, 0x00, 0x00, 0x00 }, /* Â to A */
  { 0x00C3, 0x41, 0x00, 0x00, 0x00 }, /* Ã to A */
  { 0x00C4, 0x41, 0x65, 0x00, 0x00 }, /* Ä to Ae */
  { 0x00C5, 0x41, 0x61, 0x00, 0x00 }, /* Å to Aa */
  { 0x00C6, 0x41, 0x45, 0x00, 0x00 }, /* Æ to AE */
  { 0x00C7, 0x43, 0x00, 0x00, 0x00 }, /* Ç to C */
  { 0x00C8, 0x45, 0x00, 0x00, 0x00 }, /* È to E */
  { 0x00C9, 0x45, 0x00, 0x00, 0x00 }, /* É to E */
  { 0x00CA, 0x45, 0x00, 0x00, 0x00 }, /* Ê to E */
  { 0x00CB, 0x45, 0x00, 0x00, 0x00 }, /* Ë to E */
  { 0x00CC, 0x49, 0x00, 0x00, 0x00 }, /* Ì to I */
  { 0x00CD, 0x49, 0x00, 0x00, 0x00 }, /* Í to I */
  { 0x00CE, 0x49, 0x00, 0x00, 0x00 }, /* Î to I */
  { 0x00CF, 0x49, 0x00, 0x00, 0x00 }, /* Ï to I */
  { 0x00D0, 0x44, 0x00, 0x00, 0x00 }, /* Ð to D */
  { 0x00D1, 0x4E, 0x00, 0x00, 0x00 }, /* Ñ to N */
  { 0x00D2, 0x4F, 0x00, 0x00, 0x00 }, /* Ò to O */
  { 0x00D3, 0x4F, 0x00, 0x00, 0x00 }, /* Ó to O */
  { 0x00D4, 0x4F, 0x00, 0x00, 0x00 }, /* Ô to O */
  { 0x00D5, 0x4F, 0x00, 0x00, 0x00 }, /* Õ to O */
  { 0x00D6, 0x4F, 0x65, 0x00, 0x00 }, /* Ö to Oe */
  { 0x00D7, 0x78, 0x00, 0x00, 0x00 }, /* × to x */
  { 0x00D8, 0x4F, 0x00, 0x00, 0x00 }, /* Ø to O */
  { 0x00D9, 0x55, 0x00, 0x00, 0x00 }, /* Ù to U */
  { 0x00DA, 0x55, 0x00, 0x00, 0x00 }, /* Ú to U */
  { 0x00DB, 0x55, 0x00, 0x00, 0x00 }, /* Û to U */
  { 0x00DC, 0x55, 0x65, 0x00, 0x00 }, /* Ü to Ue */
  { 0x00DD, 0x59, 0x00, 0x00, 0x00 }, /* Ý to Y */
  { 0x00DE, 0x54, 0x68, 0x00, 0x00 }, /* Þ to Th */
  { 0x00DF, 0x73, 0x73, 0x00, 0x00 }, /* ß to ss */
  { 0x00E0, 0x61, 0x00, 0x00, 0x00 }, /* à to a */
  { 0x00E1, 0x61, 0x00, 0x00, 0x00 }, /* á to a */
  { 0x00E2, 0x61, 0x00, 0x00, 0x00 }, /* â to a */
  { 0x00E3, 0x61, 0x00, 0x00, 0x00 }, /* ã to a */
  { 0x00E4, 0x61, 0x65, 0x00, 0x00 }, /* ä to ae */
  { 0x00E5, 0x61, 0x61, 0x00, 0x00 }, /* å to aa */
  { 0x00E6, 0x61, 0x65, 0x00, 0x00 }, /* æ to ae */
  { 0x00E7, 0x63, 0x00, 0x00, 0x00 }, /* ç to c */
  { 0x00E8, 0x65, 0x00, 0x00, 0x00 }, /* è to e */
  { 0x00E9, 0x65, 0x00, 0x00, 0x00 }, /* é to e */
  { 0x00EA, 0x65, 0x00, 0x00, 0x00 }, /* ê to e */
  { 0x00EB, 0x65, 0x00, 0x00, 0x00 }, /* ë to e */
  { 0x00EC, 0x69, 0x00, 0x00, 0x00 }, /* ì to i */
  { 0x00ED, 0x69, 0x00, 0x00, 0x00 }, /* í to i */
  { 0x00EE, 0x69, 0x00, 0x00, 0x00 }, /* î to i */
  { 0x00EF, 0x69, 0x00, 0x00, 0x00 }, /* ï to i */
  { 0x00F0, 0x64, 0x00, 0x00, 0x00 }, /* ð to d */
  { 0x00F1, 0x6E, 0x00, 0x00, 0x00 }, /* ñ to n */
  { 0x00F2, 0x6F, 0x00, 0x00, 0x00 }, /* ò to o */
  { 0x00F3, 0x6F, 0x00, 0x00, 0x00 }, /* ó to o */
  { 0x00F4, 0x6F, 0x00, 0x00, 0x00 }, /* ô to o */
  { 0x00F5, 0x6F, 0x00, 0x00, 0x00 }, /* õ to o */
  { 0x00F6, 0x6F, 0x65, 0x00, 0x00 }, /* ö to oe */
  { 0x00F7, 0x3A, 0x00, 0x00, 0x00 }, /* ÷ to : */
  { 0x00F8, 0x6F, 0x00, 0x00, 0x00 }, /* ø to o */
  { 0x00F9, 0x75, 0x00, 0x00, 0x00 }, /* ù to u */
  { 0x00FA, 0x75, 0x00, 0x00, 0x00 }, /* ú to u */
  { 0x00FB, 0x75, 0x00, 0x00, 0x00 }, /* û to u */
  { 0x00FC, 0x75, 0x65, 0x00, 0x00 }, /* ü to ue */
  { 0x00FD, 0x79, 0x00, 0x00, 0x00 }, /* ý to y */
  { 0x00FE, 0x74, 0x68, 0x00, 0x00 }, /* þ to th */
  { 0x00FF, 0x79, 0x00, 0x00, 0x00 }, /* ÿ to y */
  { 0x0100, 0x41, 0x00, 0x00, 0x00 }, /* Ā to A */
  { 0x0101, 0x61, 0x00, 0x00, 0x00 }, /* ā to a */
  { 0x0102, 0x41, 0x00, 0x00, 0x00 }, /* Ă to A */
  { 0x0103, 0x61, 0x00, 0x00, 0x00 }, /* ă to a */
  { 0x0104, 0x41, 0x00, 0x00, 0x00 }, /* Ą to A */
  { 0x0105, 0x61, 0x00, 0x00, 0x00 }, /* ą to a */
  { 0x0106, 0x43, 0x00, 0x00, 0x00 }, /* Ć to C */
  { 0x0107, 0x63, 0x00, 0x00, 0x00 }, /* ć to c */
  { 0x0108, 0x43, 0x68, 0x00, 0x00 }, /* Ĉ to Ch */
  { 0x0109, 0x63, 0x68, 0x00, 0x00 }, /* ĉ to ch */
  { 0x010A, 0x43, 0x00, 0x00, 0x00 }, /* Ċ to C */
  { 0x010B, 0x63, 0x00, 0x00, 0x00 }, /* ċ to c */
  { 0x010C, 0x43, 0x00, 0x00, 0x00 }, /* Č to C */
  { 0x010D, 0x63, 0x00, 0x00, 0x00 }, /* č to c */
  { 0x010E, 0x44, 0x00, 0x00, 0x00 }, /* Ď to D */
  { 0x010F, 0x64, 0x00, 0x00, 0x00 }, /* ď to d */
  { 0x0110, 0x44, 0x00, 0x00, 0x00 }, /* Đ to D */
  { 0x0111, 0x64, 0x00, 0x00, 0x00 }, /* đ to d */
  { 0x0112, 0x45, 0x00, 0x00, 0x00 }, /* Ē to E */
  { 0x0113, 0x65, 0x00, 0x00, 0x00 }, /* ē to e */
  { 0x0114, 0x45, 0x00, 0x00, 0x00 }, /* Ĕ to E */
  { 0x0115, 0x65, 0x00, 0x00, 0x00 }, /* ĕ to e */
  { 0x0116, 0x45, 0x00, 0x00, 0x00 }, /* Ė to E */
  { 0x0117, 0x65, 0x00, 0x00, 0x00 }, /* ė to e */
  { 0x0118, 0x45, 0x00, 0x00, 0x00 }, /* Ę to E */
  { 0x0119, 0x65, 0x00, 0x00, 0x00 }, /* ę to e */
  { 0x011A, 0x45, 0x00, 0x00, 0x00 }, /* Ě to E */
  { 0x011B, 0x65, 0x00, 0x00, 0x00 }, /* ě to e */
  { 0x011C, 0x47, 0x68, 0x00, 0x00 }, /* Ĝ to Gh */
  { 0x011D, 0x67, 0x68, 0x00, 0x00 }, /* ĝ to gh */
  { 0x011E, 0x47, 0x00, 0x00, 0x00 }, /* Ğ to G */
  { 0x011F, 0x67, 0x00, 0x00, 0x00 }, /* ğ to g */
  { 0x0120, 0x47, 0x00, 0x00, 0x00 }, /* Ġ to G */
  { 0x0121, 0x67, 0x00, 0x00, 0x00 }, /* ġ to g */
  { 0x0122, 0x47, 0x00, 0x00, 0x00 }, /* Ģ to G */
  { 0x0123, 0x67, 0x00, 0x00, 0x00 }, /* ģ to g */
  { 0x0124, 0x48, 0x68, 0x00, 0x00 }, /* Ĥ to Hh */
  { 0x0125, 0x68, 0x68, 0x00, 0x00 }, /* ĥ to hh */
  { 0x0126, 0x48, 0x00, 0x00, 0x00 }, /* Ħ to H */
  { 0x0127, 0x68, 0x00, 0x00, 0x00 }, /* ħ to h */
  { 0x0128, 0x49, 0x00, 0x00, 0x00 }, /* Ĩ to I */
  { 0x0129, 0x69, 0x00, 0x00, 0x00 }, /* ĩ to i */
  { 0x012A, 0x49, 0x00, 0x00, 0x00 }, /* Ī to I */
  { 0x012B, 0x69, 0x00, 0x00, 0x00 }, /* ī to i */
  { 0x012C, 0x49, 0x00, 0x00, 0x00 }, /* Ĭ to I */
  { 0x012D, 0x69, 0x00, 0x00, 0x00 }, /* ĭ to i */
  { 0x012E, 0x49, 0x00, 0x00, 0x00 }, /* Į to I */
  { 0x012F, 0x69, 0x00, 0x00, 0x00 }, /* į to i */
  { 0x0130, 0x49, 0x00, 0x00, 0x00 }, /* İ to I */
  { 0x0131, 0x69, 0x00, 0x00, 0x00 }, /* ı to i */
  { 0x0132, 0x49, 0x4A, 0x00, 0x00 }, /* Ĳ to IJ */
  { 0x0133, 0x69, 0x6A, 0x00, 0x00 }, /* ĳ to ij */
  { 0x0134, 0x4A, 0x68, 0x00, 0x00 }, /* Ĵ to Jh */
  { 0x0135, 0x6A, 0x68, 0x00, 0x00 }, /* ĵ to jh */
  { 0x0136, 0x4B, 0x00, 0x00, 0x00 }, /* Ķ to K */
  { 0x0137, 0x6B, 0x00, 0x00, 0x00 }, /* ķ to k */
  { 0x0138, 0x6B, 0x00, 0x00, 0x00 }, /* ĸ to k */
  { 0x0139, 0x4C, 0x00, 0x00, 0x00 }, /* Ĺ to L */
  { 0x013A, 0x6C, 0x00, 0x00, 0x00 }, /* ĺ to l */
  { 0x013B, 0x4C, 0x00, 0x00, 0x00 }, /* Ļ to L */
  { 0x013C, 0x6C, 0x00, 0x00, 0x00 }, /* ļ to l */
  { 0x013D, 0x4C, 0x00, 0x00, 0x00 }, /* Ľ to L */
  { 0x013E, 0x6C, 0x00, 0x00, 0x00 }, /* ľ to l */
  { 0x013F, 0x4C, 0x2E, 0x00, 0x00 }, /* Ŀ to L. */
  { 0x0140, 0x6C, 0x2E, 0x00, 0x00 }, /* ŀ to l. */
  { 0x0141, 0x4C, 0x00, 0x00, 0x00 }, /* Ł to L */
  { 0x0142, 0x6C, 0x00, 0x00, 0x00 }, /* ł to l */
  { 0x0143, 0x4E, 0x00, 0x00, 0x00 }, /* Ń to N */
  { 0x0144, 0x6E, 0x00, 0x00, 0x00 }, /* ń to n */
  { 0x0145, 0x4E, 0x00, 0x00, 0x00 }, /* Ņ to N */
  { 0x0146, 0x6E, 0x00, 0x00, 0x00 }, /* ņ to n */
  { 0x0147, 0x4E, 0x00, 0x00, 0x00 }, /* Ň to N */
  { 0x0148, 0x6E, 0x00, 0x00, 0x00 }, /* ň to n */
  { 0x0149, 0x27, 0x6E, 0x00, 0x00 }, /* ŉ to 'n */
  { 0x014A, 0x4E, 0x47, 0x00, 0x00 }, /* Ŋ to NG */
  { 0x014B, 0x6E, 0x67, 0x00, 0x00 }, /* ŋ to ng */
  { 0x014C, 0x4F, 0x00, 0x00, 0x00 }, /* Ō to O */
  { 0x014D, 0x6F, 0x00, 0x00, 0x00 }, /* ō to o */
  { 0x014E, 0x4F, 0x00, 0x00, 0x00 }, /* Ŏ to O */
  { 0x014F, 0x6F, 0x00, 0x00, 0x00 }, /* ŏ to o */
  { 0x0150, 0x4F, 0x00, 0x00, 0x00 }, /* Ő to O */
  { 0x0151, 0x6F, 0x00, 0x00, 0x00 }, /* ő to o */
  { 0x0152, 0x4F, 0x45, 0x00, 0x00 }, /* Œ to OE */
  { 0x0153, 0x6F, 0x65, 0x00, 0x00 }, /* œ to oe */
  { 0x0154, 0x52, 0x00, 0x00, 0x00 }, /* Ŕ to R */
  { 0x0155, 0x72, 0x00, 0x00, 0x00 }, /* ŕ to r */
  { 0x0156, 0x52, 0x00, 0x00, 0x00 }, /* Ŗ to R */
  { 0x0157, 0x72, 0x00, 0x00, 0x00 }, /* ŗ to r */
  { 0x0158, 0x52, 0x00, 0x00, 0x00 }, /* Ř to R */
  { 0x0159, 0x72, 0x00, 0x00, 0x00 }, /* ř to r */
  { 0x015A, 0x53, 0x00, 0x00, 0x00 }, /* Ś to S */
  { 0x015B, 0x73, 0x00, 0x00, 0x00 }, /* ś to s */
  { 0x015C, 0x53, 0x68, 0x00, 0x00 }, /* Ŝ to Sh */
  { 0x015D, 0x73, 0x68, 0x00, 0x00 }, /* ŝ to sh */
  { 0x015E, 0x53, 0x00, 0x00, 0x00 }, /* Ş to S */
  { 0x015F, 0x73, 0x00, 0x00, 0x00 }, /* ş to s */
  { 0x0160, 0x53, 0x00, 0x00, 0x00 }, /* Š to S */
  { 0x0161, 0x73, 0x00, 0x00, 0x00 }, /* š to s */
  { 0x0162, 0x54, 0x00, 0x00, 0x00 }, /* Ţ to T */
  { 0x0163, 0x74, 0x00, 0x00, 0x00 }, /* ţ to t */
  { 0x0164, 0x54, 0x00, 0x00, 0x00 }, /* Ť to T */
  { 0x0165, 0x74, 0x00, 0x00, 0x00 }, /* ť to t */
  { 0x0166, 0x54, 0x00, 0x00, 0x00 }, /* Ŧ to T */
  { 0x0167, 0x74, 0x00, 0x00, 0x00 }, /* ŧ to t */
  { 0x0168, 0x55, 0x00, 0x00, 0x00 }, /* Ũ to U */
  { 0x0169, 0x75, 0x00, 0x00, 0x00 }, /* ũ to u */
  { 0x016A, 0x55, 0x00, 0x00, 0x00 }, /* Ū to U */
  { 0x016B, 0x75, 0x00, 0x00, 0x00 }, /* ū to u */
  { 0x016C, 0x55, 0x00, 0x00, 0x00 }, /* Ŭ to U */
  { 0x016D, 0x75, 0x00, 0x00, 0x00 }, /* ŭ to u */
  { 0x016E, 0x55, 0x00, 0x00, 0x00 }, /* Ů to U */
  { 0x016F, 0x75, 0x00, 0x00, 0x00 }, /* ů to u */
  { 0x0170, 0x55, 0x00, 0x00, 0x00 }, /* Ű to U */
  { 0x0171, 0x75, 0x00, 0x00, 0x00 }, /* ű to u */
  { 0x0172, 0x55, 0x00, 0x00, 0x00 }, /* Ų to U */
  { 0x0173, 0x75, 0x00, 0x00, 0x00 }, /* ų to u */
  { 0x0174, 0x57, 0x00, 0x00, 0x00 }, /* Ŵ to W */
  { 0x0175, 0x77, 0x00, 0x00, 0x00 }, /* ŵ to w */
  { 0x0176, 0x59, 0x00, 0x00, 0x00 }, /* Ŷ to Y */
  { 0x0177, 0x79, 0x00, 0x00, 0x00 }, /* ŷ to y */
  { 0x0178, 0x59, 0x00, 0x00, 0x00 }, /* Ÿ to Y */
  { 0x0179, 0x5A, 0x00, 0x00, 0x00 }, /* Ź to Z */
  { 0x017A, 0x7A, 0x00, 0x00, 0x00 }, /* ź to z */
  { 0x017B, 0x5A, 0x00, 0x00, 0x00 }, /* Ż to Z */
  { 0x017C, 0x7A, 0x00, 0x00, 0x00 }, /* ż to z */
  { 0x017D, 0x5A, 0x00, 0x00, 0x00 }, /* Ž to Z */
  { 0x017E, 0x7A, 0x00, 0x00, 0x00 }, /* ž to z */
  { 0x017F, 0x73, 0x00, 0x00, 0x00 }, /* ſ to s */
  { 0x0192, 0x66, 0x00, 0x00, 0x00 }, /* ƒ to f */
  { 0x0218, 0x53, 0x00, 0x00, 0x00 }, /* Ș to S */
  { 0x0219, 0x73, 0x00, 0x00, 0x00 }, /* ș to s */
  { 0x021A, 0x54, 0x00, 0x00, 0x00 }, /* Ț to T */
  { 0x021B, 0x74, 0x00, 0x00, 0x00 }, /* ț to t */
  { 0x0386, 0x41, 0x00, 0x00, 0x00 }, /* Ά to A */
  { 0x0388, 0x45, 0x00, 0x00, 0x00 }, /* Έ to E */
  { 0x0389, 0x49, 0x00, 0x00, 0x00 }, /* Ή to I */
  { 0x038A, 0x49, 0x00, 0x00, 0x00 }, /* Ί to I */
  { 0x038C, 0x4f, 0x00, 0x00, 0x00 }, /* Ό to O */
  { 0x038E, 0x59, 0x00, 0x00, 0x00 }, /* Ύ to Y */
  { 0x038F, 0x4f, 0x00, 0x00, 0x00 }, /* Ώ to O */
  { 0x0390, 0x69, 0x00, 0x00, 0x00 }, /* ΐ to i */
  { 0x0391, 0x41, 0x00, 0x00, 0x00 }, /* Α to A */
  { 0x0392, 0x42, 0x00, 0x00, 0x00 }, /* Β to B */
  { 0x0393, 0x47, 0x00, 0x00, 0x00 }, /* Γ to G */
  { 0x0394, 0x44, 0x00, 0x00, 0x00 }, /* Δ to D */
  { 0x0395, 0x45, 0x00, 0x00, 0x00 }, /* Ε to E */
  { 0x0396, 0x5a, 0x00, 0x00, 0x00 }, /* Ζ to Z */
  { 0x0397, 0x49, 0x00, 0x00, 0x00 }, /* Η to I */
  { 0x0398, 0x54, 0x68, 0x00, 0x00 }, /* Θ to Th */
  { 0x0399, 0x49, 0x00, 0x00, 0x00 }, /* Ι to I */
  { 0x039A, 0x4b, 0x00, 0x00, 0x00 }, /* Κ to K */
  { 0x039B, 0x4c, 0x00, 0x00, 0x00 }, /* Λ to L */
  { 0x039C, 0x4d, 0x00, 0x00, 0x00 }, /* Μ to M */
  { 0x039D, 0x4e, 0x00, 0x00, 0x00 }, /* Ν to N */
  { 0x039E, 0x58, 0x00, 0x00, 0x00 }, /* Ξ to X */
  { 0x039F, 0x4f, 0x00, 0x00, 0x00 }, /* Ο to O */
  { 0x03A0, 0x50, 0x00, 0x00, 0x00 }, /* Π to P */
  { 0x03A1, 0x52, 0x00, 0x00, 0x00 }, /* Ρ to R */
  { 0x03A3, 0x53, 0x00, 0x00, 0x00 }, /* Σ to S */
  { 0x03A4, 0x54, 0x00, 0x00, 0x00 }, /* Τ to T */
  { 0x03A5, 0x59, 0x00, 0x00, 0x00 }, /* Υ to Y */
  { 0x03A6, 0x46, 0x00, 0x00, 0x00 }, /* Φ to F */
  { 0x03A7, 0x43, 0x68, 0x00, 0x00 }, /* Χ to Ch */
  { 0x03A8, 0x50, 0x73, 0x00, 0x00 }, /* Ψ to Ps */
  { 0x03A9, 0x4f, 0x00, 0x00, 0x00 }, /* Ω to O */
  { 0x03AA, 0x49, 0x00, 0x00, 0x00 }, /* Ϊ to I */
  { 0x03AB, 0x59, 0x00, 0x00, 0x00 }, /* Ϋ to Y */
  { 0x03AC, 0x61, 0x00, 0x00, 0x00 }, /* ά to a */
  { 0x03AD, 0x65, 0x00, 0x00, 0x00 }, /* έ to e */
  { 0x03AE, 0x69, 0x00, 0x00, 0x00 }, /* ή to i */
  { 0x03AF, 0x69, 0x00, 0x00, 0x00 }, /* ί to i */
  { 0x03B1, 0x61, 0x00, 0x00, 0x00 }, /* α to a */
  { 0x03B2, 0x62, 0x00, 0x00, 0x00 }, /* β to b */
  { 0x03B3, 0x67, 0x00, 0x00, 0x00 }, /* γ to g */
  { 0x03B4, 0x64, 0x00, 0x00, 0x00 }, /* δ to d */
  { 0x03B5, 0x65, 0x00, 0x00, 0x00 }, /* ε to e */
  { 0x03B6, 0x7a, 0x00, 0x00, 0x00 }, /* ζ to z */
  { 0x03B7, 0x69, 0x00, 0x00, 0x00 }, /* η to i */
  { 0x03B8, 0x74, 0x68, 0x00, 0x00 }, /* θ to th */
  { 0x03B9, 0x69, 0x00, 0x00, 0x00 }, /* ι to i */
  { 0x03BA, 0x6b, 0x00, 0x00, 0x00 }, /* κ to k */
  { 0x03BB, 0x6c, 0x00, 0x00, 0x00 }, /* λ to l */
  { 0x03BC, 0x6d, 0x00, 0x00, 0x00 }, /* μ to m */
  { 0x03BD, 0x6e, 0x00, 0x00, 0x00 }, /* ν to n */
  { 0x03BE, 0x78, 0x00, 0x00, 0x00 }, /* ξ to x */
  { 0x03BF, 0x6f, 0x00, 0x00, 0x00 }, /* ο to o */
  { 0x03C0, 0x70, 0x00, 0x00, 0x00 }, /* π to p */
  { 0x03C1, 0x72, 0x00, 0x00, 0x00 }, /* ρ to r */
  { 0x03C3, 0x73, 0x00, 0x00, 0x00 }, /* σ to s */
  { 0x03C4, 0x74, 0x00, 0x00, 0x00 }, /* τ to t */
  { 0x03C5, 0x79, 0x00, 0x00, 0x00 }, /* υ to y */
  { 0x03C6, 0x66, 0x00, 0x00, 0x00 }, /* φ to f */
  { 0x03C7, 0x63, 0x68, 0x00, 0x00 }, /* χ to ch */
  { 0x03C8, 0x70, 0x73, 0x00, 0x00 }, /* ψ to ps */
  { 0x03C9, 0x6f, 0x00, 0x00, 0x00 }, /* ω to o */
  { 0x03CA, 0x69, 0x00, 0x00, 0x00 }, /* ϊ to i */
  { 0x03CB, 0x79, 0x00, 0x00, 0x00 }, /* ϋ to y */
  { 0x03CC, 0x6f, 0x00, 0x00, 0x00 }, /* ό to o */
  { 0x03CD, 0x79, 0x00, 0x00, 0x00 }, /* ύ to y */
  { 0x03CE, 0x69, 0x00, 0x00, 0x00 }, /* ώ to i */
  { 0x0400, 0x45, 0x00, 0x00, 0x00 }, /* Ѐ to E */
  { 0x0401, 0x45, 0x00, 0x00, 0x00 }, /* Ё to E */
  { 0x0402, 0x44, 0x00, 0x00, 0x00 }, /* Ђ to D */
  { 0x0403, 0x47, 0x00, 0x00, 0x00 }, /* Ѓ to G */
  { 0x0404, 0x45, 0x00, 0x00, 0x00 }, /* Є to E */
  { 0x0405, 0x5a, 0x00, 0x00, 0x00 }, /* Ѕ to Z */
  { 0x0406, 0x49, 0x00, 0x00, 0x00 }, /* І to I */
  { 0x0407, 0x49, 0x00, 0x00, 0x00 }, /* Ї to I */
  { 0x0408, 0x4a, 0x00, 0x00, 0x00 }, /* Ј to J */
  { 0x0409, 0x49, 0x00, 0x00, 0x00 }, /* Љ to I */
  { 0x040A, 0x4e, 0x00, 0x00, 0x00 }, /* Њ to N */
  { 0x040B, 0x44, 0x00, 0x00, 0x00 }, /* Ћ to D */
  { 0x040C, 0x4b, 0x00, 0x00, 0x00 }, /* Ќ to K */
  { 0x040D, 0x49, 0x00, 0x00, 0x00 }, /* Ѝ to I */
  { 0x040E, 0x55, 0x00, 0x00, 0x00 }, /* Ў to U */
  { 0x040F, 0x44, 0x00, 0x00, 0x00 }, /* Џ to D */
  { 0x0410, 0x41, 0x00, 0x00, 0x00 }, /* А to A */
  { 0x0411, 0x42, 0x00, 0x00, 0x00 }, /* Б to B */
  { 0x0412, 0x56, 0x00, 0x00, 0x00 }, /* В to V */
  { 0x0413, 0x47, 0x00, 0x00, 0x00 }, /* Г to G */
  { 0x0414, 0x44, 0x00, 0x00, 0x00 }, /* Д to D */
  { 0x0415, 0x45, 0x00, 0x00, 0x00 }, /* Е to E */
  { 0x0416, 0x5a, 0x68, 0x00, 0x00 }, /* Ж to Zh */
  { 0x0417, 0x5a, 0x00, 0x00, 0x00 }, /* З to Z */
  { 0x0418, 0x49, 0x00, 0x00, 0x00 }, /* И to I */
  { 0x0419, 0x49, 0x00, 0x00, 0x00 }, /* Й to I */
  { 0x041A, 0x4b, 0x00, 0x00, 0x00 }, /* К to K */
  { 0x041B, 0x4c, 0x00, 0x00, 0x00 }, /* Л to L */
  { 0x041C, 0x4d, 0x00, 0x00, 0x00 }, /* М to M */
  { 0x041D, 0x4e, 0x00, 0x00, 0x00 }, /* Н to N */
  { 0x041E, 0x4f, 0x00, 0x00, 0x00 }, /* О to O */
  { 0x041F, 0x50, 0x00, 0x00, 0x00 }, /* П to P */
  { 0x0420, 0x52, 0x00, 0x00, 0x00 }, /* Р to R */
  { 0x0421, 0x53, 0x00, 0x00, 0x00 }, /* С to S */
  { 0x0422, 0x54, 0x00, 0x00, 0x00 }, /* Т to T */
  { 0x0423, 0x55, 0x00, 0x00, 0x00 }, /* У to U */
  { 0x0424, 0x46, 0x00, 0x00, 0x00 }, /* Ф to F */
  { 0x0425, 0x4b, 0x68, 0x00, 0x00 }, /* Х to Kh */
  { 0x0426, 0x54, 0x63, 0x00, 0x00 }, /* Ц to Tc */
  { 0x0427, 0x43, 0x68, 0x00, 0x00 }, /* Ч to Ch */
  { 0x0428, 0x53, 0x68, 0x00, 0x00 }, /* Ш to Sh */
  { 0x0429, 0x53, 0x68, 0x63, 0x68 }, /* Щ to Shch */
  { 0x042A, 0x61, 0x00, 0x00, 0x00 }, /* Ъ to A */
  { 0x042B, 0x59, 0x00, 0x00, 0x00 }, /* Ы to Y */
  { 0x042C, 0x59, 0x00, 0x00, 0x00 }, /* Ь to Y */
  { 0x042D, 0x45, 0x00, 0x00, 0x00 }, /* Э to E */
  { 0x042E, 0x49, 0x75, 0x00, 0x00 }, /* Ю to Iu */
  { 0x042F, 0x49, 0x61, 0x00, 0x00 }, /* Я to Ia */
  { 0x0430, 0x61, 0x00, 0x00, 0x00 }, /* а to a */
  { 0x0431, 0x62, 0x00, 0x00, 0x00 }, /* б to b */
  { 0x0432, 0x76, 0x00, 0x00, 0x00 }, /* в to v */
  { 0x0433, 0x67, 0x00, 0x00, 0x00 }, /* г to g */
  { 0x0434, 0x64, 0x00, 0x00, 0x00 }, /* д to d */
  { 0x0435, 0x65, 0x00, 0x00, 0x00 }, /* е to e */
  { 0x0436, 0x7a, 0x68, 0x00, 0x00 }, /* ж to zh */
  { 0x0437, 0x7a, 0x00, 0x00, 0x00 }, /* з to z */
  { 0x0438, 0x69, 0x00, 0x00, 0x00 }, /* и to i */
  { 0x0439, 0x69, 0x00, 0x00, 0x00 }, /* й to i */
  { 0x043A, 0x6b, 0x00, 0x00, 0x00 }, /* к to k */
  { 0x043B, 0x6c, 0x00, 0x00, 0x00 }, /* л to l */
  { 0x043C, 0x6d, 0x00, 0x00, 0x00 }, /* м to m */
  { 0x043D, 0x6e, 0x00, 0x00, 0x00 }, /* н to n */
  { 0x043E, 0x6f, 0x00, 0x00, 0x00 }, /* о to o */
  { 0x043F, 0x70, 0x00, 0x00, 0x00 }, /* п to p */
  { 0x0440, 0x72, 0x00, 0x00, 0x00 }, /* р to r */
  { 0x0441, 0x73, 0x00, 0x00, 0x00 }, /* с to s */
  { 0x0442, 0x74, 0x00, 0x00, 0x00 }, /* т to t */
  { 0x0443, 0x75, 0x00, 0x00, 0x00 }, /* у to u */
  { 0x0444, 0x66, 0x00, 0x00, 0x00 }, /* ф to f */
  { 0x0445, 0x6b, 0x68, 0x00, 0x00 }, /* х to kh */
  { 0x0446, 0x74, 0x63, 0x00, 0x00 }, /* ц to tc */
  { 0x0447, 0x63, 0x68, 0x00, 0x00 }, /* ч to ch */
  { 0x0448, 0x73, 0x68, 0x00, 0x00 }, /* ш to sh */
  { 0x0449, 0x73, 0x68, 0x63, 0x68 }, /* щ to shch */
  { 0x044A, 0x61, 0x00, 0x00, 0x00 }, /* ъ to a */
  { 0x044B, 0x79, 0x00, 0x00, 0x00 }, /* ы to y */
  { 0x044C, 0x79, 0x00, 0x00, 0x00 }, /* ь to y */
  { 0x044D, 0x65, 0x00, 0x00, 0x00 }, /* э to e */
  { 0x044E, 0x69, 0x75, 0x00, 0x00 }, /* ю to iu */
  { 0x044F, 0x69, 0x61, 0x00, 0x00 }, /* я to ia */
  { 0x0450, 0x65, 0x00, 0x00, 0x00 }, /* ѐ to e */
  { 0x0451, 0x65, 0x00, 0x00, 0x00 }, /* ё to e */
  { 0x0452, 0x64, 0x00, 0x00, 0x00 }, /* ђ to d */
  { 0x0453, 0x67, 0x00, 0x00, 0x00 }, /* ѓ to g */
  { 0x0454, 0x65, 0x00, 0x00, 0x00 }, /* є to e */
  { 0x0455, 0x7a, 0x00, 0x00, 0x00 }, /* ѕ to z */
  { 0x0456, 0x69, 0x00, 0x00, 0x00 }, /* і to i */
  { 0x0457, 0x69, 0x00, 0x00, 0x00 }, /* ї to i */
  { 0x0458, 0x6a, 0x00, 0x00, 0x00 }, /* ј to j */
  { 0x0459, 0x69, 0x00, 0x00, 0x00 }, /* љ to i */
  { 0x045A, 0x6e, 0x00, 0x00, 0x00 }, /* њ to n */
  { 0x045B, 0x64, 0x00, 0x00, 0x00 }, /* ћ to d */
  { 0x045C, 0x6b, 0x00, 0x00, 0x00 }, /* ќ to k */
  { 0x045D, 0x69, 0x00, 0x00, 0x00 }, /* ѝ to i */
  { 0x045E, 0x75, 0x00, 0x00, 0x00 }, /* ў to u */
  { 0x045F, 0x64, 0x00, 0x00, 0x00 }, /* џ to d */
  { 0x1E02, 0x42, 0x00, 0x00, 0x00 }, /* Ḃ to B */
  { 0x1E03, 0x62, 0x00, 0x00, 0x00 }, /* ḃ to b */
  { 0x1E0A, 0x44, 0x00, 0x00, 0x00 }, /* Ḋ to D */
  { 0x1E0B, 0x64, 0x00, 0x00, 0x00 }, /* ḋ to d */
  { 0x1E1E, 0x46, 0x00, 0x00, 0x00 }, /* Ḟ to F */
  { 0x1E1F, 0x66, 0x00, 0x00, 0x00 }, /* ḟ to f */
  { 0x1E40, 0x4D, 0x00, 0x00, 0x00 }, /* Ṁ to M */
  { 0x1E41, 0x6D, 0x00, 0x00, 0x00 }, /* ṁ to m */
  { 0x1E56, 0x50, 0x00, 0x00, 0x00 }, /* Ṗ to P */
  { 0x1E57, 0x70, 0x00, 0x00, 0x00 }, /* ṗ to p */
  { 0x1E60, 0x53, 0x00, 0x00, 0x00 }, /* Ṡ to S */
  { 0x1E61, 0x73, 0x00, 0x00, 0x00 }, /* ṡ to s */
  { 0x1E6A, 0x54, 0x00, 0x00, 0x00 }, /* Ṫ to T */
  { 0x1E6B, 0x74, 0x00, 0x00, 0x00 }, /* ṫ to t */
  { 0x1E80, 0x57, 0x00, 0x00, 0x00 }, /* Ẁ to W */
  { 0x1E81, 0x77, 0x00, 0x00, 0x00 }, /* ẁ to w */
  { 0x1E82, 0x57, 0x00, 0x00, 0x00 }, /* Ẃ to W */
  { 0x1E83, 0x77, 0x00, 0x00, 0x00 }, /* ẃ to w */
  { 0x1E84, 0x57, 0x00, 0x00, 0x00 }, /* Ẅ to W */
  { 0x1E85, 0x77, 0x00, 0x00, 0x00 }, /* ẅ to w */
  { 0x1EF2, 0x59, 0x00, 0x00, 0x00 }, /* Ỳ to Y */
  { 0x1EF3, 0x79, 0x00, 0x00, 0x00 }, /* ỳ to y */
  { 0xFB00, 0x66, 0x66, 0x00, 0x00 }, /* ﬀ to ff */
  { 0xFB01, 0x66, 0x69, 0x00, 0x00 }, /* ﬁ to fi */
  { 0xFB02, 0x66, 0x6C, 0x00, 0x00 }, /* ﬂ to fl */
  { 0xFB05, 0x73, 0x74, 0x00, 0x00 }, /* ﬅ to st */
  { 0xFB06, 0x73, 0x74, 0x00, 0x00 }, /* ﬆ to st */
};
1697
1698static const Transliteration *spellfixFindTranslit(int c, int *pxTop){
1699 *pxTop = (sizeof(translit)/sizeof(translit[0])) - 1;
1700 return translit;
1701}
1702
1703/*
1704** Convert the input string from UTF-8 into pure ASCII by converting
1705** all non-ASCII characters to some combination of characters in the
1706** ASCII subset.
1707**
1708** The returned string might contain more characters than the input.
1709**
1710** Space to hold the returned string comes from sqlite3_malloc() and
1711** should be freed by the caller.
1712*/
1713static unsigned char *transliterate(const unsigned char *zIn, int nIn){
1714#ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
1715 unsigned char *zOut = sqlite3_malloc64( nIn*5 + 1 );
1716#else
1717 unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
1718#endif
1719 int c, sz, nOut;
1720 if( zOut==0 ) return 0;
1721 nOut = 0;
1722 while( nIn>0 ){
1723 c = utf8Read(zIn, nIn, &sz);
1724 zIn += sz;
1725 nIn -= sz;
1726 if( c<=127 ){
1727 zOut[nOut++] = (unsigned char)c;
1728 }else{
1729 int xTop, xBtm, x;
1730 const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1731 xBtm = 0;
1732 while( xTop>=xBtm ){
1733 x = (xTop + xBtm)/2;
1734 if( tbl[x].cFrom==c ){
1735 zOut[nOut++] = tbl[x].cTo0;
1736 if( tbl[x].cTo1 ){
1737 zOut[nOut++] = tbl[x].cTo1;
1738 if( tbl[x].cTo2 ){
1739 zOut[nOut++] = tbl[x].cTo2;
1740 if( tbl[x].cTo3 ){
1741 zOut[nOut++] = tbl[x].cTo3;
1742#ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
1743 if( tbl[x].cTo4 ){
1744 zOut[nOut++] = tbl[x].cTo4;
1745 }
1746#endif /* SQLITE_SPELLFIX_5BYTE_MAPPINGS */
1747 }
1748 }
1749 }
1750 c = 0;
1751 break;
1752 }else if( tbl[x].cFrom>c ){
1753 xTop = x-1;
1754 }else{
1755 xBtm = x+1;
1756 }
1757 }
1758 if( c ) zOut[nOut++] = '?';
1759 }
1760 }
1761 zOut[nOut] = 0;
1762 return zOut;
1763}
1764
1765/*
1766** Return the number of characters in the shortest prefix of the input
1767** string that transliterates to an ASCII string nTrans bytes or longer.
1768** Or, if the transliteration of the input string is less than nTrans
1769** bytes in size, return the number of characters in the input string.
1770*/
1771static int translen_to_charlen(const char *zIn, int nIn, int nTrans){
1772 int i, c, sz, nOut;
1773 int nChar;
1774
1775 i = nOut = 0;
1776 for(nChar=0; i<nIn && nOut<nTrans; nChar++){
1777 c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1778 i += sz;
1779
1780 nOut++;
1781 if( c>=128 ){
1782 int xTop, xBtm, x;
1783 const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1784 xBtm = 0;
1785 while( xTop>=xBtm ){
1786 x = (xTop + xBtm)/2;
1787 if( tbl[x].cFrom==c ){
1788 if( tbl[x].cTo1 ){
1789 nOut++;
1790 if( tbl[x].cTo2 ){
1791 nOut++;
1792 if( tbl[x].cTo3 ){
1793 nOut++;
1794 }
1795 }
1796 }
1797 break;
1798 }else if( tbl[x].cFrom>c ){
1799 xTop = x-1;
1800 }else{
1801 xBtm = x+1;
1802 }
1803 }
1804 }
1805 }
1806
1807 return nChar;
1808}
1809
1810
1811/*
1812** spellfix1_translit(X)
1813**
1814** Convert a string that contains non-ASCII Roman characters into
1815** pure ASCII.
1816*/
1817static void transliterateSqlFunc(
1818 sqlite3_context *context,
1819 int argc,
1820 sqlite3_value **argv
1821){
1822 const unsigned char *zIn = sqlite3_value_text(argv[0]);
1823 int nIn = sqlite3_value_bytes(argv[0]);
1824 unsigned char *zOut = transliterate(zIn, nIn);
1825 if( zOut==0 ){
1826 sqlite3_result_error_nomem(context);
1827 }else{
1828 sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
1829 }
1830}
1831
1832/*
1833** spellfix1_scriptcode(X)
1834**
1835** Try to determine the dominant script used by the word X and return
1836** its ISO 15924 numeric code.
1837**
1838** The current implementation only understands the following scripts:
1839**
1840** 215 (Latin)
1841** 220 (Cyrillic)
1842** 200 (Greek)
1843**
1844** This routine will return 998 if the input X contains characters from
1845** two or more of the above scripts or 999 if X contains no characters
1846** from any of the above scripts.
1847*/
1848static void scriptCodeSqlFunc(
1849 sqlite3_context *context,
1850 int argc,
1851 sqlite3_value **argv
1852){
1853 const unsigned char *zIn = sqlite3_value_text(argv[0]);
1854 int nIn = sqlite3_value_bytes(argv[0]);
1855 int c, sz;
1856 int scriptMask = 0;
1857 int res;
1858 int seenDigit = 0;
1859# define SCRIPT_LATIN 0x0001
1860# define SCRIPT_CYRILLIC 0x0002
1861# define SCRIPT_GREEK 0x0004
1862# define SCRIPT_HEBREW 0x0008
1863# define SCRIPT_ARABIC 0x0010
1864
1865 while( nIn>0 ){
1866 c = utf8Read(zIn, nIn, &sz);
1867 zIn += sz;
1868 nIn -= sz;
1869 if( c<0x02af ){
1870 if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ){
1871 scriptMask |= SCRIPT_LATIN;
1872 }else if( c>='0' && c<='9' ){
1873 seenDigit = 1;
1874 }
1875 }else if( c>=0x0400 && c<=0x04ff ){
1876 scriptMask |= SCRIPT_CYRILLIC;
1877 }else if( c>=0x0386 && c<=0x03ce ){
1878 scriptMask |= SCRIPT_GREEK;
1879 }else if( c>=0x0590 && c<=0x05ff ){
1880 scriptMask |= SCRIPT_HEBREW;
1881 }else if( c>=0x0600 && c<=0x06ff ){
1882 scriptMask |= SCRIPT_ARABIC;
1883 }
1884 }
1885 if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN;
1886 switch( scriptMask ){
1887 case 0: res = 999; break;
1888 case SCRIPT_LATIN: res = 215; break;
1889 case SCRIPT_CYRILLIC: res = 220; break;
1890 case SCRIPT_GREEK: res = 200; break;
1891 case SCRIPT_HEBREW: res = 125; break;
1892 case SCRIPT_ARABIC: res = 160; break;
1893 default: res = 998; break;
1894 }
1895 sqlite3_result_int(context, res);
1896}
1897
1898/* End transliterate
1899******************************************************************************
1900******************************************************************************
1901** Begin spellfix1 virtual table.
1902*/
1903
1904/* Maximum length of a phonehash used for querying the shadow table */
1905#define SPELLFIX_MX_HASH 32
1906
1907/* Maximum number of hash strings to examine per query */
1908#define SPELLFIX_MX_RUN 1
1909
1910typedef struct spellfix1_vtab spellfix1_vtab;
1911typedef struct spellfix1_cursor spellfix1_cursor;
1912
1913/* Fuzzy-search virtual table object */
1914struct spellfix1_vtab {
1915 sqlite3_vtab base; /* Base class - must be first */
1916 sqlite3 *db; /* Database connection */
1917 char *zDbName; /* Name of database holding this table */
1918 char *zTableName; /* Name of the virtual table */
1919 char *zCostTable; /* Table holding edit-distance cost numbers */
1920 EditDist3Config *pConfig3; /* Parsed edit distance costs */
1921};
1922
1923/* Fuzzy-search cursor object */
1924struct spellfix1_cursor {
1925 sqlite3_vtab_cursor base; /* Base class - must be first */
1926 spellfix1_vtab *pVTab; /* The table to which this cursor belongs */
1927 char *zPattern; /* rhs of MATCH clause */
1928 int idxNum; /* idxNum value passed to xFilter() */
1929 int nRow; /* Number of rows of content */
1930 int nAlloc; /* Number of allocated rows */
1931 int iRow; /* Current row of content */
1932 int iLang; /* Value of the langid= constraint */
1933 int iTop; /* Value of the top= constraint */
1934 int iScope; /* Value of the scope= constraint */
1935 int nSearch; /* Number of vocabulary items checked */
1936 sqlite3_stmt *pFullScan; /* Shadow query for a full table scan */
1937 struct spellfix1_row { /* For each row of content */
1938 sqlite3_int64 iRowid; /* Rowid for this row */
1939 char *zWord; /* Text for this row */
1940 int iRank; /* Rank for this row */
1941 int iDistance; /* Distance from pattern for this row */
1942 int iScore; /* Score for sorting */
1943 int iMatchlen; /* Value of matchlen column (or -1) */
1944 char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
1945 } *a;
1946};
1947
1948/*
1949** Construct one or more SQL statements from the format string given
1950** and then evaluate those statements. The success code is written
1951** into *pRc.
1952**
1953** If *pRc is initially non-zero then this routine is a no-op.
1954*/
1955static void spellfix1DbExec(
1956 int *pRc, /* Success code */
1957 sqlite3 *db, /* Database in which to run SQL */
1958 const char *zFormat, /* Format string for SQL */
1959 ... /* Arguments to the format string */
1960){
1961 va_list ap;
1962 char *zSql;
1963 if( *pRc ) return;
1964 va_start(ap, zFormat);
1965 zSql = sqlite3_vmprintf(zFormat, ap);
1966 va_end(ap);
1967 if( zSql==0 ){
1968 *pRc = SQLITE_NOMEM;
1969 }else{
1970 *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
1971 sqlite3_free(zSql);
1972 }
1973}
1974
1975/*
1976** xDisconnect/xDestroy method for the fuzzy-search module.
1977*/
1978static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){
1979 spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
1980 int rc = SQLITE_OK;
1981 if( isDestroy ){
1982 sqlite3 *db = p->db;
1983 spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"",
1984 p->zDbName, p->zTableName);
1985 }
1986 if( rc==SQLITE_OK ){
1987 sqlite3_free(p->zTableName);
1988 editDist3ConfigDelete(p->pConfig3);
1989 sqlite3_free(p->zCostTable);
1990 sqlite3_free(p);
1991 }
1992 return rc;
1993}
1994static int spellfix1Disconnect(sqlite3_vtab *pVTab){
1995 return spellfix1Uninit(0, pVTab);
1996}
1997static int spellfix1Destroy(sqlite3_vtab *pVTab){
1998 return spellfix1Uninit(1, pVTab);
1999}
2000
/*
** Make a copy of a string with leading whitespace removed and, if the
** remainder begins with a single or double quote, with that quoting
** removed: a doubled quote character inside the string becomes a single
** literal quote, and the copy ends at the closing quote.  Trailing
** whitespace is not stripped.
**
** The copy is obtained from sqlite3_mprintf() and must be released by the
** caller using sqlite3_free().  NULL is returned if the allocation fails.
**
** The scan is bounded by the string terminator, so a quoted string that
** lacks its closing quote is copied up to the end of the input instead of
** running off the end of the buffer.
*/
static char *spellfix1Dequote(const char *zIn){
  char *zOut;
  int i, j;
  char c;
  while( isspace((unsigned char)zIn[0]) ) zIn++;
  zOut = sqlite3_mprintf("%s", zIn);
  if( zOut==0 ) return 0;
  c = zOut[0];
  if( c=='\'' || c=='"' ){
    /* Compact the string in place: i reads, j writes, and j<i always */
    for(i=1, j=0; zOut[i]; i++){
      if( zOut[i]==c ){
        if( zOut[i+1]!=c ) break;   /* Closing quote */
        i++;                        /* Doubled quote: keep one copy */
      }
      zOut[j++] = zOut[i];
    }
    zOut[j] = 0;
  }
  return zOut;
}
2033
2034
2035/*
2036** xConnect/xCreate method for the spellfix1 module. Arguments are:
2037**
2038** argv[0] -> module name ("spellfix1")
2039** argv[1] -> database name
2040** argv[2] -> table name
2041** argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
2042*/
2043static int spellfix1Init(
2044 int isCreate,
2045 sqlite3 *db,
2046 void *pAux,
2047 int argc, const char *const*argv,
2048 sqlite3_vtab **ppVTab,
2049 char **pzErr
2050){
2051 spellfix1_vtab *pNew = 0;
2052 /* const char *zModule = argv[0]; // not used */
2053 const char *zDbName = argv[1];
2054 const char *zTableName = argv[2];
2055 int nDbName;
2056 int rc = SQLITE_OK;
2057 int i;
2058
2059 nDbName = (int)strlen(zDbName);
2060 pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
2061 if( pNew==0 ){
2062 rc = SQLITE_NOMEM;
2063 }else{
2064 memset(pNew, 0, sizeof(*pNew));
2065 pNew->zDbName = (char*)&pNew[1];
2066 memcpy(pNew->zDbName, zDbName, nDbName+1);
2067 pNew->zTableName = sqlite3_mprintf("%s", zTableName);
2068 pNew->db = db;
2069 if( pNew->zTableName==0 ){
2070 rc = SQLITE_NOMEM;
2071 }else{
2072 sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
2073 rc = sqlite3_declare_vtab(db,
2074 "CREATE TABLE x(word,rank,distance,langid, "
2075 "score, matchlen, phonehash HIDDEN, "
2076 "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
2077 "soundslike HIDDEN, command HIDDEN)"
2078 );
2079#define SPELLFIX_COL_WORD 0
2080#define SPELLFIX_COL_RANK 1
2081#define SPELLFIX_COL_DISTANCE 2
2082#define SPELLFIX_COL_LANGID 3
2083#define SPELLFIX_COL_SCORE 4
2084#define SPELLFIX_COL_MATCHLEN 5
2085#define SPELLFIX_COL_PHONEHASH 6
2086#define SPELLFIX_COL_TOP 7
2087#define SPELLFIX_COL_SCOPE 8
2088#define SPELLFIX_COL_SRCHCNT 9
2089#define SPELLFIX_COL_SOUNDSLIKE 10
2090#define SPELLFIX_COL_COMMAND 11
2091 }
2092 if( rc==SQLITE_OK && isCreate ){
2093 spellfix1DbExec(&rc, db,
2094 "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
2095 " id INTEGER PRIMARY KEY,\n"
2096 " rank INT,\n"
2097 " langid INT,\n"
2098 " word TEXT,\n"
2099 " k1 TEXT,\n"
2100 " k2 TEXT\n"
2101 ");\n",
2102 zDbName, zTableName
2103 );
2104 spellfix1DbExec(&rc, db,
2105 "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" "
2106 "ON \"%w_vocab\"(langid,k2);",
2107 zDbName, zTableName, zTableName
2108 );
2109 }
2110 for(i=3; rc==SQLITE_OK && i<argc; i++){
2111 if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){
2112 pNew->zCostTable = spellfix1Dequote(&argv[i][16]);
2113 if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM;
2114 continue;
2115 }
2116 *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]);
2117 rc = SQLITE_ERROR;
2118 }
2119 }
2120
2121 if( rc && pNew ){
2122 *ppVTab = 0;
2123 spellfix1Uninit(0, &pNew->base);
2124 }else{
2125 *ppVTab = (sqlite3_vtab *)pNew;
2126 }
2127 return rc;
2128}
2129
2130/*
2131** The xConnect and xCreate methods
2132*/
2133static int spellfix1Connect(
2134 sqlite3 *db,
2135 void *pAux,
2136 int argc, const char *const*argv,
2137 sqlite3_vtab **ppVTab,
2138 char **pzErr
2139){
2140 return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr);
2141}
2142static int spellfix1Create(
2143 sqlite3 *db,
2144 void *pAux,
2145 int argc, const char *const*argv,
2146 sqlite3_vtab **ppVTab,
2147 char **pzErr
2148){
2149 return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr);
2150}
2151
2152/*
2153** Clear all of the content from a cursor.
2154*/
2155static void spellfix1ResetCursor(spellfix1_cursor *pCur){
2156 int i;
2157 for(i=0; i<pCur->nRow; i++){
2158 sqlite3_free(pCur->a[i].zWord);
2159 }
2160 pCur->nRow = 0;
2161 pCur->iRow = 0;
2162 pCur->nSearch = 0;
2163 if( pCur->pFullScan ){
2164 sqlite3_finalize(pCur->pFullScan);
2165 pCur->pFullScan = 0;
2166 }
2167}
2168
2169/*
2170** Resize the cursor to hold up to N rows of content
2171*/
2172static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){
2173 struct spellfix1_row *aNew;
2174 assert( N>=pCur->nRow );
2175 aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
2176 if( aNew==0 && N>0 ){
2177 spellfix1ResetCursor(pCur);
2178 sqlite3_free(pCur->a);
2179 pCur->nAlloc = 0;
2180 pCur->a = 0;
2181 }else{
2182 pCur->nAlloc = N;
2183 pCur->a = aNew;
2184 }
2185}
2186
2187
2188/*
2189** Close a fuzzy-search cursor.
2190*/
2191static int spellfix1Close(sqlite3_vtab_cursor *cur){
2192 spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2193 spellfix1ResetCursor(pCur);
2194 spellfix1ResizeCursor(pCur, 0);
2195 sqlite3_free(pCur->zPattern);
2196 sqlite3_free(pCur);
2197 return SQLITE_OK;
2198}
2199
2200#define SPELLFIX_IDXNUM_MATCH 0x01 /* word MATCH $str */
2201#define SPELLFIX_IDXNUM_LANGID 0x02 /* langid == $langid */
2202#define SPELLFIX_IDXNUM_TOP 0x04 /* top = $top */
2203#define SPELLFIX_IDXNUM_SCOPE 0x08 /* scope = $scope */
2204#define SPELLFIX_IDXNUM_DISTLT 0x10 /* distance < $distance */
2205#define SPELLFIX_IDXNUM_DISTLE 0x20 /* distance <= $distance */
2206#define SPELLFIX_IDXNUM_ROWID 0x40 /* rowid = $rowid */
2207#define SPELLFIX_IDXNUM_DIST (0x10|0x20) /* DISTLT and DISTLE */
2208
2209/*
2210**
2211** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined
2212** above.
2213**
2214** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid
2215** if specified and in that order.
2216*/
2217static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
2218 int iPlan = 0;
2219 int iLangTerm = -1;
2220 int iTopTerm = -1;
2221 int iScopeTerm = -1;
2222 int iDistTerm = -1;
2223 int iRowidTerm = -1;
2224 int i;
2225 const struct sqlite3_index_constraint *pConstraint;
2226 pConstraint = pIdxInfo->aConstraint;
2227 for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
2228 if( pConstraint->usable==0 ) continue;
2229
2230 /* Terms of the form: word MATCH $str */
2231 if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0
2232 && pConstraint->iColumn==SPELLFIX_COL_WORD
2233 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
2234 ){
2235 iPlan |= SPELLFIX_IDXNUM_MATCH;
2236 pIdxInfo->aConstraintUsage[i].argvIndex = 1;
2237 pIdxInfo->aConstraintUsage[i].omit = 1;
2238 }
2239
2240 /* Terms of the form: langid = $langid */
2241 if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0
2242 && pConstraint->iColumn==SPELLFIX_COL_LANGID
2243 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2244 ){
2245 iPlan |= SPELLFIX_IDXNUM_LANGID;
2246 iLangTerm = i;
2247 }
2248
2249 /* Terms of the form: top = $top */
2250 if( (iPlan & SPELLFIX_IDXNUM_TOP)==0
2251 && pConstraint->iColumn==SPELLFIX_COL_TOP
2252 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2253 ){
2254 iPlan |= SPELLFIX_IDXNUM_TOP;
2255 iTopTerm = i;
2256 }
2257
2258 /* Terms of the form: scope = $scope */
2259 if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0
2260 && pConstraint->iColumn==SPELLFIX_COL_SCOPE
2261 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2262 ){
2263 iPlan |= SPELLFIX_IDXNUM_SCOPE;
2264 iScopeTerm = i;
2265 }
2266
2267 /* Terms of the form: distance < $dist or distance <= $dist */
2268 if( (iPlan & SPELLFIX_IDXNUM_DIST)==0
2269 && pConstraint->iColumn==SPELLFIX_COL_DISTANCE
2270 && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
2271 || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
2272 ){
2273 if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){
2274 iPlan |= SPELLFIX_IDXNUM_DISTLT;
2275 }else{
2276 iPlan |= SPELLFIX_IDXNUM_DISTLE;
2277 }
2278 iDistTerm = i;
2279 }
2280
2281 /* Terms of the form: distance < $dist or distance <= $dist */
2282 if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0
2283 && pConstraint->iColumn<0
2284 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2285 ){
2286 iPlan |= SPELLFIX_IDXNUM_ROWID;
2287 iRowidTerm = i;
2288 }
2289 }
2290 if( iPlan&SPELLFIX_IDXNUM_MATCH ){
2291 int idx = 2;
2292 pIdxInfo->idxNum = iPlan;
2293 if( pIdxInfo->nOrderBy==1
2294 && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE
2295 && pIdxInfo->aOrderBy[0].desc==0
2296 ){
2297 pIdxInfo->orderByConsumed = 1; /* Default order by iScore */
2298 }
2299 if( iPlan&SPELLFIX_IDXNUM_LANGID ){
2300 pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++;
2301 pIdxInfo->aConstraintUsage[iLangTerm].omit = 1;
2302 }
2303 if( iPlan&SPELLFIX_IDXNUM_TOP ){
2304 pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++;
2305 pIdxInfo->aConstraintUsage[iTopTerm].omit = 1;
2306 }
2307 if( iPlan&SPELLFIX_IDXNUM_SCOPE ){
2308 pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++;
2309 pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1;
2310 }
2311 if( iPlan&SPELLFIX_IDXNUM_DIST ){
2312 pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++;
2313 pIdxInfo->aConstraintUsage[iDistTerm].omit = 1;
2314 }
2315 pIdxInfo->estimatedCost = 1e5;
2316 }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){
2317 pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID;
2318 pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
2319 pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
2320 pIdxInfo->estimatedCost = 5;
2321 }else{
2322 pIdxInfo->idxNum = 0;
2323 pIdxInfo->estimatedCost = 1e50;
2324 }
2325 return SQLITE_OK;
2326}
2327
2328/*
2329** Open a new fuzzy-search cursor.
2330*/
2331static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
2332 spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2333 spellfix1_cursor *pCur;
2334 pCur = sqlite3_malloc64( sizeof(*pCur) );
2335 if( pCur==0 ) return SQLITE_NOMEM;
2336 memset(pCur, 0, sizeof(*pCur));
2337 pCur->pVTab = p;
2338 *ppCursor = &pCur->base;
2339 return SQLITE_OK;
2340}
2341
/*
** Combine an edit distance with a word's rank to favor common words.
** The result is iDistance plus 32, minus the number of significant bits
** in iRank (zero bits for iRank<=0).  Lower scores are better.
*/
static int spellfix1Score(int iDistance, int iRank){
  int nBit = 0;
  while( iRank>0 ){
    iRank >>= 1;
    nBit++;
  }
  return iDistance + 32 - nBit;
}
2351
2352/*
2353** Compare two spellfix1_row objects for sorting purposes in qsort() such
2354** that they sort in order of increasing distance.
2355*/
2356static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){
2357 const struct spellfix1_row *a = (const struct spellfix1_row*)A;
2358 const struct spellfix1_row *b = (const struct spellfix1_row*)B;
2359 return a->iScore - b->iScore;
2360}
2361
2362/*
2363** A structure used to pass information from spellfix1FilterForMatch()
2364** into spellfix1RunQuery().
2365*/
2366typedef struct MatchQuery {
2367 spellfix1_cursor *pCur; /* The cursor being queried */
2368 sqlite3_stmt *pStmt; /* shadow table query statment */
2369 char zHash[SPELLFIX_MX_HASH]; /* The current phonehash for zPattern */
2370 const char *zPattern; /* Transliterated input string */
2371 int nPattern; /* Length of zPattern */
2372 EditDist3FromString *pMatchStr3; /* Original unicode string */
2373 EditDist3Config *pConfig3; /* Edit-distance cost coefficients */
2374 const EditDist3Lang *pLang; /* The selected language coefficients */
2375 int iLang; /* The language id */
2376 int iScope; /* Default scope */
2377 int iMaxDist; /* Maximum allowed edit distance, or -1 */
2378 int rc; /* Error code */
2379 int nRun; /* Number of prior runs for the same zPattern */
2380 char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH]; /* Prior hashes */
2381} MatchQuery;
2382
2383/*
2384** Run a query looking for the best matches against zPattern using
2385** zHash as the character class seed hash.
2386*/
2387static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){
2388 const char *zK1;
2389 const char *zWord;
2390 int iDist;
2391 int iRank;
2392 int iScore;
2393 int iWorst = 0;
2394 int idx;
2395 int idxWorst = -1;
2396 int i;
2397 int iScope = p->iScope;
2398 spellfix1_cursor *pCur = p->pCur;
2399 sqlite3_stmt *pStmt = p->pStmt;
2400 char zHash1[SPELLFIX_MX_HASH];
2401 char zHash2[SPELLFIX_MX_HASH];
2402 char *zClass;
2403 int nClass;
2404 int rc;
2405
2406 if( pCur->a==0 || p->rc ) return; /* Prior memory allocation failure */
2407 zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
2408 if( zClass==0 ){
2409 p->rc = SQLITE_NOMEM;
2410 return;
2411 }
2412 nClass = (int)strlen(zClass);
2413 if( nClass>SPELLFIX_MX_HASH-2 ){
2414 nClass = SPELLFIX_MX_HASH-2;
2415 zClass[nClass] = 0;
2416 }
2417 if( nClass<=iScope ){
2418 if( nClass>2 ){
2419 iScope = nClass-1;
2420 }else{
2421 iScope = nClass;
2422 }
2423 }
2424 memcpy(zHash1, zClass, iScope);
2425 sqlite3_free(zClass);
2426 zHash1[iScope] = 0;
2427 memcpy(zHash2, zHash1, iScope);
2428 zHash2[iScope] = 'Z';
2429 zHash2[iScope+1] = 0;
2430#if SPELLFIX_MX_RUN>1
2431 for(i=0; i<p->nRun; i++){
2432 if( strcmp(p->azPrior[i], zHash1)==0 ) return;
2433 }
2434#endif
2435 assert( p->nRun<SPELLFIX_MX_RUN );
2436 memcpy(p->azPrior[p->nRun++], zHash1, iScope+1);
2437 if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
2438 || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
2439 ){
2440 p->rc = SQLITE_NOMEM;
2441 return;
2442 }
2443#if SPELLFIX_MX_RUN>1
2444 for(i=0; i<pCur->nRow; i++){
2445 if( pCur->a[i].iScore>iWorst ){
2446 iWorst = pCur->a[i].iScore;
2447 idxWorst = i;
2448 }
2449 }
2450#endif
2451 while( sqlite3_step(pStmt)==SQLITE_ROW ){
2452 int iMatchlen = -1;
2453 iRank = sqlite3_column_int(pStmt, 2);
2454 if( p->pMatchStr3 ){
2455 int nWord = sqlite3_column_bytes(pStmt, 1);
2456 zWord = (const char*)sqlite3_column_text(pStmt, 1);
2457 iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
2458 }else{
2459 zK1 = (const char*)sqlite3_column_text(pStmt, 3);
2460 if( zK1==0 ) continue;
2461 iDist = editdist1(p->zPattern, zK1, 0);
2462 }
2463 if( iDist<0 ){
2464 p->rc = SQLITE_NOMEM;
2465 break;
2466 }
2467 pCur->nSearch++;
2468
2469 /* If there is a "distance < $dist" or "distance <= $dist" constraint,
2470 ** check if this row meets it. If not, jump back up to the top of the
2471 ** loop to process the next row. Otherwise, if the row does match the
2472 ** distance constraint, check if the pCur->a[] array is already full.
2473 ** If it is and no explicit "top = ?" constraint was present in the
2474 ** query, grow the array to ensure there is room for the new entry. */
2475 assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) );
2476 if( p->iMaxDist>=0 ){
2477 if( iDist>p->iMaxDist ) continue;
2478 if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){
2479 spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
2480 if( pCur->a==0 ) break;
2481 }
2482 }
2483
2484 iScore = spellfix1Score(iDist,iRank);
2485 if( pCur->nRow<pCur->nAlloc ){
2486 idx = pCur->nRow;
2487 }else if( iScore<iWorst ){
2488 idx = idxWorst;
2489 sqlite3_free(pCur->a[idx].zWord);
2490 }else{
2491 continue;
2492 }
2493
2494 pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
2495 if( pCur->a[idx].zWord==0 ){
2496 p->rc = SQLITE_NOMEM;
2497 break;
2498 }
2499 pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
2500 pCur->a[idx].iRank = iRank;
2501 pCur->a[idx].iDistance = iDist;
2502 pCur->a[idx].iScore = iScore;
2503 pCur->a[idx].iMatchlen = iMatchlen;
2504 memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
2505 if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
2506 if( pCur->nRow==pCur->nAlloc ){
2507 iWorst = pCur->a[0].iScore;
2508 idxWorst = 0;
2509 for(i=1; i<pCur->nRow; i++){
2510 iScore = pCur->a[i].iScore;
2511 if( iWorst<iScore ){
2512 iWorst = iScore;
2513 idxWorst = i;
2514 }
2515 }
2516 }
2517 }
2518 rc = sqlite3_reset(pStmt);
2519 if( rc ) p->rc = rc;
2520}
2521
2522/*
2523** This version of the xFilter method work if the MATCH term is present
2524** and we are doing a scan.
2525*/
2526static int spellfix1FilterForMatch(
2527 spellfix1_cursor *pCur,
2528 int argc,
2529 sqlite3_value **argv
2530){
2531 int idxNum = pCur->idxNum;
2532 const unsigned char *zMatchThis; /* RHS of the MATCH operator */
2533 EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */
2534 char *zPattern; /* Transliteration of zMatchThis */
2535 int nPattern; /* Length of zPattern */
2536 int iLimit = 20; /* Max number of rows of output */
2537 int iScope = 3; /* Use this many characters of zClass */
2538 int iLang = 0; /* Language code */
2539 char *zSql; /* SQL of shadow table query */
2540 sqlite3_stmt *pStmt = 0; /* Shadow table query */
2541 int rc; /* Result code */
2542 int idx = 1; /* Next available filter parameter */
2543 spellfix1_vtab *p = pCur->pVTab; /* The virtual table that owns pCur */
2544 MatchQuery x; /* For passing info to RunQuery() */
2545
2546 /* Load the cost table if we have not already done so */
2547 if( p->zCostTable!=0 && p->pConfig3==0 ){
2548 p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
2549 if( p->pConfig3==0 ) return SQLITE_NOMEM;
2550 memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
2551 rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
2552 if( rc ) return rc;
2553 }
2554 memset(&x, 0, sizeof(x));
2555 x.iScope = 3; /* Default scope if none specified by "WHERE scope=N" */
2556 x.iMaxDist = -1; /* Maximum allowed edit distance */
2557
2558 if( idxNum&2 ){
2559 iLang = sqlite3_value_int(argv[idx++]);
2560 }
2561 if( idxNum&4 ){
2562 iLimit = sqlite3_value_int(argv[idx++]);
2563 if( iLimit<1 ) iLimit = 1;
2564 }
2565 if( idxNum&8 ){
2566 x.iScope = sqlite3_value_int(argv[idx++]);
2567 if( x.iScope<1 ) x.iScope = 1;
2568 if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2;
2569 }
2570 if( idxNum&(16|32) ){
2571 x.iMaxDist = sqlite3_value_int(argv[idx++]);
2572 if( idxNum&16 ) x.iMaxDist--;
2573 if( x.iMaxDist<0 ) x.iMaxDist = 0;
2574 }
2575 spellfix1ResetCursor(pCur);
2576 spellfix1ResizeCursor(pCur, iLimit);
2577 zMatchThis = sqlite3_value_text(argv[0]);
2578 if( zMatchThis==0 ) return SQLITE_OK;
2579 if( p->pConfig3 ){
2580 x.pLang = editDist3FindLang(p->pConfig3, iLang);
2581 pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
2582 if( pMatchStr3==0 ){
2583 x.rc = SQLITE_NOMEM;
2584 goto filter_exit;
2585 }
2586 }else{
2587 x.pLang = 0;
2588 }
2589 zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
2590 sqlite3_free(pCur->zPattern);
2591 pCur->zPattern = zPattern;
2592 if( zPattern==0 ){
2593 x.rc = SQLITE_NOMEM;
2594 goto filter_exit;
2595 }
2596 nPattern = (int)strlen(zPattern);
2597 if( zPattern[nPattern-1]=='*' ) nPattern--;
2598 zSql = sqlite3_mprintf(
2599 "SELECT id, word, rank, coalesce(k1,word)"
2600 " FROM \"%w\".\"%w_vocab\""
2601 " WHERE langid=%d AND k2>=?1 AND k2<?2",
2602 p->zDbName, p->zTableName, iLang
2603 );
2604 if( zSql==0 ){
2605 x.rc = SQLITE_NOMEM;
2606 pStmt = 0;
2607 goto filter_exit;
2608 }
2609 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
2610 sqlite3_free(zSql);
2611 pCur->iLang = iLang;
2612 x.pCur = pCur;
2613 x.pStmt = pStmt;
2614 x.zPattern = zPattern;
2615 x.nPattern = nPattern;
2616 x.pMatchStr3 = pMatchStr3;
2617 x.iLang = iLang;
2618 x.rc = rc;
2619 x.pConfig3 = p->pConfig3;
2620 if( x.rc==SQLITE_OK ){
2621 spellfix1RunQuery(&x, zPattern, nPattern);
2622 }
2623
2624 if( pCur->a ){
2625 qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
2626 pCur->iTop = iLimit;
2627 pCur->iScope = iScope;
2628 }else{
2629 x.rc = SQLITE_NOMEM;
2630 }
2631
2632filter_exit:
2633 sqlite3_finalize(pStmt);
2634 editDist3FromStringDelete(pMatchStr3);
2635 return x.rc;
2636}
2637
2638/*
2639** This version of xFilter handles a full-table scan case
2640*/
2641static int spellfix1FilterForFullScan(
2642 spellfix1_cursor *pCur,
2643 int argc,
2644 sqlite3_value **argv
2645){
2646 int rc = SQLITE_OK;
2647 int idxNum = pCur->idxNum;
2648 char *zSql;
2649 spellfix1_vtab *pVTab = pCur->pVTab;
2650 spellfix1ResetCursor(pCur);
2651 assert( idxNum==0 || idxNum==64 );
2652 zSql = sqlite3_mprintf(
2653 "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s",
2654 pVTab->zDbName, pVTab->zTableName,
2655 ((idxNum & 64) ? " WHERE rowid=?" : "")
2656 );
2657 if( zSql==0 ) return SQLITE_NOMEM;
2658 rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0);
2659 sqlite3_free(zSql);
2660 if( rc==SQLITE_OK && (idxNum & 64) ){
2661 assert( argc==1 );
2662 rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]);
2663 }
2664 pCur->nRow = pCur->iRow = 0;
2665 if( rc==SQLITE_OK ){
2666 rc = sqlite3_step(pCur->pFullScan);
2667 if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; }
2668 if( rc==SQLITE_DONE ){ rc = SQLITE_OK; }
2669 }else{
2670 pCur->iRow = 0;
2671 }
2672 return rc;
2673}
2674
2675
2676/*
2677** Called to "rewind" a cursor back to the beginning so that
2678** it starts its output over again. Always called at least once
2679** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call.
2680*/
2681static int spellfix1Filter(
2682 sqlite3_vtab_cursor *cur,
2683 int idxNum, const char *idxStr,
2684 int argc, sqlite3_value **argv
2685){
2686 spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2687 int rc;
2688 pCur->idxNum = idxNum;
2689 if( idxNum & 1 ){
2690 rc = spellfix1FilterForMatch(pCur, argc, argv);
2691 }else{
2692 rc = spellfix1FilterForFullScan(pCur, argc, argv);
2693 }
2694 return rc;
2695}
2696
2697
2698/*
2699** Advance a cursor to its next row of output
2700*/
2701static int spellfix1Next(sqlite3_vtab_cursor *cur){
2702 spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2703 int rc = SQLITE_OK;
2704 if( pCur->iRow < pCur->nRow ){
2705 if( pCur->pFullScan ){
2706 rc = sqlite3_step(pCur->pFullScan);
2707 if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow;
2708 if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK;
2709 }else{
2710 pCur->iRow++;
2711 }
2712 }
2713 return rc;
2714}
2715
2716/*
2717** Return TRUE if we are at the end-of-file
2718*/
2719static int spellfix1Eof(sqlite3_vtab_cursor *cur){
2720 spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2721 return pCur->iRow>=pCur->nRow;
2722}
2723
2724/*
2725** Return columns from the current row.
2726*/
2727static int spellfix1Column(
2728 sqlite3_vtab_cursor *cur,
2729 sqlite3_context *ctx,
2730 int i
2731){
2732 spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2733 if( pCur->pFullScan ){
2734 if( i<=SPELLFIX_COL_LANGID ){
2735 sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
2736 }else{
2737 sqlite3_result_null(ctx);
2738 }
2739 return SQLITE_OK;
2740 }
2741 switch( i ){
2742 case SPELLFIX_COL_WORD: {
2743 sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
2744 break;
2745 }
2746 case SPELLFIX_COL_RANK: {
2747 sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
2748 break;
2749 }
2750 case SPELLFIX_COL_DISTANCE: {
2751 sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
2752 break;
2753 }
2754 case SPELLFIX_COL_LANGID: {
2755 sqlite3_result_int(ctx, pCur->iLang);
2756 break;
2757 }
2758 case SPELLFIX_COL_SCORE: {
2759 sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
2760 break;
2761 }
2762 case SPELLFIX_COL_MATCHLEN: {
2763 int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
2764 if( iMatchlen<0 ){
2765 int nPattern = (int)strlen(pCur->zPattern);
2766 char *zWord = pCur->a[pCur->iRow].zWord;
2767 int nWord = (int)strlen(zWord);
2768
2769 if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
2770 char *zTranslit;
2771 int res;
2772 zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
2773 if( !zTranslit ) return SQLITE_NOMEM;
2774 res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
2775 sqlite3_free(zTranslit);
2776 if( res<0 ) return SQLITE_NOMEM;
2777 iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
2778 }else{
2779 iMatchlen = utf8Charlen(zWord, nWord);
2780 }
2781 }
2782
2783 sqlite3_result_int(ctx, iMatchlen);
2784 break;
2785 }
2786 case SPELLFIX_COL_PHONEHASH: {
2787 sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
2788 break;
2789 }
2790 case SPELLFIX_COL_TOP: {
2791 sqlite3_result_int(ctx, pCur->iTop);
2792 break;
2793 }
2794 case SPELLFIX_COL_SCOPE: {
2795 sqlite3_result_int(ctx, pCur->iScope);
2796 break;
2797 }
2798 case SPELLFIX_COL_SRCHCNT: {
2799 sqlite3_result_int(ctx, pCur->nSearch);
2800 break;
2801 }
2802 default: {
2803 sqlite3_result_null(ctx);
2804 break;
2805 }
2806 }
2807 return SQLITE_OK;
2808}
2809
2810/*
2811** The rowid.
2812*/
2813static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
2814 spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2815 if( pCur->pFullScan ){
2816 *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
2817 }else{
2818 *pRowid = pCur->a[pCur->iRow].iRowid;
2819 }
2820 return SQLITE_OK;
2821}
2822
2823/*
2824** This function is called by the xUpdate() method. It returns a string
2825** containing the conflict mode that xUpdate() should use for the current
2826** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE".
2827*/
2828static const char *spellfix1GetConflict(sqlite3 *db){
2829 static const char *azConflict[] = {
2830 /* Note: Instead of "FAIL" - "ABORT". */
2831 "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE"
2832 };
2833 int eConflict = sqlite3_vtab_on_conflict(db);
2834
2835 assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE
2836 || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT
2837 || eConflict==SQLITE_REPLACE
2838 );
2839 assert( SQLITE_ROLLBACK==1 );
2840 assert( SQLITE_IGNORE==2 );
2841 assert( SQLITE_FAIL==3 );
2842 assert( SQLITE_ABORT==4 );
2843 assert( SQLITE_REPLACE==5 );
2844
2845 return azConflict[eConflict-1];
2846}
2847
2848/*
2849** The xUpdate() method.
2850*/
2851static int spellfix1Update(
2852 sqlite3_vtab *pVTab,
2853 int argc,
2854 sqlite3_value **argv,
2855 sqlite_int64 *pRowid
2856){
2857 int rc = SQLITE_OK;
2858 sqlite3_int64 rowid, newRowid;
2859 spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2860 sqlite3 *db = p->db;
2861
2862 if( argc==1 ){
2863 /* A delete operation on the rowid given by argv[0] */
2864 rowid = *pRowid = sqlite3_value_int64(argv[0]);
2865 spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
2866 " WHERE id=%lld",
2867 p->zDbName, p->zTableName, rowid);
2868 }else{
2869 const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
2870 int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
2871 int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
2872 int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
2873 const unsigned char *zSoundslike =
2874 sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2875 int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2876 char *zK1, *zK2;
2877 int i;
2878 char c;
2879 const char *zConflict = spellfix1GetConflict(db);
2880
2881 if( zWord==0 ){
2882 /* Inserts of the form: INSERT INTO table(command) VALUES('xyzzy');
2883 ** cause zWord to be NULL, so we look at the "command" column to see
2884 ** what special actions to take */
2885 const char *zCmd =
2886 (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
2887 if( zCmd==0 ){
2888 pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word",
2889 p->zTableName);
2890 return SQLITE_CONSTRAINT_NOTNULL;
2891 }
2892 if( strcmp(zCmd,"reset")==0 ){
2893 /* Reset the edit cost table (if there is one). */
2894 editDist3ConfigDelete(p->pConfig3);
2895 p->pConfig3 = 0;
2896 return SQLITE_OK;
2897 }
2898 if( strncmp(zCmd,"edit_cost_table=",16)==0 ){
2899 editDist3ConfigDelete(p->pConfig3);
2900 p->pConfig3 = 0;
2901 sqlite3_free(p->zCostTable);
2902 p->zCostTable = spellfix1Dequote(zCmd+16);
2903 if( p->zCostTable==0 ) return SQLITE_NOMEM;
2904 if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){
2905 sqlite3_free(p->zCostTable);
2906 p->zCostTable = 0;
2907 }
2908 return SQLITE_OK;
2909 }
2910 pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
2911 p->zTableName, zCmd);
2912 return SQLITE_ERROR;
2913 }
2914 if( iRank<1 ) iRank = 1;
2915 if( zSoundslike ){
2916 zK1 = (char*)transliterate(zSoundslike, nSoundslike);
2917 }else{
2918 zK1 = (char*)transliterate(zWord, nWord);
2919 }
2920 if( zK1==0 ) return SQLITE_NOMEM;
2921 for(i=0; (c = zK1[i])!=0; i++){
2922 if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
2923 }
2924 zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
2925 if( zK2==0 ){
2926 sqlite3_free(zK1);
2927 return SQLITE_NOMEM;
2928 }
2929 if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
2930 if( sqlite3_value_type(argv[1])==SQLITE_NULL ){
2931 spellfix1DbExec(&rc, db,
2932 "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
2933 "VALUES(%d,%d,%Q,nullif(%Q,%Q),%Q)",
2934 p->zDbName, p->zTableName,
2935 iRank, iLang, zWord, zK1, zWord, zK2
2936 );
2937 }else{
2938 newRowid = sqlite3_value_int64(argv[1]);
2939 spellfix1DbExec(&rc, db,
2940 "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) "
2941 "VALUES(%lld,%d,%d,%Q,nullif(%Q,%Q),%Q)",
2942 zConflict, p->zDbName, p->zTableName,
2943 newRowid, iRank, iLang, zWord, zK1, zWord, zK2
2944 );
2945 }
2946 *pRowid = sqlite3_last_insert_rowid(db);
2947 }else{
2948 rowid = sqlite3_value_int64(argv[0]);
2949 newRowid = *pRowid = sqlite3_value_int64(argv[1]);
2950 spellfix1DbExec(&rc, db,
2951 "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
2952 " word=%Q, k1=nullif(%Q,%Q), k2=%Q WHERE id=%lld",
2953 zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang,
2954 zWord, zK1, zWord, zK2, rowid
2955 );
2956 }
2957 sqlite3_free(zK1);
2958 sqlite3_free(zK2);
2959 }
2960 return rc;
2961}
2962
2963/*
2964** Rename the spellfix1 table.
2965*/
2966static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){
2967 spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2968 sqlite3 *db = p->db;
2969 int rc = SQLITE_OK;
2970 char *zNewName = sqlite3_mprintf("%s", zNew);
2971 if( zNewName==0 ){
2972 return SQLITE_NOMEM;
2973 }
2974 spellfix1DbExec(&rc, db,
2975 "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
2976 p->zDbName, p->zTableName, zNewName
2977 );
2978 if( rc==SQLITE_OK ){
2979 sqlite3_free(p->zTableName);
2980 p->zTableName = zNewName;
2981 }else{
2982 sqlite3_free(zNewName);
2983 }
2984 return rc;
2985}
2986
2987
2988/*
2989** A virtual table module that provides fuzzy search.
2990*/
2991static sqlite3_module spellfix1Module = {
2992 0, /* iVersion */
2993 spellfix1Create, /* xCreate - handle CREATE VIRTUAL TABLE */
2994 spellfix1Connect, /* xConnect - reconnected to an existing table */
2995 spellfix1BestIndex, /* xBestIndex - figure out how to do a query */
2996 spellfix1Disconnect, /* xDisconnect - close a connection */
2997 spellfix1Destroy, /* xDestroy - handle DROP TABLE */
2998 spellfix1Open, /* xOpen - open a cursor */
2999 spellfix1Close, /* xClose - close a cursor */
3000 spellfix1Filter, /* xFilter - configure scan constraints */
3001 spellfix1Next, /* xNext - advance a cursor */
3002 spellfix1Eof, /* xEof - check for end of scan */
3003 spellfix1Column, /* xColumn - read data */
3004 spellfix1Rowid, /* xRowid - read data */
3005 spellfix1Update, /* xUpdate */
3006 0, /* xBegin */
3007 0, /* xSync */
3008 0, /* xCommit */
3009 0, /* xRollback */
3010 0, /* xFindMethod */
3011 spellfix1Rename, /* xRename */
3012 0, /* xSavepoint */
3013 0, /* xRelease */
3014 0, /* xRollbackTo */
3015 0, /* xShadowName */
3016 0 /* xIntegrity */
3017};
3018
3019/*
3020** Register the various functions and the virtual table.
3021*/
3022static int spellfix1Register(sqlite3 *db){
3023 int rc = SQLITE_OK;
3024 int i;
3025 rc = sqlite3_create_function(db, "spellfix1_translit", 1,
3026 SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3027 transliterateSqlFunc, 0, 0);
3028 if( rc==SQLITE_OK ){
3029 rc = sqlite3_create_function(db, "spellfix1_editdist", 2,
3030 SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3031 editdistSqlFunc, 0, 0);
3032 }
3033 if( rc==SQLITE_OK ){
3034 rc = sqlite3_create_function(db, "spellfix1_phonehash", 1,
3035 SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3036 phoneticHashSqlFunc, 0, 0);
3037 }
3038 if( rc==SQLITE_OK ){
3039 rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1,
3040 SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3041 scriptCodeSqlFunc, 0, 0);
3042 }
3043 if( rc==SQLITE_OK ){
3044 rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
3045 }
3046 if( rc==SQLITE_OK ){
3047 rc = editDist3Install(db);
3048 }
3049
3050 /* Verify sanity of the translit[] table */
3051 for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){
3052 assert( translit[i].cFrom<translit[i+1].cFrom );
3053 }
3054
3055 return rc;
3056}
3057
3058#endif /* SQLITE_OMIT_VIRTUALTABLE */
3059
3060/*
3061** Extension load function.
3062*/
3063#ifdef _WIN32
3064__declspec(dllexport)
3065#endif
3066int sqlite3_spellfix_init(
3067 sqlite3 *db,
3068 char **pzErrMsg,
3069 const sqlite3_api_routines *pApi
3070){
3071 SQLITE_EXTENSION_INIT2(pApi);
3072#ifndef SQLITE_OMIT_VIRTUALTABLE
3073 return spellfix1Register(db);
3074#endif
3075 return SQLITE_OK;
3076}