dict @ f2200e22b05c3801b722cd46617e7bcf64538d17

   1/*
   2** 2012 April 10
   3**
   4** The author disclaims copyright to this source code.  In place of
   5** a legal notice, here is a blessing:
   6**
   7**    May you do good and not evil.
   8**    May you find forgiveness for yourself and forgive others.
   9**    May you share freely, never taking more than you give.
  10**
  11*************************************************************************
  12**
  13** This module implements the spellfix1 VIRTUAL TABLE that can be used
  14** to search a large vocabulary for close matches.  See separate
  15** documentation (http://www.sqlite.org/spellfix1.html) for details.
  16*/
  17#include "sqlite3ext.h"
  18SQLITE_EXTENSION_INIT1
  19
  20#ifndef SQLITE_AMALGAMATION
  21# if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
  22#  define NDEBUG 1
  23# endif
  24# if defined(NDEBUG) && defined(SQLITE_DEBUG)
  25#  undef NDEBUG
  26# endif
  27# include <string.h>
  28# include <stdio.h>
  29# include <stdlib.h>
  30# include <assert.h>
  31# define ALWAYS(X)  1
  32# define NEVER(X)   0
  33typedef unsigned char u8;
  34typedef unsigned short u16;
  35#endif
  36#include <ctype.h>
  37
  38#ifndef SQLITE_OMIT_VIRTUALTABLE
  39
/*
** Character classes for ASCII characters.  The first column is the class
** number, the second is the symbol that className[] emits for the class.
**
**   0   '.'       Silent letters:   H, and an apostrophe inside a word
**   1   'A'       Any vowel:   A E I O U (Y)
**   2   'B'       A bilabial stop or fricative:  B F P V W
**   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
**   4   'D'       Alveolar stops:  D T
**   5   'H'       Letter H at the beginning of a word
**   6   'L'       Glide:  L
**   7   'R'       Semivowel:  R
**   8   'M'       Nasals:  M N
**   9   'Y'       Letter Y at the beginning of a word.
**   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
**   11  ' '       White space
**   12  '?'       Other.
**
** NOTE(review): neither midClass[] nor initClass[] assigns CCLASS_H to any
** character; an initial H is classified as CCLASS_SILENT by initClass[].
*/
#define CCLASS_SILENT         0
#define CCLASS_VOWEL          1
#define CCLASS_B              2
#define CCLASS_C              3
#define CCLASS_D              4
#define CCLASS_H              5
#define CCLASS_L              6
#define CCLASS_R              7
#define CCLASS_M              8
#define CCLASS_Y              9
#define CCLASS_DIGIT         10
#define CCLASS_SPACE         11
#define CCLASS_OTHER         12
  70
/*
** The following table gives the character class for non-initial ASCII
** characters.
**
** The table has 128 entries and is indexed by the 7-bit character code;
** users of the table mask the character with 0x7f before the lookup.
** Each entry is one of the CCLASS_* values.
*/
static const unsigned char midClass[] = {
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
    /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
    /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
    /* ' */ CCLASS_SILENT,   /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
    /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
    /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
    /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
    /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
    /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
    /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
    /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
    /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
    /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
    /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
    /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
    /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
    /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
    /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
    /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
    /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
    /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
    /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
    /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
    /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
    /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
    /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
    /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
    /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
    /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
    /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
    /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
    /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
    /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
};
/*
** This table gives the character class for ASCII characters that form the
** initial character of a word.  It has the same layout as midClass[]
** (128 entries indexed by the 7-bit character code) and differs from it
** only in these entries:
**
**    '      CCLASS_OTHER here, CCLASS_SILENT in midClass[]
**    Y y    CCLASS_Y here,     CCLASS_VOWEL  in midClass[]
*/
static const unsigned char initClass[] = {
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
    /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
    /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
    /* ' */ CCLASS_OTHER,    /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
    /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
    /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
    /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
    /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
    /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
    /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
    /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
    /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
    /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
    /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
    /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
    /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
    /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
    /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
    /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
    /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
    /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
    /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
    /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
    /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
    /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
    /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
    /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
    /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
    /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
    /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
    /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
    /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
    /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
};
 170
/*
** Mapping from the character class number (0-12) to a symbol for each
** character class.  className[k] is the symbol for class k; the final
** array element is the string terminator.
**
** For most symbols initClass[] maps the symbol back into its class
** number.  The exceptions are '.' (which initClass[] classifies as
** CCLASS_OTHER) and 'H' (which it classifies as CCLASS_SILENT).
*/
static const unsigned char className[] = ".ABCDHLRMY9 ?";
 177
 178/*
 179** Generate a "phonetic hash" from a string of ASCII characters
 180** in zIn[0..nIn-1].
 181**
 182**   * Map characters by character class as defined above.
 183**   * Omit double-letters
 184**   * Omit vowels beside R and L
 185**   * Omit T when followed by CH
 186**   * Omit W when followed by R
 187**   * Omit D when followed by J or G
 188**   * Omit K in KN or G in GN at the beginning of a word
 189**
 190** Space to hold the result is obtained from sqlite3_malloc()
 191**
 192** Return NULL if memory allocation fails.
 193*/
 194static unsigned char *phoneticHash(const unsigned char *zIn, int nIn)
 195{
 196    unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
 197    int i;
 198    int nOut = 0;
 199    char cPrev = 0x77;
 200    char cPrevX = 0x77;
 201    const unsigned char *aClass = initClass;
 202
 203    if( zOut==0 ) return 0;
 204    if( nIn>2 ) {
 205        switch( zIn[0] ) {
 206        case 'g':
 207        case 'k': {
 208            if( zIn[1]=='n' ) {
 209                zIn++;
 210                nIn--;
 211            }
 212            break;
 213        }
 214        }
 215    }
 216    for(i=0; i<nIn; i++) {
 217        unsigned char c = zIn[i];
 218        if( i+1<nIn ) {
 219            if( c=='w' && zIn[i+1]=='r' ) continue;
 220            if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue;
 221            if( i+2<nIn ) {
 222                if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
 223            }
 224        }
 225        c = aClass[c&0x7f];
 226        if( c==CCLASS_SPACE ) continue;
 227        if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
 228        aClass = midClass;
 229        if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ) {
 230            continue; /* No vowels beside L or R */
 231        }
 232        if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ) {
 233            nOut--;   /* No vowels beside L or R */
 234        }
 235        cPrev = c;
 236        if( c==CCLASS_SILENT ) continue;
 237        cPrevX = c;
 238        c = className[c];
 239        assert( nOut>=0 );
 240        if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c;
 241    }
 242    zOut[nOut] = 0;
 243    return zOut;
 244}
 245
 246/*
 247** This is an SQL function wrapper around phoneticHash().  See
 248** the description of phoneticHash() for additional information.
 249*/
 250static void phoneticHashSqlFunc(
 251    sqlite3_context *context,
 252    int argc,
 253    sqlite3_value **argv
 254)
 255{
 256    const unsigned char *zIn;
 257    unsigned char *zOut;
 258
 259    zIn = sqlite3_value_text(argv[0]);
 260    if( zIn==0 ) return;
 261    zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0]));
 262    if( zOut==0 ) {
 263        sqlite3_result_error_nomem(context);
 264    } else {
 265        sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
 266    }
 267}
 268
 269/*
 270** Return the character class number for a character given its
 271** context.
 272*/
 273static char characterClass(char cPrev, char c)
 274{
 275    return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f];
 276}
 277
 278/*
 279** Return the cost of inserting or deleting character c immediately
 280** following character cPrev.  If cPrev==0, that means c is the first
 281** character of the word.
 282*/
 283static int insertOrDeleteCost(char cPrev, char c, char cNext)
 284{
 285    char classC = characterClass(cPrev, c);
 286    char classCprev;
 287
 288    if( classC==CCLASS_SILENT ) {
 289        /* Insert or delete "silent" characters such as H or W */
 290        return 1;
 291    }
 292    if( cPrev==c ) {
 293        /* Repeated characters, or miss a repeat */
 294        return 10;
 295    }
 296    if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ) {
 297        return 20;  /* Insert a vowel before or after 'r' */
 298    }
 299    classCprev = characterClass(cPrev, cPrev);
 300    if( classC==classCprev ) {
 301        if( classC==CCLASS_VOWEL ) {
 302            /* Remove or add a new vowel to a vowel cluster */
 303            return 15;
 304        } else {
 305            /* Remove or add a consonant not in the same class */
 306            return 50;
 307        }
 308    }
 309
 310    /* any other character insertion or deletion */
 311    return 100;
 312}
 313
/*
** Divide the insertion cost by this factor when appending to the
** end of the word.  The division is an integer division, so the
** remainder is discarded.
*/
#define FINAL_INS_COST_DIV  4
 319
 320/*
 321** Return the cost of substituting cTo in place of cFrom assuming
 322** the previous character is cPrev.  If cPrev==0 then cTo is the first
 323** character of the word.
 324*/
 325static int substituteCost(char cPrev, char cFrom, char cTo)
 326{
 327    char classFrom, classTo;
 328    if( cFrom==cTo ) {
 329        /* Exact match */
 330        return 0;
 331    }
 332    if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ) {
 333        /* differ only in case */
 334        return 0;
 335    }
 336    classFrom = characterClass(cPrev, cFrom);
 337    classTo = characterClass(cPrev, cTo);
 338    if( classFrom==classTo ) {
 339        /* Same character class */
 340        return 40;
 341    }
 342    if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y
 343        && classTo>=CCLASS_B && classTo<=CCLASS_Y ) {
 344        /* Convert from one consonant to another, but in a different class */
 345        return 75;
 346    }
 347    /* Any other subsitution */
 348    return 100;
 349}
 350
/*
** Given two strings zA and zB which are pure ASCII, return the cost
** of transforming zA into zB.  If zA ends with '*' assume that it is
** a prefix of zB and give only minimal penalty for extra characters
** on the end of zB.
**
** Smaller numbers mean a closer match.
**
** Negative values indicate an error:
**    -1  One of the inputs is NULL
**    -2  Non-ASCII characters on input
**    -3  Unable to allocate memory
**
** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
** of zB that matched the pattern in zA.  It is first set to the length
** of the common prefix of zA and zB.  If zA ends in a '*', it may then be
** raised to the number of bytes in the prefix of zB that was deemed to
** match zA.
**
** NOTE(review): the assert() near the end of this routine requires that
** pnMatch be NULL whenever the cost matrix is evaluated for a zA that
** does not end in '*'.
**
** Implementation: after the common prefix is skipped, a single row m[]
** of costs (one entry per position in zB, plus one) is updated once for
** each character of zA.  cx[] runs parallel to m[] and holds a character
** for each entry; that character is handed to the cost functions as the
** "previous character".
*/
static int editdist1(const char *zA, const char *zB, int *pnMatch)
{
    int nA, nB;            /* Number of characters in zA[] and zB[] */
    int xA, xB;            /* Loop counters for zA[] and zB[] */
    char cA = 0, cB;       /* Current character of zA and zB */
    char cAprev, cBprev;   /* Previous character of zA and zB */
    char cAnext, cBnext;   /* Next character in zA and zB */
    int d;                 /* North-west cost value */
    int dc = 0;            /* North-west character value */
    int res;               /* Final result */
    int *m;                /* The cost matrix */
    char *cx;              /* Corresponding character values */
    int *toFree = 0;       /* Malloced space */
    int nMatch = 0;        /* Length of the common prefix of zA and zB */
    int mStack[60+15];     /* Stack space to use if not too much is needed */

    /* Early out if either input is NULL */
    if( zA==0 || zB==0 ) return -1;

    /* Skip any common prefix.  dc remembers the last character skipped so
    ** that it can serve as the "previous character" of what remains. */
    while( zA[0] && zA[0]==zB[0] ) {
        dc = zA[0];
        zA++;
        zB++;
        nMatch++;
    }
    if( pnMatch ) *pnMatch = nMatch;
    if( zA[0]==0 && zB[0]==0 ) return 0;

#if 0
    printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
#endif

    /* Verify input strings and measure their lengths */
    for(nA=0; zA[nA]; nA++) {
        if( zA[nA]&0x80 ) return -2;
    }
    for(nB=0; zB[nB]; nB++) {
        if( zB[nB]&0x80 ) return -2;
    }

    /* Special processing if either string is empty */
    if( nA==0 ) {
        /* Only insertions at the end of the word remain: reduced cost */
        cBprev = (char)dc;
        for(xB=res=0; (cB = zB[xB])!=0; xB++) {
            res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
            cBprev = cB;
        }
        return res;
    }
    if( nB==0 ) {
        /* Only deletions remain: full cost */
        cAprev = (char)dc;
        for(xA=res=0; (cA = zA[xA])!=0; xA++) {
            res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
            cAprev = cA;
        }
        return res;
    }

    /* A is a prefix of B */
    if( zA[0]=='*' && zA[1]==0 ) return 0;

    /* Allocate and initialize the Wagner matrix.  One buffer holds the
    ** nB+1 integers of m[] followed by the nB+1 characters of cx[].  The
    ** stack buffer is used when nB is less than 60.  Otherwise the heap
    ** request is 5/4 the size of the integer array, which leaves nB+1
    ** bytes for cx[] when sizeof(int) is 4. */
    if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ) {
        m = mStack;
    } else {
        m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
        if( m==0 ) return -3;
    }
    cx = (char*)&m[nB+1];

    /* Compute the Wagner edit distance */
    /* First row: cost of building each prefix of zB by insertion only */
    m[0] = 0;
    cx[0] = (char)dc;
    cBprev = (char)dc;
    for(xB=1; xB<=nB; xB++) {
        cBnext = zB[xB];
        cB = zB[xB-1];
        cx[xB] = cB;
        m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
        cBprev = cB;
    }
    /* One pass over the row for each character of zA */
    cAprev = (char)dc;
    for(xA=1; xA<=nA; xA++) {
        int lastA = (xA==nA);
        cA = zA[xA-1];
        cAnext = zA[xA];
        /* A trailing '*' is the prefix marker, not a character to match */
        if( cA=='*' && lastA ) break;
        d = m[0];
        dc = cx[0];
        m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
        cBprev = 0;
        for(xB=1; xB<=nB; xB++) {
            int totalCost, insCost, delCost, subCost, ncx;
            cB = zB[xB-1];
            cBnext = zB[xB];

            /* Cost to insert cB */
            insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
            if( lastA ) insCost /= FINAL_INS_COST_DIV;

            /* Cost to delete cA */
            delCost = insertOrDeleteCost(cx[xB], cA, cBnext);

            /* Cost to substitute cA->cB */
            subCost = substituteCost(cx[xB-1], cA, cB);

            /* Best cost.  Insertion is the starting candidate; deletion
            ** and then substitution replace it only when strictly
            ** cheaper.  ncx stays cB unless deletion wins. */
            totalCost = insCost + m[xB-1];
            ncx = cB;
            if( (delCost + m[xB])<totalCost ) {
                totalCost = delCost + m[xB];
                ncx = cA;
            }
            if( (subCost + d)<totalCost ) {
                totalCost = subCost + d;
            }

#if 0
            printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
                   " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
                   xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
                   insCost, delCost, subCost, totalCost, ncx?ncx:' ');
#endif

            /* Update the matrix.  The old m[xB] is saved in d first; it is
            ** the north-west value for the next column. */
            d = m[xB];
            dc = cx[xB];
            m[xB] = totalCost;
            cx[xB] = (char)ncx;
            cBprev = cB;
        }
        cAprev = cA;
    }

    /* Free the wagner matrix and return the result */
    if( cA=='*' ) {
        /* Prefix match: the result is the smallest of m[1..nB].
        ** NOTE(review): *pnMatch is updated only when an entry strictly
        ** smaller than m[1] is found, so it keeps the common-prefix length
        ** when m[1] itself is the minimum - confirm this is intended. */
        res = m[1];
        for(xB=1; xB<=nB; xB++) {
            if( m[xB]<res ) {
                res = m[xB];
                if( pnMatch ) *pnMatch = xB+nMatch;
            }
        }
    } else {
        res = m[nB];
        /* In the current implementation, pnMatch is always NULL if zA does
        ** not end in "*" */
        assert( pnMatch==0 );
    }
    sqlite3_free(toFree);
    return res;
}
 523
 524/*
 525** Function:    editdist(A,B)
 526**
 527** Return the cost of transforming string A into string B.  Both strings
 528** must be pure ASCII text.  If A ends with '*' then it is assumed to be
 529** a prefix of B and extra characters on the end of B have minimal additional
 530** cost.
 531*/
 532static void editdistSqlFunc(
 533    sqlite3_context *context,
 534    int argc,
 535    sqlite3_value **argv
 536)
 537{
 538    int res = editdist1(
 539                  (const char*)sqlite3_value_text(argv[0]),
 540                  (const char*)sqlite3_value_text(argv[1]),
 541                  0);
 542    if( res<0 ) {
 543        if( res==(-3) ) {
 544            sqlite3_result_error_nomem(context);
 545        } else if( res==(-2) ) {
 546            sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
 547        } else {
 548            sqlite3_result_error(context, "NULL input to editdist()", -1);
 549        }
 550    } else {
 551        sqlite3_result_int(context, res);
 552    }
 553}
 554
 555/* End of the fixed-cost edit distance implementation
 556******************************************************************************
 557*****************************************************************************
 558** Begin: Configurable cost unicode edit distance routines
 559*/
/* Forward declaration of structures, so that the structure definitions
** and function signatures can use the short type names. */
typedef struct EditDist3Cost EditDist3Cost;
typedef struct EditDist3Config EditDist3Config;
typedef struct EditDist3Point EditDist3Point;
typedef struct EditDist3From EditDist3From;
typedef struct EditDist3FromString EditDist3FromString;
typedef struct EditDist3To EditDist3To;
typedef struct EditDist3ToString EditDist3ToString;
typedef struct EditDist3Lang EditDist3Lang;
 569
/*
** An entry in the edit cost table.
**
** Entries for one language form a singly linked list through pNext.
** The text of the transformation is stored in a[]: the nFrom bytes of
** the FROM string come first, immediately followed by the nTo bytes of
** the TO string.  When the two strings together need more than the four
** bytes declared here, the object is allocated with extra space at the
** end so that a[] can be longer.
*/
struct EditDist3Cost {
    EditDist3Cost *pNext;     /* Next cost element */
    u8 nFrom;                 /* Number of bytes in the FROM string */
    u8 nTo;                   /* Number of bytes in the TO string */
    u16 iCost;                /* Cost of this transformation */
    char a[4]    ;            /* FROM string followed by TO string */
    /* Additional TO and FROM string bytes appended as necessary */
};
 581
/*
** Edit costs for a particular language ID: three default costs plus a
** list of costs for specific transformations.
*/
struct EditDist3Lang {
    int iLang;             /* Language ID */
    int iInsCost;          /* Default insertion cost */
    int iDelCost;          /* Default deletion cost */
    int iSubCost;          /* Default substitution cost */
    EditDist3Cost *pCost;  /* Costs */
};
 592
/*
** The default EditDist3Lang object, with default costs: language ID 0,
** insertion cost 100, deletion cost 100, substitution cost 150 and an
** empty list of specific costs.
*/
static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
 597
/*
** Complete configuration: the edit costs of every language ID that has
** been loaded.
*/
struct EditDist3Config {
    int nLang;             /* Number of language IDs.  Size of a[] */
    EditDist3Lang *a;      /* One for each distinct language ID */
};
 605
/*
** Extra information about each character in the FROM string.
** apSubst[] has nSubst entries and apDel[] has nDel entries.
*/
struct EditDist3From {
    int nSubst;              /* Number of substitution cost entries */
    int nDel;                /* Number of deletion cost entries */
    int nByte;               /* Number of bytes in this character */
    EditDist3Cost **apSubst; /* Array of substitution costs for this element */
    EditDist3Cost **apDel;   /* Array of deletion cost entries */
};
 616
/*
** A precompiled FROM string.
**
** In the common case we expect the FROM string to be reused multiple times.
** In other words, the common case will be to measure the edit distance
** from a single origin string to multiple target strings.
*/
struct EditDist3FromString {
    char *z;                 /* The complete text of the FROM string */
    int n;                   /* Number of characters in the FROM string */
    int isPrefix;            /* True if ends with '*' character */
    EditDist3From *a;        /* Extra info about each char of the FROM string */
};
 630
/*
** Extra information about each character in the TO string.
** apIns[] has nIns entries.
*/
struct EditDist3To {
    int nIns;                /* Number of insertion cost entries */
    int nByte;               /* Number of bytes in this character */
    EditDist3Cost **apIns;   /* Array of insertion cost entries */
};
 639
/*
** A precompiled TO string
*/
struct EditDist3ToString {
    char *z;                 /* The complete text of the TO string */
    int n;                   /* Number of characters in the TO string */
    EditDist3To *a;          /* Extra info about each char of the TO string */
};
 648
 649/*
 650** Clear or delete an instance of the object that records all edit-distance
 651** weights.
 652*/
 653static void editDist3ConfigClear(EditDist3Config *p)
 654{
 655    int i;
 656    if( p==0 ) return;
 657    for(i=0; i<p->nLang; i++) {
 658        EditDist3Cost *pCost, *pNext;
 659        pCost = p->a[i].pCost;
 660        while( pCost ) {
 661            pNext = pCost->pNext;
 662            sqlite3_free(pCost);
 663            pCost = pNext;
 664        }
 665    }
 666    sqlite3_free(p->a);
 667    memset(p, 0, sizeof(*p));
 668}
 669static void editDist3ConfigDelete(void *pIn)
 670{
 671    EditDist3Config *p = (EditDist3Config*)pIn;
 672    editDist3ConfigClear(p);
 673    sqlite3_free(p);
 674}
 675
 676/* Compare the FROM values of two EditDist3Cost objects, for sorting.
 677** Return negative, zero, or positive if the A is less than, equal to,
 678** or greater than B.
 679*/
 680static int editDist3CostCompare(EditDist3Cost *pA, EditDist3Cost *pB)
 681{
 682    int n = pA->nFrom;
 683    int rc;
 684    if( n>pB->nFrom ) n = pB->nFrom;
 685    rc = strncmp(pA->a, pB->a, n);
 686    if( rc==0 ) rc = pA->nFrom - pB->nFrom;
 687    return rc;
 688}
 689
 690/*
 691** Merge together two sorted lists of EditDist3Cost objects, in order
 692** of increasing FROM.
 693*/
 694static EditDist3Cost *editDist3CostMerge(
 695    EditDist3Cost *pA,
 696    EditDist3Cost *pB
 697)
 698{
 699    EditDist3Cost *pHead = 0;
 700    EditDist3Cost **ppTail = &pHead;
 701    EditDist3Cost *p;
 702    while( pA && pB ) {
 703        if( editDist3CostCompare(pA,pB)<=0 ) {
 704            p = pA;
 705            pA = pA->pNext;
 706        } else {
 707            p = pB;
 708            pB = pB->pNext;
 709        }
 710        *ppTail = p;
 711        ppTail =  &p->pNext;
 712    }
 713    if( pA ) {
 714        *ppTail = pA;
 715    } else {
 716        *ppTail = pB;
 717    }
 718    return pHead;
 719}
 720
 721/*
 722** Sort a list of EditDist3Cost objects into order of increasing FROM
 723*/
 724static EditDist3Cost *editDist3CostSort(EditDist3Cost *pList)
 725{
 726    EditDist3Cost *ap[60], *p;
 727    int i;
 728    int mx = 0;
 729    ap[0] = 0;
 730    ap[1] = 0;
 731    while( pList ) {
 732        p = pList;
 733        pList = p->pNext;
 734        p->pNext = 0;
 735        for(i=0; ap[i]; i++) {
 736            p = editDist3CostMerge(ap[i],p);
 737            ap[i] = 0;
 738        }
 739        ap[i] = p;
 740        if( i>mx ) {
 741            mx = i;
 742            ap[i+1] = 0;
 743        }
 744    }
 745    p = 0;
 746    for(i=0; i<=mx; i++) {
 747        if( ap[i] ) p = editDist3CostMerge(p,ap[i]);
 748    }
 749    return p;
 750}
 751
 752/*
 753** Load all edit-distance weights from a table.
 754*/
 755static int editDist3ConfigLoad(
 756    EditDist3Config *p,      /* The edit distance configuration to load */
 757    sqlite3 *db,            /* Load from this database */
 758    const char *zTable      /* Name of the table from which to load */
 759)
 760{
 761    sqlite3_stmt *pStmt;
 762    int rc, rc2;
 763    char *zSql;
 764    int iLangPrev = -9999;
 765    EditDist3Lang *pLang = 0;
 766
 767    zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost"
 768                           " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable);
 769    if( zSql==0 ) return SQLITE_NOMEM;
 770    rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
 771    sqlite3_free(zSql);
 772    if( rc ) return rc;
 773    editDist3ConfigClear(p);
 774    while( sqlite3_step(pStmt)==SQLITE_ROW ) {
 775        int iLang = sqlite3_column_int(pStmt, 0);
 776        const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1);
 777        int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0;
 778        const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
 779        int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
 780        int iCost = sqlite3_column_int(pStmt, 3);
 781
 782        assert( zFrom!=0 || nFrom==0 );
 783        assert( zTo!=0 || nTo==0 );
 784        if( nFrom>100 || nTo>100 ) continue;
 785        if( iCost<0 ) continue;
 786        if( iCost>=10000 ) continue;  /* Costs above 10K are considered infinite */
 787        if( pLang==0 || iLang!=iLangPrev ) {
 788            EditDist3Lang *pNew;
 789            pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
 790            if( pNew==0 ) {
 791                rc = SQLITE_NOMEM;
 792                break;
 793            }
 794            p->a = pNew;
 795            pLang = &p->a[p->nLang];
 796            p->nLang++;
 797            pLang->iLang = iLang;
 798            pLang->iInsCost = 100;
 799            pLang->iDelCost = 100;
 800            pLang->iSubCost = 150;
 801            pLang->pCost = 0;
 802            iLangPrev = iLang;
 803        }
 804        if( nFrom==1 && zFrom[0]=='?' && nTo==0 ) {
 805            pLang->iDelCost = iCost;
 806        } else if( nFrom==0 && nTo==1 && zTo[0]=='?' ) {
 807            pLang->iInsCost = iCost;
 808        } else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ) {
 809            pLang->iSubCost = iCost;
 810        } else {
 811            EditDist3Cost *pCost;
 812            int nExtra = nFrom + nTo - 4;
 813            if( nExtra<0 ) nExtra = 0;
 814            pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
 815            if( pCost==0 ) {
 816                rc = SQLITE_NOMEM;
 817                break;
 818            }
 819            pCost->nFrom = (u8)nFrom;
 820            pCost->nTo = (u8)nTo;
 821            pCost->iCost = (u16)iCost;
 822            memcpy(pCost->a, zFrom, nFrom);
 823            memcpy(pCost->a + nFrom, zTo, nTo);
 824            pCost->pNext = pLang->pCost;
 825            pLang->pCost = pCost;
 826        }
 827    }
 828    rc2 = sqlite3_finalize(pStmt);
 829    if( rc==SQLITE_OK ) rc = rc2;
 830    if( rc==SQLITE_OK ) {
 831        int iLang;
 832        for(iLang=0; iLang<p->nLang; iLang++) {
 833            p->a[iLang].pCost = editDist3CostSort(p->a[iLang].pCost);
 834        }
 835    }
 836    return rc;
 837}
 838
 839/*
 840** Return the length (in bytes) of a utf-8 character.  Or return a maximum
 841** of N.
 842*/
 843static int utf8Len(unsigned char c, int N)
 844{
 845    int len = 1;
 846    if( c>0x7f ) {
 847        if( (c&0xe0)==0xc0 ) {
 848            len = 2;
 849        } else if( (c&0xf0)==0xe0 ) {
 850            len = 3;
 851        } else {
 852            len = 4;
 853        }
 854    }
 855    if( len>N ) len = N;
 856    return len;
 857}
 858
 859/*
 860** Return TRUE (non-zero) if the To side of the given cost matches
 861** the given string.
 862*/
 863static int matchTo(EditDist3Cost *p, const char *z, int n)
 864{
 865    assert( n>0 );
 866    if( p->a[p->nFrom]!=z[0] ) return 0;
 867    if( p->nTo>n ) return 0;
 868    if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0;
 869    return 1;
 870}
 871
 872/*
 873** Return TRUE (non-zero) if the From side of the given cost matches
 874** the given string.
 875*/
 876static int matchFrom(EditDist3Cost *p, const char *z, int n)
 877{
 878    assert( p->nFrom<=n );
 879    if( p->nFrom ) {
 880        if( p->a[0]!=z[0] ) return 0;
 881        if( strncmp(p->a, z, p->nFrom)!=0 ) return 0;
 882    }
 883    return 1;
 884}
 885
 886/*
 887** Return TRUE (non-zero) of the next FROM character and the next TO
 888** character are the same.
 889*/
 890static int matchFromTo(
 891    EditDist3FromString *pStr,  /* Left hand string */
 892    int n1,                     /* Index of comparison character on the left */
 893    const char *z2,             /* Right-handl comparison character */
 894    int n2                      /* Bytes remaining in z2[] */
 895)
 896{
 897    int b1 = pStr->a[n1].nByte;
 898    if( b1>n2 ) return 0;
 899    assert( b1>0 );
 900    if( pStr->z[n1]!=z2[0] ) return 0;
 901    if( strncmp(pStr->z+n1, z2, b1)!=0 ) return 0;
 902    return 1;
 903}
 904
 905/*
 906** Delete an EditDist3FromString objecct
 907*/
 908static void editDist3FromStringDelete(EditDist3FromString *p)
 909{
 910    int i;
 911    if( p ) {
 912        for(i=0; i<p->n; i++) {
 913            sqlite3_free(p->a[i].apDel);
 914            sqlite3_free(p->a[i].apSubst);
 915        }
 916        sqlite3_free(p);
 917    }
 918}
 919
 920/*
 921** Create a EditDist3FromString object.
 922*/
 923static EditDist3FromString *editDist3FromStringNew(
 924    const EditDist3Lang *pLang,
 925    const char *z,
 926    int n
 927)
 928{
 929    EditDist3FromString *pStr;
 930    EditDist3Cost *p;
 931    int i;
 932
 933    if( z==0 ) return 0;
 934    if( n<0 ) n = (int)strlen(z);
 935    pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
 936    if( pStr==0 ) return 0;
 937    pStr->a = (EditDist3From*)&pStr[1];
 938    memset(pStr->a, 0, sizeof(pStr->a[0])*n);
 939    pStr->n = n;
 940    pStr->z = (char*)&pStr->a[n];
 941    memcpy(pStr->z, z, n+1);
 942    if( n && z[n-1]=='*' ) {
 943        pStr->isPrefix = 1;
 944        n--;
 945        pStr->n--;
 946        pStr->z[n] = 0;
 947    } else {
 948        pStr->isPrefix = 0;
 949    }
 950
 951    for(i=0; i<n; i++) {
 952        EditDist3From *pFrom = &pStr->a[i];
 953        memset(pFrom, 0, sizeof(*pFrom));
 954        pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
 955        for(p=pLang->pCost; p; p=p->pNext) {
 956            EditDist3Cost **apNew;
 957            if( i+p->nFrom>n ) continue;
 958            if( matchFrom(p, z+i, n-i)==0 ) continue;
 959            if( p->nTo==0 ) {
 960                apNew = sqlite3_realloc64(pFrom->apDel,
 961                                          sizeof(*apNew)*(pFrom->nDel+1));
 962                if( apNew==0 ) break;
 963                pFrom->apDel = apNew;
 964                apNew[pFrom->nDel++] = p;
 965            } else {
 966                apNew = sqlite3_realloc64(pFrom->apSubst,
 967                                          sizeof(*apNew)*(pFrom->nSubst+1));
 968                if( apNew==0 ) break;
 969                pFrom->apSubst = apNew;
 970                apNew[pFrom->nSubst++] = p;
 971            }
 972        }
 973        if( p ) {
 974            editDist3FromStringDelete(pStr);
 975            pStr = 0;
 976            break;
 977        }
 978    }
 979    return pStr;
 980}
 981
 982/*
 983** Update entry m[i] such that it is the minimum of its current value
 984** and m[j]+iCost.
 985*/
 986static void updateCost(
 987    unsigned int *m,
 988    int i,
 989    int j,
 990    int iCost
 991)
 992{
 993    unsigned int b;
 994    assert( iCost>=0 );
 995    assert( iCost<10000 );
 996    b = m[j] + iCost;
 997    if( b<m[i] ) m[i] = b;
 998}
 999
/*
** Number of bytes of stack space that editDist3Core() may use for its
** Wagner matrix.  When a computation needs more than this, the whole
** matrix is allocated from the heap instead.  To reduce the load on the
** memory allocator, make this value as large as is practical for the
** architecture in use.  The value may be overridden at compile time.
*/
#if !defined(SQLITE_SPELLFIX_STACKALLOC_SZ)
# define SQLITE_SPELLFIX_STACKALLOC_SZ  (1024)
#endif
1010
1011/* Compute the edit distance between two strings.
1012**
1013** If an error occurs, return a negative number which is the error code.
1014**
1015** If pnMatch is not NULL, then *pnMatch is set to the number of characters
1016** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
1017** not contain the pattern for a prefix-search, then this is always the number
1018** of characters in z2. If pFrom does contain a prefix search pattern, then
1019** it is the number of characters in the prefix of z2 that was deemed to
1020** match pFrom.
1021*/
1022static int editDist3Core(
1023    EditDist3FromString *pFrom,  /* The FROM string */
1024    const char *z2,              /* The TO string */
1025    int n2,                      /* Length of the TO string */
1026    const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
1027    int *pnMatch                 /* OUT: Characters in matched prefix */
1028)
1029{
1030    int k, n;
1031    int i1, b1;
1032    int i2, b2;
1033    EditDist3FromString f = *pFrom;
1034    EditDist3To *a2;
1035    unsigned int *m;
1036    unsigned int *pToFree;
1037    int szRow;
1038    EditDist3Cost *p;
1039    int res;
1040    sqlite3_uint64 nByte;
1041    unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];
1042
1043    /* allocate the Wagner matrix and the aTo[] array for the TO string */
1044    n = (f.n+1)*(n2+1);
1045    n = (n+1)&~1;
1046    nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
1047    if( nByte<=sizeof(stackSpace) ) {
1048        m = stackSpace;
1049        pToFree = 0;
1050    } else {
1051        m = pToFree = sqlite3_malloc64( nByte );
1052        if( m==0 ) return -1;            /* Out of memory */
1053    }
1054    a2 = (EditDist3To*)&m[n];
1055    memset(a2, 0, sizeof(a2[0])*n2);
1056
1057    /* Fill in the a1[] matrix for all characters of the TO string */
1058    for(i2=0; i2<n2; i2++) {
1059        a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
1060        for(p=pLang->pCost; p; p=p->pNext) {
1061            EditDist3Cost **apNew;
1062            if( p->nFrom>0 ) break;
1063            if( i2+p->nTo>n2 ) continue;
1064            if( p->a[0]>z2[i2] ) break;
1065            if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
1066            a2[i2].nIns++;
1067            apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
1068            if( apNew==0 ) {
1069                res = -1;  /* Out of memory */
1070                goto editDist3Abort;
1071            }
1072            a2[i2].apIns = apNew;
1073            a2[i2].apIns[a2[i2].nIns-1] = p;
1074        }
1075    }
1076
1077    /* Prepare to compute the minimum edit distance */
1078    szRow = f.n+1;
1079    memset(m, 0x01, (n2+1)*szRow*sizeof(m[0]));
1080    m[0] = 0;
1081
1082    /* First fill in the top-row of the matrix with FROM deletion costs */
1083    for(i1=0; i1<f.n; i1 += b1) {
1084        b1 = f.a[i1].nByte;
1085        updateCost(m, i1+b1, i1, pLang->iDelCost);
1086        for(k=0; k<f.a[i1].nDel; k++) {
1087            p = f.a[i1].apDel[k];
1088            updateCost(m, i1+p->nFrom, i1, p->iCost);
1089        }
1090    }
1091
1092    /* Fill in all subsequent rows, top-to-bottom, left-to-right */
1093    for(i2=0; i2<n2; i2 += b2) {
1094        int rx;      /* Starting index for current row */
1095        int rxp;     /* Starting index for previous row */
1096        b2 = a2[i2].nByte;
1097        rx = szRow*(i2+b2);
1098        rxp = szRow*i2;
1099        updateCost(m, rx, rxp, pLang->iInsCost);
1100        for(k=0; k<a2[i2].nIns; k++) {
1101            p = a2[i2].apIns[k];
1102            updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost);
1103        }
1104        for(i1=0; i1<f.n; i1+=b1) {
1105            int cx;    /* Index of current cell */
1106            int cxp;   /* Index of cell immediately to the left */
1107            int cxd;   /* Index of cell to the left and one row above */
1108            int cxu;   /* Index of cell immediately above */
1109            b1 = f.a[i1].nByte;
1110            cxp = rx + i1;
1111            cx = cxp + b1;
1112            cxd = rxp + i1;
1113            cxu = cxd + b1;
1114            updateCost(m, cx, cxp, pLang->iDelCost);
1115            for(k=0; k<f.a[i1].nDel; k++) {
1116                p = f.a[i1].apDel[k];
1117                updateCost(m, cxp+p->nFrom, cxp, p->iCost);
1118            }
1119            updateCost(m, cx, cxu, pLang->iInsCost);
1120            if( matchFromTo(&f, i1, z2+i2, n2-i2) ) {
1121                updateCost(m, cx, cxd, 0);
1122            }
1123            updateCost(m, cx, cxd, pLang->iSubCost);
1124            for(k=0; k<f.a[i1].nSubst; k++) {
1125                p = f.a[i1].apSubst[k];
1126                if( matchTo(p, z2+i2, n2-i2) ) {
1127                    updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost);
1128                }
1129            }
1130        }
1131    }
1132
1133#if 0  /* Enable for debugging */
1134    printf("         ^");
1135    for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff);
1136    printf("\n   ^:");
1137    for(i1=0; i1<szRow; i1++) {
1138        int v = m[i1];
1139        if( v>9999 ) printf(" ****");
1140        else         printf(" %4d", v);
1141    }
1142    printf("\n");
1143    for(i2=0; i2<n2; i2++) {
1144        printf("%c-%02x:", z2[i2], z2[i2]&0xff);
1145        for(i1=0; i1<szRow; i1++) {
1146            int v = m[(i2+1)*szRow+i1];
1147            if( v>9999 ) printf(" ****");
1148            else         printf(" %4d", v);
1149        }
1150        printf("\n");
1151    }
1152#endif
1153
1154    /* Free memory allocations and return the result */
1155    res = (int)m[szRow*(n2+1)-1];
1156    n = n2;
1157    if( f.isPrefix ) {
1158        for(i2=1; i2<=n2; i2++) {
1159            int b = m[szRow*i2-1];
1160            if( b<=res ) {
1161                res = b;
1162                n = i2 - 1;
1163            }
1164        }
1165    }
1166    if( pnMatch ) {
1167        int nExtra = 0;
1168        for(k=0; k<n; k++) {
1169            if( (z2[k] & 0xc0)==0x80 ) nExtra++;
1170        }
1171        *pnMatch = n - nExtra;
1172    }
1173
1174editDist3Abort:
1175    for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
1176    sqlite3_free(pToFree);
1177    return res;
1178}
1179
1180/*
1181** Get an appropriate EditDist3Lang object.
1182*/
1183static const EditDist3Lang *editDist3FindLang(
1184    EditDist3Config *pConfig,
1185    int iLang
1186)
1187{
1188    int i;
1189    for(i=0; i<pConfig->nLang; i++) {
1190        if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i];
1191    }
1192    return &editDist3Lang;
1193}
1194
1195/*
1196** Function:    editdist3(A,B,iLang)
1197**              editdist3(tablename)
1198**
1199** Return the cost of transforming string A into string B using edit
1200** weights for iLang.
1201**
1202** The second form loads edit weights into memory from a table.
1203*/
1204static void editDist3SqlFunc(
1205    sqlite3_context *context,
1206    int argc,
1207    sqlite3_value **argv
1208)
1209{
1210    EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context);
1211    sqlite3 *db = sqlite3_context_db_handle(context);
1212    int rc;
1213    if( argc==1 ) {
1214        const char *zTable = (const char*)sqlite3_value_text(argv[0]);
1215        rc = editDist3ConfigLoad(pConfig, db, zTable);
1216        if( rc ) sqlite3_result_error_code(context, rc);
1217    } else {
1218        const char *zA = (const char*)sqlite3_value_text(argv[0]);
1219        const char *zB = (const char*)sqlite3_value_text(argv[1]);
1220        int nA = sqlite3_value_bytes(argv[0]);
1221        int nB = sqlite3_value_bytes(argv[1]);
1222        int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0;
1223        const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang);
1224        EditDist3FromString *pFrom;
1225        int dist;
1226
1227        pFrom = editDist3FromStringNew(pLang, zA, nA);
1228        if( pFrom==0 ) {
1229            sqlite3_result_error_nomem(context);
1230            return;
1231        }
1232        dist = editDist3Core(pFrom, zB, nB, pLang, 0);
1233        editDist3FromStringDelete(pFrom);
1234        if( dist==(-1) ) {
1235            sqlite3_result_error_nomem(context);
1236        } else {
1237            sqlite3_result_int(context, dist);
1238        }
1239    }
1240}
1241
1242/*
1243** Register the editDist3 function with SQLite
1244*/
1245static int editDist3Install(sqlite3 *db)
1246{
1247    int rc;
1248    EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
1249    if( pConfig==0 ) return SQLITE_NOMEM;
1250    memset(pConfig, 0, sizeof(*pConfig));
1251    rc = sqlite3_create_function_v2(db, "editdist3",
1252                                    2, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1253                                    editDist3SqlFunc, 0, 0, 0);
1254    if( rc==SQLITE_OK ) {
1255        rc = sqlite3_create_function_v2(db, "editdist3",
1256                                        3, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1257                                        editDist3SqlFunc, 0, 0, 0);
1258    }
1259    if( rc==SQLITE_OK ) {
1260        rc = sqlite3_create_function_v2(db, "editdist3",
1261                                        1, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1262                                        editDist3SqlFunc, 0, 0, editDist3ConfigDelete);
1263    } else {
1264        sqlite3_free(pConfig);
1265    }
1266    return rc;
1267}
1268/* End configurable cost unicode edit distance routines
1269******************************************************************************
1270******************************************************************************
1271** Begin transliterate unicode-to-ascii implementation
1272*/
1273
#if !SQLITE_AMALGAMATION
/*
** Lookup table that helps decode the first byte of a multi-byte UTF8
** character.  It is indexed by (lead byte - 0xc0) and yields the initial
** value of the code point being assembled.
*/
static const unsigned char sqlite3Utf8Trans1[] = {
    /* Lead bytes 0xc0..0xdf: the low five bits of the byte */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,

    /* Lead bytes 0xe0..0xef: the low four bits of the byte */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,

    /* Lead bytes 0xf0..0xf7: the low three bits of the byte */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,

    /* Lead bytes 0xf8..0xfb, then 0xfc..0xfd, then 0xfe and 0xff */
    0x00, 0x01, 0x02, 0x03,
    0x00, 0x01,
    0x00,
    0x00,
};
#endif
1290
1291/*
1292** Return the value of the first UTF-8 character in the string.
1293*/
1294static int utf8Read(const unsigned char *z, int n, int *pSize)
1295{
1296    int c, i;
1297
1298    /* All callers to this routine (in the current implementation)
1299    ** always have n>0. */
1300    if( NEVER(n==0) ) {
1301        c = i = 0;
1302    } else {
1303        c = z[0];
1304        i = 1;
1305        if( c>=0xc0 ) {
1306            c = sqlite3Utf8Trans1[c-0xc0];
1307            while( i<n && (z[i] & 0xc0)==0x80 ) {
1308                c = (c<<6) + (0x3f & z[i++]);
1309            }
1310        }
1311    }
1312    *pSize = i;
1313    return c;
1314}
1315
/*
** Count the characters in the utf-8 string held in the nIn byte buffer
** pointed to by zIn.  The buffer is walked with utf8Read(), and each call
** that consumes bytes counts as one character.
*/
static int utf8Charlen(const char *zIn, int nIn)
{
    const unsigned char *zBytes = (const unsigned char *)zIn;
    int nChar = 0;
    int iOff = 0;
    while( iOff<nIn ) {
        int sz;
        utf8Read(&zBytes[iOff], nIn-iOff, &sz);
        iOff += sz;
        nChar++;
    }
    return nChar;
}
1331
/*
** One row of the unicode-to-ASCII translation table: a unicode character
** and the bytes it is translated into.  Rows in the table fill the unused
** trailing cTo bytes with 0x00.
*/
typedef struct Transliteration Transliteration;
struct Transliteration {
    unsigned short cFrom;       /* Unicode character to translate */
    unsigned char cTo0;         /* First byte of the translation */
    unsigned char cTo1;         /* Second byte, or 0x00 */
    unsigned char cTo2;         /* Third byte, or 0x00 */
    unsigned char cTo3;         /* Fourth byte, or 0x00 */
#ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
    unsigned char cTo4;         /* Fifth byte, present only in 5-byte builds */
#endif
};
1340
1341/*
1342** Table of translations from unicode characters into ASCII.
1343*/
1344static const Transliteration translit[] = {
1345    { 0x00A0,  0x20, 0x00, 0x00, 0x00 },  /*   to   */
1346    { 0x00B5,  0x75, 0x00, 0x00, 0x00 },  /* µ to u */
1347    { 0x00C0,  0x41, 0x00, 0x00, 0x00 },  /* À to A */
1348    { 0x00C1,  0x41, 0x00, 0x00, 0x00 },  /* Á to A */
1349    { 0x00C2,  0x41, 0x00, 0x00, 0x00 },  /* Â to A */
1350    { 0x00C3,  0x41, 0x00, 0x00, 0x00 },  /* Ã to A */
1351    { 0x00C4,  0x41, 0x65, 0x00, 0x00 },  /* Ä to Ae */
1352    { 0x00C5,  0x41, 0x61, 0x00, 0x00 },  /* Å to Aa */
1353    { 0x00C6,  0x41, 0x45, 0x00, 0x00 },  /* Æ to AE */
1354    { 0x00C7,  0x43, 0x00, 0x00, 0x00 },  /* Ç to C */
1355    { 0x00C8,  0x45, 0x00, 0x00, 0x00 },  /* È to E */
1356    { 0x00C9,  0x45, 0x00, 0x00, 0x00 },  /* É to E */
1357    { 0x00CA,  0x45, 0x00, 0x00, 0x00 },  /* Ê to E */
1358    { 0x00CB,  0x45, 0x00, 0x00, 0x00 },  /* Ë to E */
1359    { 0x00CC,  0x49, 0x00, 0x00, 0x00 },  /* Ì to I */
1360    { 0x00CD,  0x49, 0x00, 0x00, 0x00 },  /* Í to I */
1361    { 0x00CE,  0x49, 0x00, 0x00, 0x00 },  /* Î to I */
1362    { 0x00CF,  0x49, 0x00, 0x00, 0x00 },  /* Ï to I */
1363    { 0x00D0,  0x44, 0x00, 0x00, 0x00 },  /* Ð to D */
1364    { 0x00D1,  0x4E, 0x00, 0x00, 0x00 },  /* Ñ to N */
1365    { 0x00D2,  0x4F, 0x00, 0x00, 0x00 },  /* Ò to O */
1366    { 0x00D3,  0x4F, 0x00, 0x00, 0x00 },  /* Ó to O */
1367    { 0x00D4,  0x4F, 0x00, 0x00, 0x00 },  /* Ô to O */
1368    { 0x00D5,  0x4F, 0x00, 0x00, 0x00 },  /* Õ to O */
1369    { 0x00D6,  0x4F, 0x65, 0x00, 0x00 },  /* Ö to Oe */
1370    { 0x00D7,  0x78, 0x00, 0x00, 0x00 },  /* × to x */
1371    { 0x00D8,  0x4F, 0x00, 0x00, 0x00 },  /* Ø to O */
1372    { 0x00D9,  0x55, 0x00, 0x00, 0x00 },  /* Ù to U */
1373    { 0x00DA,  0x55, 0x00, 0x00, 0x00 },  /* Ú to U */
1374    { 0x00DB,  0x55, 0x00, 0x00, 0x00 },  /* Û to U */
1375    { 0x00DC,  0x55, 0x65, 0x00, 0x00 },  /* Ü to Ue */
1376    { 0x00DD,  0x59, 0x00, 0x00, 0x00 },  /* Ý to Y */
1377    { 0x00DE,  0x54, 0x68, 0x00, 0x00 },  /* Þ to Th */
1378    { 0x00DF,  0x73, 0x73, 0x00, 0x00 },  /* ß to ss */
1379    { 0x00E0,  0x61, 0x00, 0x00, 0x00 },  /* à to a */
1380    { 0x00E1,  0x61, 0x00, 0x00, 0x00 },  /* á to a */
1381    { 0x00E2,  0x61, 0x00, 0x00, 0x00 },  /* â to a */
1382    { 0x00E3,  0x61, 0x00, 0x00, 0x00 },  /* ã to a */
1383    { 0x00E4,  0x61, 0x65, 0x00, 0x00 },  /* ä to ae */
1384    { 0x00E5,  0x61, 0x61, 0x00, 0x00 },  /* å to aa */
1385    { 0x00E6,  0x61, 0x65, 0x00, 0x00 },  /* æ to ae */
1386    { 0x00E7,  0x63, 0x00, 0x00, 0x00 },  /* ç to c */
1387    { 0x00E8,  0x65, 0x00, 0x00, 0x00 },  /* è to e */
1388    { 0x00E9,  0x65, 0x00, 0x00, 0x00 },  /* é to e */
1389    { 0x00EA,  0x65, 0x00, 0x00, 0x00 },  /* ê to e */
1390    { 0x00EB,  0x65, 0x00, 0x00, 0x00 },  /* ë to e */
1391    { 0x00EC,  0x69, 0x00, 0x00, 0x00 },  /* ì to i */
1392    { 0x00ED,  0x69, 0x00, 0x00, 0x00 },  /* í to i */
1393    { 0x00EE,  0x69, 0x00, 0x00, 0x00 },  /* î to i */
1394    { 0x00EF,  0x69, 0x00, 0x00, 0x00 },  /* ï to i */
1395    { 0x00F0,  0x64, 0x00, 0x00, 0x00 },  /* ð to d */
1396    { 0x00F1,  0x6E, 0x00, 0x00, 0x00 },  /* ñ to n */
1397    { 0x00F2,  0x6F, 0x00, 0x00, 0x00 },  /* ò to o */
1398    { 0x00F3,  0x6F, 0x00, 0x00, 0x00 },  /* ó to o */
1399    { 0x00F4,  0x6F, 0x00, 0x00, 0x00 },  /* ô to o */
1400    { 0x00F5,  0x6F, 0x00, 0x00, 0x00 },  /* õ to o */
1401    { 0x00F6,  0x6F, 0x65, 0x00, 0x00 },  /* ö to oe */
1402    { 0x00F7,  0x3A, 0x00, 0x00, 0x00 },  /* ÷ to : */
1403    { 0x00F8,  0x6F, 0x00, 0x00, 0x00 },  /* ø to o */
1404    { 0x00F9,  0x75, 0x00, 0x00, 0x00 },  /* ù to u */
1405    { 0x00FA,  0x75, 0x00, 0x00, 0x00 },  /* ú to u */
1406    { 0x00FB,  0x75, 0x00, 0x00, 0x00 },  /* û to u */
1407    { 0x00FC,  0x75, 0x65, 0x00, 0x00 },  /* ü to ue */
1408    { 0x00FD,  0x79, 0x00, 0x00, 0x00 },  /* ý to y */
1409    { 0x00FE,  0x74, 0x68, 0x00, 0x00 },  /* þ to th */
1410    { 0x00FF,  0x79, 0x00, 0x00, 0x00 },  /* ÿ to y */
1411    { 0x0100,  0x41, 0x00, 0x00, 0x00 },  /* Ā to A */
1412    { 0x0101,  0x61, 0x00, 0x00, 0x00 },  /* ā to a */
1413    { 0x0102,  0x41, 0x00, 0x00, 0x00 },  /* Ă to A */
1414    { 0x0103,  0x61, 0x00, 0x00, 0x00 },  /* ă to a */
1415    { 0x0104,  0x41, 0x00, 0x00, 0x00 },  /* Ą to A */
1416    { 0x0105,  0x61, 0x00, 0x00, 0x00 },  /* ą to a */
1417    { 0x0106,  0x43, 0x00, 0x00, 0x00 },  /* Ć to C */
1418    { 0x0107,  0x63, 0x00, 0x00, 0x00 },  /* ć to c */
1419    { 0x0108,  0x43, 0x68, 0x00, 0x00 },  /* Ĉ to Ch */
1420    { 0x0109,  0x63, 0x68, 0x00, 0x00 },  /* ĉ to ch */
1421    { 0x010A,  0x43, 0x00, 0x00, 0x00 },  /* Ċ to C */
1422    { 0x010B,  0x63, 0x00, 0x00, 0x00 },  /* ċ to c */
1423    { 0x010C,  0x43, 0x00, 0x00, 0x00 },  /* Č to C */
1424    { 0x010D,  0x63, 0x00, 0x00, 0x00 },  /* č to c */
1425    { 0x010E,  0x44, 0x00, 0x00, 0x00 },  /* Ď to D */
1426    { 0x010F,  0x64, 0x00, 0x00, 0x00 },  /* ď to d */
1427    { 0x0110,  0x44, 0x00, 0x00, 0x00 },  /* Đ to D */
1428    { 0x0111,  0x64, 0x00, 0x00, 0x00 },  /* đ to d */
1429    { 0x0112,  0x45, 0x00, 0x00, 0x00 },  /* Ē to E */
1430    { 0x0113,  0x65, 0x00, 0x00, 0x00 },  /* ē to e */
1431    { 0x0114,  0x45, 0x00, 0x00, 0x00 },  /* Ĕ to E */
1432    { 0x0115,  0x65, 0x00, 0x00, 0x00 },  /* ĕ to e */
1433    { 0x0116,  0x45, 0x00, 0x00, 0x00 },  /* Ė to E */
1434    { 0x0117,  0x65, 0x00, 0x00, 0x00 },  /* ė to e */
1435    { 0x0118,  0x45, 0x00, 0x00, 0x00 },  /* Ę to E */
1436    { 0x0119,  0x65, 0x00, 0x00, 0x00 },  /* ę to e */
1437    { 0x011A,  0x45, 0x00, 0x00, 0x00 },  /* Ě to E */
1438    { 0x011B,  0x65, 0x00, 0x00, 0x00 },  /* ě to e */
1439    { 0x011C,  0x47, 0x68, 0x00, 0x00 },  /* Ĝ to Gh */
1440    { 0x011D,  0x67, 0x68, 0x00, 0x00 },  /* ĝ to gh */
1441    { 0x011E,  0x47, 0x00, 0x00, 0x00 },  /* Ğ to G */
1442    { 0x011F,  0x67, 0x00, 0x00, 0x00 },  /* ğ to g */
1443    { 0x0120,  0x47, 0x00, 0x00, 0x00 },  /* Ġ to G */
1444    { 0x0121,  0x67, 0x00, 0x00, 0x00 },  /* ġ to g */
1445    { 0x0122,  0x47, 0x00, 0x00, 0x00 },  /* Ģ to G */
1446    { 0x0123,  0x67, 0x00, 0x00, 0x00 },  /* ģ to g */
1447    { 0x0124,  0x48, 0x68, 0x00, 0x00 },  /* Ĥ to Hh */
1448    { 0x0125,  0x68, 0x68, 0x00, 0x00 },  /* ĥ to hh */
1449    { 0x0126,  0x48, 0x00, 0x00, 0x00 },  /* Ħ to H */
1450    { 0x0127,  0x68, 0x00, 0x00, 0x00 },  /* ħ to h */
1451    { 0x0128,  0x49, 0x00, 0x00, 0x00 },  /* Ĩ to I */
1452    { 0x0129,  0x69, 0x00, 0x00, 0x00 },  /* ĩ to i */
1453    { 0x012A,  0x49, 0x00, 0x00, 0x00 },  /* Ī to I */
1454    { 0x012B,  0x69, 0x00, 0x00, 0x00 },  /* ī to i */
1455    { 0x012C,  0x49, 0x00, 0x00, 0x00 },  /* Ĭ to I */
1456    { 0x012D,  0x69, 0x00, 0x00, 0x00 },  /* ĭ to i */
1457    { 0x012E,  0x49, 0x00, 0x00, 0x00 },  /* Į to I */
1458    { 0x012F,  0x69, 0x00, 0x00, 0x00 },  /* į to i */
1459    { 0x0130,  0x49, 0x00, 0x00, 0x00 },  /* İ to I */
1460    { 0x0131,  0x69, 0x00, 0x00, 0x00 },  /* ı to i */
1461    { 0x0132,  0x49, 0x4A, 0x00, 0x00 },  /* IJ to IJ */
1462    { 0x0133,  0x69, 0x6A, 0x00, 0x00 },  /* ij to ij */
1463    { 0x0134,  0x4A, 0x68, 0x00, 0x00 },  /* Ĵ to Jh */
1464    { 0x0135,  0x6A, 0x68, 0x00, 0x00 },  /* ĵ to jh */
1465    { 0x0136,  0x4B, 0x00, 0x00, 0x00 },  /* Ķ to K */
1466    { 0x0137,  0x6B, 0x00, 0x00, 0x00 },  /* ķ to k */
1467    { 0x0138,  0x6B, 0x00, 0x00, 0x00 },  /* ĸ to k */
1468    { 0x0139,  0x4C, 0x00, 0x00, 0x00 },  /* Ĺ to L */
1469    { 0x013A,  0x6C, 0x00, 0x00, 0x00 },  /* ĺ to l */
1470    { 0x013B,  0x4C, 0x00, 0x00, 0x00 },  /* Ļ to L */
1471    { 0x013C,  0x6C, 0x00, 0x00, 0x00 },  /* ļ to l */
1472    { 0x013D,  0x4C, 0x00, 0x00, 0x00 },  /* Ľ to L */
1473    { 0x013E,  0x6C, 0x00, 0x00, 0x00 },  /* ľ to l */
1474    { 0x013F,  0x4C, 0x2E, 0x00, 0x00 },  /* Ŀ to L. */
1475    { 0x0140,  0x6C, 0x2E, 0x00, 0x00 },  /* ŀ to l. */
1476    { 0x0141,  0x4C, 0x00, 0x00, 0x00 },  /* Ł to L */
1477    { 0x0142,  0x6C, 0x00, 0x00, 0x00 },  /* ł to l */
1478    { 0x0143,  0x4E, 0x00, 0x00, 0x00 },  /* Ń to N */
1479    { 0x0144,  0x6E, 0x00, 0x00, 0x00 },  /* ń to n */
1480    { 0x0145,  0x4E, 0x00, 0x00, 0x00 },  /* Ņ to N */
1481    { 0x0146,  0x6E, 0x00, 0x00, 0x00 },  /* ņ to n */
1482    { 0x0147,  0x4E, 0x00, 0x00, 0x00 },  /* Ň to N */
1483    { 0x0148,  0x6E, 0x00, 0x00, 0x00 },  /* ň to n */
1484    { 0x0149,  0x27, 0x6E, 0x00, 0x00 },  /* ʼn to 'n */
1485    { 0x014A,  0x4E, 0x47, 0x00, 0x00 },  /* Ŋ to NG */
1486    { 0x014B,  0x6E, 0x67, 0x00, 0x00 },  /* ŋ to ng */
1487    { 0x014C,  0x4F, 0x00, 0x00, 0x00 },  /* Ō to O */
1488    { 0x014D,  0x6F, 0x00, 0x00, 0x00 },  /* ō to o */
1489    { 0x014E,  0x4F, 0x00, 0x00, 0x00 },  /* Ŏ to O */
1490    { 0x014F,  0x6F, 0x00, 0x00, 0x00 },  /* ŏ to o */
1491    { 0x0150,  0x4F, 0x00, 0x00, 0x00 },  /* Ő to O */
1492    { 0x0151,  0x6F, 0x00, 0x00, 0x00 },  /* ő to o */
1493    { 0x0152,  0x4F, 0x45, 0x00, 0x00 },  /* Œ to OE */
1494    { 0x0153,  0x6F, 0x65, 0x00, 0x00 },  /* œ to oe */
1495    { 0x0154,  0x52, 0x00, 0x00, 0x00 },  /* Ŕ to R */
1496    { 0x0155,  0x72, 0x00, 0x00, 0x00 },  /* ŕ to r */
1497    { 0x0156,  0x52, 0x00, 0x00, 0x00 },  /* Ŗ to R */
1498    { 0x0157,  0x72, 0x00, 0x00, 0x00 },  /* ŗ to r */
1499    { 0x0158,  0x52, 0x00, 0x00, 0x00 },  /* Ř to R */
1500    { 0x0159,  0x72, 0x00, 0x00, 0x00 },  /* ř to r */
1501    { 0x015A,  0x53, 0x00, 0x00, 0x00 },  /* Ś to S */
1502    { 0x015B,  0x73, 0x00, 0x00, 0x00 },  /* ś to s */
1503    { 0x015C,  0x53, 0x68, 0x00, 0x00 },  /* Ŝ to Sh */
1504    { 0x015D,  0x73, 0x68, 0x00, 0x00 },  /* ŝ to sh */
1505    { 0x015E,  0x53, 0x00, 0x00, 0x00 },  /* Ş to S */
1506    { 0x015F,  0x73, 0x00, 0x00, 0x00 },  /* ş to s */
1507    { 0x0160,  0x53, 0x00, 0x00, 0x00 },  /* Š to S */
1508    { 0x0161,  0x73, 0x00, 0x00, 0x00 },  /* š to s */
1509    { 0x0162,  0x54, 0x00, 0x00, 0x00 },  /* Ţ to T */
1510    { 0x0163,  0x74, 0x00, 0x00, 0x00 },  /* ţ to t */
1511    { 0x0164,  0x54, 0x00, 0x00, 0x00 },  /* Ť to T */
1512    { 0x0165,  0x74, 0x00, 0x00, 0x00 },  /* ť to t */
1513    { 0x0166,  0x54, 0x00, 0x00, 0x00 },  /* Ŧ to T */
1514    { 0x0167,  0x74, 0x00, 0x00, 0x00 },  /* ŧ to t */
1515    { 0x0168,  0x55, 0x00, 0x00, 0x00 },  /* Ũ to U */
1516    { 0x0169,  0x75, 0x00, 0x00, 0x00 },  /* ũ to u */
1517    { 0x016A,  0x55, 0x00, 0x00, 0x00 },  /* Ū to U */
1518    { 0x016B,  0x75, 0x00, 0x00, 0x00 },  /* ū to u */
1519    { 0x016C,  0x55, 0x00, 0x00, 0x00 },  /* Ŭ to U */
1520    { 0x016D,  0x75, 0x00, 0x00, 0x00 },  /* ŭ to u */
1521    { 0x016E,  0x55, 0x00, 0x00, 0x00 },  /* Ů to U */
1522    { 0x016F,  0x75, 0x00, 0x00, 0x00 },  /* ů to u */
1523    { 0x0170,  0x55, 0x00, 0x00, 0x00 },  /* Ű to U */
1524    { 0x0171,  0x75, 0x00, 0x00, 0x00 },  /* ű to u */
1525    { 0x0172,  0x55, 0x00, 0x00, 0x00 },  /* Ų to U */
1526    { 0x0173,  0x75, 0x00, 0x00, 0x00 },  /* ų to u */
1527    { 0x0174,  0x57, 0x00, 0x00, 0x00 },  /* Ŵ to W */
1528    { 0x0175,  0x77, 0x00, 0x00, 0x00 },  /* ŵ to w */
1529    { 0x0176,  0x59, 0x00, 0x00, 0x00 },  /* Ŷ to Y */
1530    { 0x0177,  0x79, 0x00, 0x00, 0x00 },  /* ŷ to y */
1531    { 0x0178,  0x59, 0x00, 0x00, 0x00 },  /* Ÿ to Y */
1532    { 0x0179,  0x5A, 0x00, 0x00, 0x00 },  /* Ź to Z */
1533    { 0x017A,  0x7A, 0x00, 0x00, 0x00 },  /* ź to z */
1534    { 0x017B,  0x5A, 0x00, 0x00, 0x00 },  /* Ż to Z */
1535    { 0x017C,  0x7A, 0x00, 0x00, 0x00 },  /* ż to z */
1536    { 0x017D,  0x5A, 0x00, 0x00, 0x00 },  /* Ž to Z */
1537    { 0x017E,  0x7A, 0x00, 0x00, 0x00 },  /* ž to z */
1538    { 0x017F,  0x73, 0x00, 0x00, 0x00 },  /* ſ to s */
1539    { 0x0192,  0x66, 0x00, 0x00, 0x00 },  /* ƒ to f */
1540    { 0x0218,  0x53, 0x00, 0x00, 0x00 },  /* Ș to S */
1541    { 0x0219,  0x73, 0x00, 0x00, 0x00 },  /* ș to s */
1542    { 0x021A,  0x54, 0x00, 0x00, 0x00 },  /* Ț to T */
1543    { 0x021B,  0x74, 0x00, 0x00, 0x00 },  /* ț to t */
1544    { 0x0386,  0x41, 0x00, 0x00, 0x00 },  /* Ά to A */
1545    { 0x0388,  0x45, 0x00, 0x00, 0x00 },  /* Έ to E */
1546    { 0x0389,  0x49, 0x00, 0x00, 0x00 },  /* Ή to I */
1547    { 0x038A,  0x49, 0x00, 0x00, 0x00 },  /* Ί to I */
1548    { 0x038C,  0x4f, 0x00, 0x00, 0x00 },  /* Ό to O */
1549    { 0x038E,  0x59, 0x00, 0x00, 0x00 },  /* Ύ to Y */
1550    { 0x038F,  0x4f, 0x00, 0x00, 0x00 },  /* Ώ to O */
1551    { 0x0390,  0x69, 0x00, 0x00, 0x00 },  /* ΐ to i */
1552    { 0x0391,  0x41, 0x00, 0x00, 0x00 },  /* Α to A */
1553    { 0x0392,  0x42, 0x00, 0x00, 0x00 },  /* Β to B */
1554    { 0x0393,  0x47, 0x00, 0x00, 0x00 },  /* Γ to G */
1555    { 0x0394,  0x44, 0x00, 0x00, 0x00 },  /* Δ to D */
1556    { 0x0395,  0x45, 0x00, 0x00, 0x00 },  /* Ε to E */
1557    { 0x0396,  0x5a, 0x00, 0x00, 0x00 },  /* Ζ to Z */
1558    { 0x0397,  0x49, 0x00, 0x00, 0x00 },  /* Η to I */
1559    { 0x0398,  0x54, 0x68, 0x00, 0x00 },  /* Θ to Th */
1560    { 0x0399,  0x49, 0x00, 0x00, 0x00 },  /* Ι to I */
1561    { 0x039A,  0x4b, 0x00, 0x00, 0x00 },  /* Κ to K */
1562    { 0x039B,  0x4c, 0x00, 0x00, 0x00 },  /* Λ to L */
1563    { 0x039C,  0x4d, 0x00, 0x00, 0x00 },  /* Μ to M */
1564    { 0x039D,  0x4e, 0x00, 0x00, 0x00 },  /* Ν to N */
1565    { 0x039E,  0x58, 0x00, 0x00, 0x00 },  /* Ξ to X */
1566    { 0x039F,  0x4f, 0x00, 0x00, 0x00 },  /* Ο to O */
1567    { 0x03A0,  0x50, 0x00, 0x00, 0x00 },  /* Π to P */
1568    { 0x03A1,  0x52, 0x00, 0x00, 0x00 },  /* Ρ to R */
1569    { 0x03A3,  0x53, 0x00, 0x00, 0x00 },  /* Σ to S */
1570    { 0x03A4,  0x54, 0x00, 0x00, 0x00 },  /* Τ to T */
1571    { 0x03A5,  0x59, 0x00, 0x00, 0x00 },  /* Υ to Y */
1572    { 0x03A6,  0x46, 0x00, 0x00, 0x00 },  /* Φ to F */
1573    { 0x03A7,  0x43, 0x68, 0x00, 0x00 },  /* Χ to Ch */
1574    { 0x03A8,  0x50, 0x73, 0x00, 0x00 },  /* Ψ to Ps */
1575    { 0x03A9,  0x4f, 0x00, 0x00, 0x00 },  /* Ω to O */
1576    { 0x03AA,  0x49, 0x00, 0x00, 0x00 },  /* Ϊ to I */
1577    { 0x03AB,  0x59, 0x00, 0x00, 0x00 },  /* Ϋ to Y */
1578    { 0x03AC,  0x61, 0x00, 0x00, 0x00 },  /* ά to a */
1579    { 0x03AD,  0x65, 0x00, 0x00, 0x00 },  /* έ to e */
1580    { 0x03AE,  0x69, 0x00, 0x00, 0x00 },  /* ή to i */
1581    { 0x03AF,  0x69, 0x00, 0x00, 0x00 },  /* ί to i */
1582    { 0x03B1,  0x61, 0x00, 0x00, 0x00 },  /* α to a */
1583    { 0x03B2,  0x62, 0x00, 0x00, 0x00 },  /* β to b */
1584    { 0x03B3,  0x67, 0x00, 0x00, 0x00 },  /* γ to g */
1585    { 0x03B4,  0x64, 0x00, 0x00, 0x00 },  /* δ to d */
1586    { 0x03B5,  0x65, 0x00, 0x00, 0x00 },  /* ε to e */
1587    { 0x03B6,  0x7a, 0x00, 0x00, 0x00 },  /* ζ to z */
1588    { 0x03B7,  0x69, 0x00, 0x00, 0x00 },  /* η to i */
1589    { 0x03B8,  0x74, 0x68, 0x00, 0x00 },  /* θ to th */
1590    { 0x03B9,  0x69, 0x00, 0x00, 0x00 },  /* ι to i */
1591    { 0x03BA,  0x6b, 0x00, 0x00, 0x00 },  /* κ to k */
1592    { 0x03BB,  0x6c, 0x00, 0x00, 0x00 },  /* λ to l */
1593    { 0x03BC,  0x6d, 0x00, 0x00, 0x00 },  /* μ to m */
1594    { 0x03BD,  0x6e, 0x00, 0x00, 0x00 },  /* ν to n */
1595    { 0x03BE,  0x78, 0x00, 0x00, 0x00 },  /* ξ to x */
1596    { 0x03BF,  0x6f, 0x00, 0x00, 0x00 },  /* ο to o */
1597    { 0x03C0,  0x70, 0x00, 0x00, 0x00 },  /* π to p */
1598    { 0x03C1,  0x72, 0x00, 0x00, 0x00 },  /* ρ to r */
1599    { 0x03C3,  0x73, 0x00, 0x00, 0x00 },  /* σ to s */
1600    { 0x03C4,  0x74, 0x00, 0x00, 0x00 },  /* τ to t */
1601    { 0x03C5,  0x79, 0x00, 0x00, 0x00 },  /* υ to y */
1602    { 0x03C6,  0x66, 0x00, 0x00, 0x00 },  /* φ to f */
1603    { 0x03C7,  0x63, 0x68, 0x00, 0x00 },  /* χ to ch */
1604    { 0x03C8,  0x70, 0x73, 0x00, 0x00 },  /* ψ to ps */
1605    { 0x03C9,  0x6f, 0x00, 0x00, 0x00 },  /* ω to o */
1606    { 0x03CA,  0x69, 0x00, 0x00, 0x00 },  /* ϊ to i */
1607    { 0x03CB,  0x79, 0x00, 0x00, 0x00 },  /* ϋ to y */
1608    { 0x03CC,  0x6f, 0x00, 0x00, 0x00 },  /* ό to o */
1609    { 0x03CD,  0x79, 0x00, 0x00, 0x00 },  /* ύ to y */
1610    { 0x03CE,  0x69, 0x00, 0x00, 0x00 },  /* ώ to i */
1611    { 0x0400,  0x45, 0x00, 0x00, 0x00 },  /* Ѐ to E */
1612    { 0x0401,  0x45, 0x00, 0x00, 0x00 },  /* Ё to E */
1613    { 0x0402,  0x44, 0x00, 0x00, 0x00 },  /* Ђ to D */
1614    { 0x0403,  0x47, 0x00, 0x00, 0x00 },  /* Ѓ to G */
1615    { 0x0404,  0x45, 0x00, 0x00, 0x00 },  /* Є to E */
1616    { 0x0405,  0x5a, 0x00, 0x00, 0x00 },  /* Ѕ to Z */
1617    { 0x0406,  0x49, 0x00, 0x00, 0x00 },  /* І to I */
1618    { 0x0407,  0x49, 0x00, 0x00, 0x00 },  /* Ї to I */
1619    { 0x0408,  0x4a, 0x00, 0x00, 0x00 },  /* Ј to J */
1620    { 0x0409,  0x49, 0x00, 0x00, 0x00 },  /* Љ to I */
1621    { 0x040A,  0x4e, 0x00, 0x00, 0x00 },  /* Њ to N */
1622    { 0x040B,  0x44, 0x00, 0x00, 0x00 },  /* Ћ to D */
1623    { 0x040C,  0x4b, 0x00, 0x00, 0x00 },  /* Ќ to K */
1624    { 0x040D,  0x49, 0x00, 0x00, 0x00 },  /* Ѝ to I */
1625    { 0x040E,  0x55, 0x00, 0x00, 0x00 },  /* Ў to U */
1626    { 0x040F,  0x44, 0x00, 0x00, 0x00 },  /* Џ to D */
1627    { 0x0410,  0x41, 0x00, 0x00, 0x00 },  /* А to A */
1628    { 0x0411,  0x42, 0x00, 0x00, 0x00 },  /* Б to B */
1629    { 0x0412,  0x56, 0x00, 0x00, 0x00 },  /* В to V */
1630    { 0x0413,  0x47, 0x00, 0x00, 0x00 },  /* Г to G */
1631    { 0x0414,  0x44, 0x00, 0x00, 0x00 },  /* Д to D */
1632    { 0x0415,  0x45, 0x00, 0x00, 0x00 },  /* Е to E */
1633    { 0x0416,  0x5a, 0x68, 0x00, 0x00 },  /* Ж to Zh */
1634    { 0x0417,  0x5a, 0x00, 0x00, 0x00 },  /* З to Z */
1635    { 0x0418,  0x49, 0x00, 0x00, 0x00 },  /* И to I */
1636    { 0x0419,  0x49, 0x00, 0x00, 0x00 },  /* Й to I */
1637    { 0x041A,  0x4b, 0x00, 0x00, 0x00 },  /* К to K */
1638    { 0x041B,  0x4c, 0x00, 0x00, 0x00 },  /* Л to L */
1639    { 0x041C,  0x4d, 0x00, 0x00, 0x00 },  /* М to M */
1640    { 0x041D,  0x4e, 0x00, 0x00, 0x00 },  /* Н to N */
1641    { 0x041E,  0x4f, 0x00, 0x00, 0x00 },  /* О to O */
1642    { 0x041F,  0x50, 0x00, 0x00, 0x00 },  /* П to P */
1643    { 0x0420,  0x52, 0x00, 0x00, 0x00 },  /* Р to R */
1644    { 0x0421,  0x53, 0x00, 0x00, 0x00 },  /* С to S */
1645    { 0x0422,  0x54, 0x00, 0x00, 0x00 },  /* Т to T */
1646    { 0x0423,  0x55, 0x00, 0x00, 0x00 },  /* У to U */
1647    { 0x0424,  0x46, 0x00, 0x00, 0x00 },  /* Ф to F */
1648    { 0x0425,  0x4b, 0x68, 0x00, 0x00 },  /* Х to Kh */
1649    { 0x0426,  0x54, 0x63, 0x00, 0x00 },  /* Ц to Tc */
1650    { 0x0427,  0x43, 0x68, 0x00, 0x00 },  /* Ч to Ch */
1651    { 0x0428,  0x53, 0x68, 0x00, 0x00 },  /* Ш to Sh */
1652    { 0x0429,  0x53, 0x68, 0x63, 0x68 },  /* Щ to Shch */
1653    { 0x042A,  0x61, 0x00, 0x00, 0x00 },  /*  to A */
1654    { 0x042B,  0x59, 0x00, 0x00, 0x00 },  /* Ы to Y */
1655    { 0x042C,  0x59, 0x00, 0x00, 0x00 },  /*  to Y */
1656    { 0x042D,  0x45, 0x00, 0x00, 0x00 },  /* Э to E */
1657    { 0x042E,  0x49, 0x75, 0x00, 0x00 },  /* Ю to Iu */
1658    { 0x042F,  0x49, 0x61, 0x00, 0x00 },  /* Я to Ia */
1659    { 0x0430,  0x61, 0x00, 0x00, 0x00 },  /* а to a */
1660    { 0x0431,  0x62, 0x00, 0x00, 0x00 },  /* б to b */
1661    { 0x0432,  0x76, 0x00, 0x00, 0x00 },  /* в to v */
1662    { 0x0433,  0x67, 0x00, 0x00, 0x00 },  /* г to g */
1663    { 0x0434,  0x64, 0x00, 0x00, 0x00 },  /* д to d */
1664    { 0x0435,  0x65, 0x00, 0x00, 0x00 },  /* е to e */
1665    { 0x0436,  0x7a, 0x68, 0x00, 0x00 },  /* ж to zh */
1666    { 0x0437,  0x7a, 0x00, 0x00, 0x00 },  /* з to z */
1667    { 0x0438,  0x69, 0x00, 0x00, 0x00 },  /* и to i */
1668    { 0x0439,  0x69, 0x00, 0x00, 0x00 },  /* й to i */
1669    { 0x043A,  0x6b, 0x00, 0x00, 0x00 },  /* к to k */
1670    { 0x043B,  0x6c, 0x00, 0x00, 0x00 },  /* л to l */
1671    { 0x043C,  0x6d, 0x00, 0x00, 0x00 },  /* м to m */
1672    { 0x043D,  0x6e, 0x00, 0x00, 0x00 },  /* н to n */
1673    { 0x043E,  0x6f, 0x00, 0x00, 0x00 },  /* о to o */
1674    { 0x043F,  0x70, 0x00, 0x00, 0x00 },  /* п to p */
1675    { 0x0440,  0x72, 0x00, 0x00, 0x00 },  /* р to r */
1676    { 0x0441,  0x73, 0x00, 0x00, 0x00 },  /* с to s */
1677    { 0x0442,  0x74, 0x00, 0x00, 0x00 },  /* т to t */
1678    { 0x0443,  0x75, 0x00, 0x00, 0x00 },  /* у to u */
1679    { 0x0444,  0x66, 0x00, 0x00, 0x00 },  /* ф to f */
1680    { 0x0445,  0x6b, 0x68, 0x00, 0x00 },  /* х to kh */
1681    { 0x0446,  0x74, 0x63, 0x00, 0x00 },  /* ц to tc */
1682    { 0x0447,  0x63, 0x68, 0x00, 0x00 },  /* ч to ch */
1683    { 0x0448,  0x73, 0x68, 0x00, 0x00 },  /* ш to sh */
1684    { 0x0449,  0x73, 0x68, 0x63, 0x68 },  /* щ to shch */
1685    { 0x044A,  0x61, 0x00, 0x00, 0x00 },  /*  to a */
1686    { 0x044B,  0x79, 0x00, 0x00, 0x00 },  /* ы to y */
1687    { 0x044C,  0x79, 0x00, 0x00, 0x00 },  /*  to y */
1688    { 0x044D,  0x65, 0x00, 0x00, 0x00 },  /* э to e */
1689    { 0x044E,  0x69, 0x75, 0x00, 0x00 },  /* ю to iu */
1690    { 0x044F,  0x69, 0x61, 0x00, 0x00 },  /* я to ia */
1691    { 0x0450,  0x65, 0x00, 0x00, 0x00 },  /* ѐ to e */
1692    { 0x0451,  0x65, 0x00, 0x00, 0x00 },  /* ё to e */
1693    { 0x0452,  0x64, 0x00, 0x00, 0x00 },  /* ђ to d */
1694    { 0x0453,  0x67, 0x00, 0x00, 0x00 },  /* ѓ to g */
1695    { 0x0454,  0x65, 0x00, 0x00, 0x00 },  /* є to e */
1696    { 0x0455,  0x7a, 0x00, 0x00, 0x00 },  /* ѕ to z */
1697    { 0x0456,  0x69, 0x00, 0x00, 0x00 },  /* і to i */
1698    { 0x0457,  0x69, 0x00, 0x00, 0x00 },  /* ї to i */
1699    { 0x0458,  0x6a, 0x00, 0x00, 0x00 },  /* ј to j */
1700    { 0x0459,  0x69, 0x00, 0x00, 0x00 },  /* љ to i */
1701    { 0x045A,  0x6e, 0x00, 0x00, 0x00 },  /* њ to n */
1702    { 0x045B,  0x64, 0x00, 0x00, 0x00 },  /* ћ to d */
1703    { 0x045C,  0x6b, 0x00, 0x00, 0x00 },  /* ќ to k */
1704    { 0x045D,  0x69, 0x00, 0x00, 0x00 },  /* ѝ to i */
1705    { 0x045E,  0x75, 0x00, 0x00, 0x00 },  /* ў to u */
1706    { 0x045F,  0x64, 0x00, 0x00, 0x00 },  /* џ to d */
1707    { 0x1E02,  0x42, 0x00, 0x00, 0x00 },  /* Ḃ to B */
1708    { 0x1E03,  0x62, 0x00, 0x00, 0x00 },  /* ḃ to b */
1709    { 0x1E0A,  0x44, 0x00, 0x00, 0x00 },  /* Ḋ to D */
1710    { 0x1E0B,  0x64, 0x00, 0x00, 0x00 },  /* ḋ to d */
1711    { 0x1E1E,  0x46, 0x00, 0x00, 0x00 },  /* Ḟ to F */
1712    { 0x1E1F,  0x66, 0x00, 0x00, 0x00 },  /* ḟ to f */
1713    { 0x1E40,  0x4D, 0x00, 0x00, 0x00 },  /* Ṁ to M */
1714    { 0x1E41,  0x6D, 0x00, 0x00, 0x00 },  /* ṁ to m */
1715    { 0x1E56,  0x50, 0x00, 0x00, 0x00 },  /* Ṗ to P */
1716    { 0x1E57,  0x70, 0x00, 0x00, 0x00 },  /* ṗ to p */
1717    { 0x1E60,  0x53, 0x00, 0x00, 0x00 },  /* Ṡ to S */
1718    { 0x1E61,  0x73, 0x00, 0x00, 0x00 },  /* ṡ to s */
1719    { 0x1E6A,  0x54, 0x00, 0x00, 0x00 },  /* Ṫ to T */
1720    { 0x1E6B,  0x74, 0x00, 0x00, 0x00 },  /* ṫ to t */
1721    { 0x1E80,  0x57, 0x00, 0x00, 0x00 },  /* Ẁ to W */
1722    { 0x1E81,  0x77, 0x00, 0x00, 0x00 },  /* ẁ to w */
1723    { 0x1E82,  0x57, 0x00, 0x00, 0x00 },  /* Ẃ to W */
1724    { 0x1E83,  0x77, 0x00, 0x00, 0x00 },  /* ẃ to w */
1725    { 0x1E84,  0x57, 0x00, 0x00, 0x00 },  /* Ẅ to W */
1726    { 0x1E85,  0x77, 0x00, 0x00, 0x00 },  /* ẅ to w */
1727    { 0x1EF2,  0x59, 0x00, 0x00, 0x00 },  /* Ỳ to Y */
1728    { 0x1EF3,  0x79, 0x00, 0x00, 0x00 },  /* ỳ to y */
1729    { 0xFB00,  0x66, 0x66, 0x00, 0x00 },  /* ff to ff */
1730    { 0xFB01,  0x66, 0x69, 0x00, 0x00 },  /* fi to fi */
1731    { 0xFB02,  0x66, 0x6C, 0x00, 0x00 },  /* fl to fl */
1732    { 0xFB05,  0x73, 0x74, 0x00, 0x00 },  /* ſt to st */
1733    { 0xFB06,  0x73, 0x74, 0x00, 0x00 },  /* st to st */
1734};
1735
1736static const Transliteration *spellfixFindTranslit(int c, int *pxTop)
1737{
1738    *pxTop = (sizeof(translit)/sizeof(translit[0])) - 1;
1739    return translit;
1740}
1741
1742/*
1743** Convert the input string from UTF-8 into pure ASCII by converting
1744** all non-ASCII characters to some combination of characters in the
1745** ASCII subset.
1746**
1747** The returned string might contain more characters than the input.
1748**
1749** Space to hold the returned string comes from sqlite3_malloc() and
1750** should be freed by the caller.
1751*/
1752static unsigned char *transliterate(const unsigned char *zIn, int nIn)
1753{
1754#ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
1755    unsigned char *zOut = sqlite3_malloc64( nIn*5 + 1 );
1756#else
1757    unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
1758#endif
1759    int c, sz, nOut;
1760    if( zOut==0 ) return 0;
1761    nOut = 0;
1762    while( nIn>0 ) {
1763        c = utf8Read(zIn, nIn, &sz);
1764        zIn += sz;
1765        nIn -= sz;
1766        if( c<=127 ) {
1767            zOut[nOut++] = (unsigned char)c;
1768        } else {
1769            int xTop, xBtm, x;
1770            const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1771            xBtm = 0;
1772            while( xTop>=xBtm ) {
1773                x = (xTop + xBtm)/2;
1774                if( tbl[x].cFrom==c ) {
1775                    zOut[nOut++] = tbl[x].cTo0;
1776                    if( tbl[x].cTo1 ) {
1777                        zOut[nOut++] = tbl[x].cTo1;
1778                        if( tbl[x].cTo2 ) {
1779                            zOut[nOut++] = tbl[x].cTo2;
1780                            if( tbl[x].cTo3 ) {
1781                                zOut[nOut++] = tbl[x].cTo3;
1782#ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
1783                                if( tbl[x].cTo4 ) {
1784                                    zOut[nOut++] = tbl[x].cTo4;
1785                                }
1786#endif /* SQLITE_SPELLFIX_5BYTE_MAPPINGS */
1787                            }
1788                        }
1789                    }
1790                    c = 0;
1791                    break;
1792                } else if( tbl[x].cFrom>c ) {
1793                    xTop = x-1;
1794                } else {
1795                    xBtm = x+1;
1796                }
1797            }
1798            if( c ) zOut[nOut++] = '?';
1799        }
1800    }
1801    zOut[nOut] = 0;
1802    return zOut;
1803}
1804
1805/*
1806** Return the number of characters in the shortest prefix of the input
1807** string that transliterates to an ASCII string nTrans bytes or longer.
1808** Or, if the transliteration of the input string is less than nTrans
1809** bytes in size, return the number of characters in the input string.
1810*/
1811static int translen_to_charlen(const char *zIn, int nIn, int nTrans)
1812{
1813    int i, c, sz, nOut;
1814    int nChar;
1815
1816    i = nOut = 0;
1817    for(nChar=0; i<nIn && nOut<nTrans; nChar++) {
1818        c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1819        i += sz;
1820
1821        nOut++;
1822        if( c>=128 ) {
1823            int xTop, xBtm, x;
1824            const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1825            xBtm = 0;
1826            while( xTop>=xBtm ) {
1827                x = (xTop + xBtm)/2;
1828                if( tbl[x].cFrom==c ) {
1829                    if( tbl[x].cTo1 ) {
1830                        nOut++;
1831                        if( tbl[x].cTo2 ) {
1832                            nOut++;
1833                            if( tbl[x].cTo3 ) {
1834                                nOut++;
1835                            }
1836                        }
1837                    }
1838                    break;
1839                } else if( tbl[x].cFrom>c ) {
1840                    xTop = x-1;
1841                } else {
1842                    xBtm = x+1;
1843                }
1844            }
1845        }
1846    }
1847
1848    return nChar;
1849}
1850
1851/*
1852**    spellfix1_translit(X)
1853**
1854** Convert a string that contains non-ASCII Roman characters into
1855** pure ASCII.
1856*/
1857static void transliterateSqlFunc(
1858    sqlite3_context *context,
1859    int argc,
1860    sqlite3_value **argv
1861)
1862{
1863    const unsigned char *zIn = sqlite3_value_text(argv[0]);
1864    int nIn = sqlite3_value_bytes(argv[0]);
1865    unsigned char *zOut = transliterate(zIn, nIn);
1866    if( zOut==0 ) {
1867        sqlite3_result_error_nomem(context);
1868    } else {
1869        sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
1870    }
1871}
1872
1873/*
1874**    spellfix1_scriptcode(X)
1875**
1876** Try to determine the dominant script used by the word X and return
1877** its ISO 15924 numeric code.
1878**
1879** The current implementation only understands the following scripts:
1880**
1881**    215  (Latin)
1882**    220  (Cyrillic)
1883**    200  (Greek)
1884**
1885** This routine will return 998 if the input X contains characters from
1886** two or more of the above scripts or 999 if X contains no characters
1887** from any of the above scripts.
1888*/
1889static void scriptCodeSqlFunc(
1890    sqlite3_context *context,
1891    int argc,
1892    sqlite3_value **argv
1893)
1894{
1895    const unsigned char *zIn = sqlite3_value_text(argv[0]);
1896    int nIn = sqlite3_value_bytes(argv[0]);
1897    int c, sz;
1898    int scriptMask = 0;
1899    int res;
1900    int seenDigit = 0;
1901# define SCRIPT_LATIN       0x0001
1902# define SCRIPT_CYRILLIC    0x0002
1903# define SCRIPT_GREEK       0x0004
1904# define SCRIPT_HEBREW      0x0008
1905# define SCRIPT_ARABIC      0x0010
1906
1907    while( nIn>0 ) {
1908        c = utf8Read(zIn, nIn, &sz);
1909        zIn += sz;
1910        nIn -= sz;
1911        if( c<0x02af ) {
1912            if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ) {
1913                scriptMask |= SCRIPT_LATIN;
1914            } else if( c>='0' && c<='9' ) {
1915                seenDigit = 1;
1916            }
1917        } else if( c>=0x0400 && c<=0x04ff ) {
1918            scriptMask |= SCRIPT_CYRILLIC;
1919        } else if( c>=0x0386 && c<=0x03ce ) {
1920            scriptMask |= SCRIPT_GREEK;
1921        } else if( c>=0x0590 && c<=0x05ff ) {
1922            scriptMask |= SCRIPT_HEBREW;
1923        } else if( c>=0x0600 && c<=0x06ff ) {
1924            scriptMask |= SCRIPT_ARABIC;
1925        }
1926    }
1927    if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN;
1928    switch( scriptMask ) {
1929    case 0:
1930        res = 999;
1931        break;
1932    case SCRIPT_LATIN:
1933        res = 215;
1934        break;
1935    case SCRIPT_CYRILLIC:
1936        res = 220;
1937        break;
1938    case SCRIPT_GREEK:
1939        res = 200;
1940        break;
1941    case SCRIPT_HEBREW:
1942        res = 125;
1943        break;
1944    case SCRIPT_ARABIC:
1945        res = 160;
1946        break;
1947    default:
1948        res = 998;
1949        break;
1950    }
1951    sqlite3_result_int(context, res);
1952}
1953
1954/* End transliterate
1955******************************************************************************
1956******************************************************************************
1957** Begin spellfix1 virtual table.
1958*/
1959
/* Maximum length of a phonehash used for querying the shadow table.
** This is also the size, in bytes, of the zHash[] buffers embedded in
** struct spellfix1_row and struct MatchQuery. */
#define SPELLFIX_MX_HASH  32

/* Maximum number of hash strings to examine per query */
#define SPELLFIX_MX_RUN   1
1965
typedef struct spellfix1_vtab spellfix1_vtab;
typedef struct spellfix1_cursor spellfix1_cursor;

/* Fuzzy-search virtual table object.
**
** spellfix1Init() allocates this object with extra space after it and
** stores the database name there, so zDbName is released together with
** the object itself.  zTableName and zCostTable are separate allocations
** and pConfig3 is released with editDist3ConfigDelete(); all of them are
** cleaned up by spellfix1Uninit().
*/
struct spellfix1_vtab {
    sqlite3_vtab base;         /* Base class - must be first */
    sqlite3 *db;               /* Database connection */
    char *zDbName;             /* Name of database holding this table */
    char *zTableName;          /* Name of the virtual table */
    char *zCostTable;          /* Table holding edit-distance cost numbers */
    EditDist3Config *pConfig3; /* Parsed edit distance costs */
};

/* Fuzzy-search cursor object.
**
** The result rows live in the array a[], which has room for nAlloc
** entries of which the first nRow are in use.  spellfix1ResetCursor()
** frees the zWord string of every row in use and finalizes pFullScan;
** spellfix1Close() additionally frees a[], zPattern and the cursor.
*/
struct spellfix1_cursor {
    sqlite3_vtab_cursor base;    /* Base class - must be first */
    spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */
    char *zPattern;              /* rhs of MATCH clause */
    int idxNum;                  /* idxNum value passed to xFilter() */
    int nRow;                    /* Number of rows of content in a[] */
    int nAlloc;                  /* Number of rows a[] has room for */
    int iRow;                    /* Current row of content */
    int iLang;                   /* Value of the langid= constraint */
    int iTop;                    /* Value of the top= constraint */
    int iScope;                  /* Value of the scope= constraint */
    int nSearch;                 /* Number of vocabulary items checked */
    sqlite3_stmt *pFullScan;     /* Shadow query for a full table scan */
    struct spellfix1_row {       /* For each row of content */
        sqlite3_int64 iRowid;         /* Rowid for this row */
        char *zWord;                  /* Text for this row */
        int iRank;                    /* Rank for this row */
        int iDistance;                /* Distance from pattern for this row */
        int iScore;                   /* Score for sorting (ascending) */
        int iMatchlen;                /* Value of matchlen column (or -1) */
        char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
    } *a;
};
2003
2004/*
2005** Construct one or more SQL statements from the format string given
2006** and then evaluate those statements. The success code is written
2007** into *pRc.
2008**
2009** If *pRc is initially non-zero then this routine is a no-op.
2010*/
2011static void spellfix1DbExec(
2012    int *pRc,              /* Success code */
2013    sqlite3 *db,           /* Database in which to run SQL */
2014    const char *zFormat,   /* Format string for SQL */
2015    ...                    /* Arguments to the format string */
2016)
2017{
2018    va_list ap;
2019    char *zSql;
2020    if( *pRc ) return;
2021    va_start(ap, zFormat);
2022    zSql = sqlite3_vmprintf(zFormat, ap);
2023    va_end(ap);
2024    if( zSql==0 ) {
2025        *pRc = SQLITE_NOMEM;
2026    } else {
2027        *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
2028        sqlite3_free(zSql);
2029    }
2030}
2031
2032/*
2033** xDisconnect/xDestroy method for the fuzzy-search module.
2034*/
2035static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab)
2036{
2037    spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2038    int rc = SQLITE_OK;
2039    if( isDestroy ) {
2040        sqlite3 *db = p->db;
2041        spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"",
2042                        p->zDbName, p->zTableName);
2043    }
2044    if( rc==SQLITE_OK ) {
2045        sqlite3_free(p->zTableName);
2046        editDist3ConfigDelete(p->pConfig3);
2047        sqlite3_free(p->zCostTable);
2048        sqlite3_free(p);
2049    }
2050    return rc;
2051}
2052static int spellfix1Disconnect(sqlite3_vtab *pVTab)
2053{
2054    return spellfix1Uninit(0, pVTab);
2055}
2056static int spellfix1Destroy(sqlite3_vtab *pVTab)
2057{
2058    return spellfix1Uninit(1, pVTab);
2059}
2060
/*
** Return a copy of zIn with leading whitespace removed and, if the
** remaining text begins with a single or double quote, with that
** quoting undone: the opening quote is dropped, a doubled quote
** character collapses to one, and the text ends at the closing quote.
**
** The copy is obtained from sqlite3_mprintf() and must be released by
** the caller with sqlite3_free().  NULL is returned on allocation
** failure.
*/
static char *spellfix1Dequote(const char *zIn)
{
    char *zCopy;
    char cQuote;
    int iRead;
    int iWrite = 0;

    while( isspace((unsigned char)*zIn) ) zIn++;
    zCopy = sqlite3_mprintf("%s", zIn);
    if( zCopy==0 ) return 0;

    /* Trailing whitespace is not trimmed: the parser never leaves
    ** spaces at the end. */
    cQuote = zCopy[0];
    if( cQuote!='\'' && cQuote!='"' ) return zCopy;

    /* Compact the string in place, starting just after the opening quote */
    for(iRead=1; ALWAYS(zCopy[iRead]); iRead++) {
        zCopy[iWrite++] = zCopy[iRead];
        if( zCopy[iRead]!=cQuote ) continue;
        if( zCopy[iRead+1]!=cQuote ) {
            /* Closing quote: overwrite the copy of it with the terminator */
            zCopy[iWrite-1] = 0;
            break;
        }
        iRead++;   /* Doubled quote: skip the second one */
    }
    return zCopy;
}
2096
2097/*
2098** xConnect/xCreate method for the spellfix1 module. Arguments are:
2099**
2100**   argv[0]   -> module name  ("spellfix1")
2101**   argv[1]   -> database name
2102**   argv[2]   -> table name
2103**   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
2104*/
2105static int spellfix1Init(
2106    int isCreate,
2107    sqlite3 *db,
2108    void *pAux,
2109    int argc, const char *const*argv,
2110    sqlite3_vtab **ppVTab,
2111    char **pzErr
2112)
2113{
2114    spellfix1_vtab *pNew = 0;
2115    /* const char *zModule = argv[0]; // not used */
2116    const char *zDbName = argv[1];
2117    const char *zTableName = argv[2];
2118    int nDbName;
2119    int rc = SQLITE_OK;
2120    int i;
2121
2122    nDbName = (int)strlen(zDbName);
2123    pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
2124    if( pNew==0 ) {
2125        rc = SQLITE_NOMEM;
2126    } else {
2127        memset(pNew, 0, sizeof(*pNew));
2128        pNew->zDbName = (char*)&pNew[1];
2129        memcpy(pNew->zDbName, zDbName, nDbName+1);
2130        pNew->zTableName = sqlite3_mprintf("%s", zTableName);
2131        pNew->db = db;
2132        if( pNew->zTableName==0 ) {
2133            rc = SQLITE_NOMEM;
2134        } else {
2135            sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
2136            rc = sqlite3_declare_vtab(db,
2137                                      "CREATE TABLE x(word,rank,distance,langid, "
2138                                      "score, matchlen, phonehash HIDDEN, "
2139                                      "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
2140                                      "soundslike HIDDEN, command HIDDEN)"
2141                                     );
2142#define SPELLFIX_COL_WORD            0
2143#define SPELLFIX_COL_RANK            1
2144#define SPELLFIX_COL_DISTANCE        2
2145#define SPELLFIX_COL_LANGID          3
2146#define SPELLFIX_COL_SCORE           4
2147#define SPELLFIX_COL_MATCHLEN        5
2148#define SPELLFIX_COL_PHONEHASH       6
2149#define SPELLFIX_COL_TOP             7
2150#define SPELLFIX_COL_SCOPE           8
2151#define SPELLFIX_COL_SRCHCNT         9
2152#define SPELLFIX_COL_SOUNDSLIKE     10
2153#define SPELLFIX_COL_COMMAND        11
2154        }
2155        if( rc==SQLITE_OK && isCreate ) {
2156            spellfix1DbExec(&rc, db,
2157                            "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
2158                            "  id INTEGER PRIMARY KEY,\n"
2159                            "  rank INT,\n"
2160                            "  langid INT,\n"
2161                            "  word TEXT,\n"
2162                            "  k1 TEXT,\n"
2163                            "  k2 TEXT\n"
2164                            ");\n",
2165                            zDbName, zTableName
2166                           );
2167            spellfix1DbExec(&rc, db,
2168                            "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" "
2169                            "ON \"%w_vocab\"(langid,k2);",
2170                            zDbName, zTableName, zTableName
2171                           );
2172        }
2173        for(i=3; rc==SQLITE_OK && i<argc; i++) {
2174            if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ) {
2175                pNew->zCostTable = spellfix1Dequote(&argv[i][16]);
2176                if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM;
2177                continue;
2178            }
2179            *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]);
2180            rc = SQLITE_ERROR;
2181        }
2182    }
2183
2184    if( rc && pNew ) {
2185        *ppVTab = 0;
2186        spellfix1Uninit(0, &pNew->base);
2187    } else {
2188        *ppVTab = (sqlite3_vtab *)pNew;
2189    }
2190    return rc;
2191}
2192
2193/*
2194** The xConnect and xCreate methods
2195*/
2196static int spellfix1Connect(
2197    sqlite3 *db,
2198    void *pAux,
2199    int argc, const char *const*argv,
2200    sqlite3_vtab **ppVTab,
2201    char **pzErr
2202)
2203{
2204    return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr);
2205}
2206static int spellfix1Create(
2207    sqlite3 *db,
2208    void *pAux,
2209    int argc, const char *const*argv,
2210    sqlite3_vtab **ppVTab,
2211    char **pzErr
2212)
2213{
2214    return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr);
2215}
2216
2217/*
2218** Clear all of the content from a cursor.
2219*/
2220static void spellfix1ResetCursor(spellfix1_cursor *pCur)
2221{
2222    int i;
2223    for(i=0; i<pCur->nRow; i++) {
2224        sqlite3_free(pCur->a[i].zWord);
2225    }
2226    pCur->nRow = 0;
2227    pCur->iRow = 0;
2228    pCur->nSearch = 0;
2229    if( pCur->pFullScan ) {
2230        sqlite3_finalize(pCur->pFullScan);
2231        pCur->pFullScan = 0;
2232    }
2233}
2234
2235/*
2236** Resize the cursor to hold up to N rows of content
2237*/
2238static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N)
2239{
2240    struct spellfix1_row *aNew;
2241    assert( N>=pCur->nRow );
2242    aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
2243    if( aNew==0 && N>0 ) {
2244        spellfix1ResetCursor(pCur);
2245        sqlite3_free(pCur->a);
2246        pCur->nAlloc = 0;
2247        pCur->a = 0;
2248    } else {
2249        pCur->nAlloc = N;
2250        pCur->a = aNew;
2251    }
2252}
2253
2254/*
2255** Close a fuzzy-search cursor.
2256*/
2257static int spellfix1Close(sqlite3_vtab_cursor *cur)
2258{
2259    spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2260    spellfix1ResetCursor(pCur);
2261    spellfix1ResizeCursor(pCur, 0);
2262    sqlite3_free(pCur->zPattern);
2263    sqlite3_free(pCur);
2264    return SQLITE_OK;
2265}
2266
/*
** Bit flags that spellfix1BestIndex() combines into the idxNum plan
** value.  Each flag records that a usable constraint of the form shown
** was found.
*/
#define SPELLFIX_IDXNUM_MATCH  0x01         /* word MATCH $str */
#define SPELLFIX_IDXNUM_LANGID 0x02         /* langid == $langid */
#define SPELLFIX_IDXNUM_TOP    0x04         /* top = $top */
#define SPELLFIX_IDXNUM_SCOPE  0x08         /* scope = $scope */
#define SPELLFIX_IDXNUM_DISTLT 0x10         /* distance < $distance */
#define SPELLFIX_IDXNUM_DISTLE 0x20         /* distance <= $distance */
#define SPELLFIX_IDXNUM_ROWID  0x40         /* rowid = $rowid */
#define SPELLFIX_IDXNUM_DIST   (0x10|0x20)  /* DISTLT and DISTLE */
2275
2276/*
2277**
2278** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined
2279** above.
2280**
2281** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid
2282** if specified and in that order.
2283*/
2284static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo)
2285{
2286    int iPlan = 0;
2287    int iLangTerm = -1;
2288    int iTopTerm = -1;
2289    int iScopeTerm = -1;
2290    int iDistTerm = -1;
2291    int iRowidTerm = -1;
2292    int i;
2293    const struct sqlite3_index_constraint *pConstraint;
2294    pConstraint = pIdxInfo->aConstraint;
2295    for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++) {
2296        if( pConstraint->usable==0 ) continue;
2297
2298        /* Terms of the form:  word MATCH $str */
2299        if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0
2300            && pConstraint->iColumn==SPELLFIX_COL_WORD
2301            && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
2302          ) {
2303            iPlan |= SPELLFIX_IDXNUM_MATCH;
2304            pIdxInfo->aConstraintUsage[i].argvIndex = 1;
2305            pIdxInfo->aConstraintUsage[i].omit = 1;
2306        }
2307
2308        /* Terms of the form:  langid = $langid  */
2309        if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0
2310            && pConstraint->iColumn==SPELLFIX_COL_LANGID
2311            && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2312          ) {
2313            iPlan |= SPELLFIX_IDXNUM_LANGID;
2314            iLangTerm = i;
2315        }
2316
2317        /* Terms of the form:  top = $top */
2318        if( (iPlan & SPELLFIX_IDXNUM_TOP)==0
2319            && pConstraint->iColumn==SPELLFIX_COL_TOP
2320            && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2321          ) {
2322            iPlan |= SPELLFIX_IDXNUM_TOP;
2323            iTopTerm = i;
2324        }
2325
2326        /* Terms of the form:  scope = $scope */
2327        if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0
2328            && pConstraint->iColumn==SPELLFIX_COL_SCOPE
2329            && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2330          ) {
2331            iPlan |= SPELLFIX_IDXNUM_SCOPE;
2332            iScopeTerm = i;
2333        }
2334
2335        /* Terms of the form:  distance < $dist or distance <= $dist */
2336        if( (iPlan & SPELLFIX_IDXNUM_DIST)==0
2337            && pConstraint->iColumn==SPELLFIX_COL_DISTANCE
2338            && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
2339                || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
2340          ) {
2341            if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ) {
2342                iPlan |= SPELLFIX_IDXNUM_DISTLT;
2343            } else {
2344                iPlan |= SPELLFIX_IDXNUM_DISTLE;
2345            }
2346            iDistTerm = i;
2347        }
2348
2349        /* Terms of the form:  distance < $dist or distance <= $dist */
2350        if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0
2351            && pConstraint->iColumn<0
2352            && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2353          ) {
2354            iPlan |= SPELLFIX_IDXNUM_ROWID;
2355            iRowidTerm = i;
2356        }
2357    }
2358    if( iPlan&SPELLFIX_IDXNUM_MATCH ) {
2359        int idx = 2;
2360        pIdxInfo->idxNum = iPlan;
2361        if( pIdxInfo->nOrderBy==1
2362            && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE
2363            && pIdxInfo->aOrderBy[0].desc==0
2364          ) {
2365            pIdxInfo->orderByConsumed = 1;  /* Default order by iScore */
2366        }
2367        if( iPlan&SPELLFIX_IDXNUM_LANGID ) {
2368            pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++;
2369            pIdxInfo->aConstraintUsage[iLangTerm].omit = 1;
2370        }
2371        if( iPlan&SPELLFIX_IDXNUM_TOP ) {
2372            pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++;
2373            pIdxInfo->aConstraintUsage[iTopTerm].omit = 1;
2374        }
2375        if( iPlan&SPELLFIX_IDXNUM_SCOPE ) {
2376            pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++;
2377            pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1;
2378        }
2379        if( iPlan&SPELLFIX_IDXNUM_DIST ) {
2380            pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++;
2381            pIdxInfo->aConstraintUsage[iDistTerm].omit = 1;
2382        }
2383        pIdxInfo->estimatedCost = 1e5;
2384    } else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ) {
2385        pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID;
2386        pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
2387        pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
2388        pIdxInfo->estimatedCost = 5;
2389    } else {
2390        pIdxInfo->idxNum = 0;
2391        pIdxInfo->estimatedCost = 1e50;
2392    }
2393    return SQLITE_OK;
2394}
2395
2396/*
2397** Open a new fuzzy-search cursor.
2398*/
2399static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor)
2400{
2401    spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2402    spellfix1_cursor *pCur;
2403    pCur = sqlite3_malloc64( sizeof(*pCur) );
2404    if( pCur==0 ) return SQLITE_NOMEM;
2405    memset(pCur, 0, sizeof(*pCur));
2406    pCur->pVTab = p;
2407    *ppCursor = &pCur->base;
2408    return SQLITE_OK;
2409}
2410
/*
** Combine an edit distance with a word's rank to produce a score.
**
** The result is iDistance + 32 - N, where N is the number of right
** shifts needed to reduce iRank to zero (N is 0 when iRank<=0).  Words
** with a larger rank therefore get a smaller score, which shows
** preference to common words.
*/
static int spellfix1Score(int iDistance, int iRank)
{
    int nShift = 0;
    while( iRank>0 ) {
        iRank >>= 1;
        nShift++;
    }
    return iDistance + 32 - nShift;
}
2421
2422/*
2423** Compare two spellfix1_row objects for sorting purposes in qsort() such
2424** that they sort in order of increasing distance.
2425*/
2426static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B)
2427{
2428    const struct spellfix1_row *a = (const struct spellfix1_row*)A;
2429    const struct spellfix1_row *b = (const struct spellfix1_row*)B;
2430    return a->iScore - b->iScore;
2431}
2432
2433/*
2434** A structure used to pass information from spellfix1FilterForMatch()
2435** into spellfix1RunQuery().
2436*/
2437typedef struct MatchQuery {
2438    spellfix1_cursor *pCur;          /* The cursor being queried */
2439    sqlite3_stmt *pStmt;             /* shadow table query statment */
2440    char zHash[SPELLFIX_MX_HASH];    /* The current phonehash for zPattern */
2441    const char *zPattern;            /* Transliterated input string */
2442    int nPattern;                    /* Length of zPattern */
2443    EditDist3FromString *pMatchStr3; /* Original unicode string */
2444    EditDist3Config *pConfig3;       /* Edit-distance cost coefficients */
2445    const EditDist3Lang *pLang;      /* The selected language coefficients */
2446    int iLang;                       /* The language id */
2447    int iScope;                      /* Default scope */
2448    int iMaxDist;                    /* Maximum allowed edit distance, or -1 */
2449    int rc;                          /* Error code */
2450    int nRun;                  /* Number of prior runs for the same zPattern */
2451    char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH];  /* Prior hashes */
2452} MatchQuery;
2453
2454/*
2455** Run a query looking for the best matches against zPattern using
2456** zHash as the character class seed hash.
2457*/
2458static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery)
2459{
2460    const char *zK1;
2461    const char *zWord;
2462    int iDist;
2463    int iRank;
2464    int iScore;
2465    int iWorst = 0;
2466    int idx;
2467    int idxWorst = -1;
2468    int i;
2469    int iScope = p->iScope;
2470    spellfix1_cursor *pCur = p->pCur;
2471    sqlite3_stmt *pStmt = p->pStmt;
2472    char zHash1[SPELLFIX_MX_HASH];
2473    char zHash2[SPELLFIX_MX_HASH];
2474    char *zClass;
2475    int nClass;
2476    int rc;
2477
2478    if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure */
2479    zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
2480    if( zClass==0 ) {
2481        p->rc = SQLITE_NOMEM;
2482        return;
2483    }
2484    nClass = (int)strlen(zClass);
2485    if( nClass>SPELLFIX_MX_HASH-2 ) {
2486        nClass = SPELLFIX_MX_HASH-2;
2487        zClass[nClass] = 0;
2488    }
2489    if( nClass<=iScope ) {
2490        if( nClass>2 ) {
2491            iScope = nClass-1;
2492        } else {
2493            iScope = nClass;
2494        }
2495    }
2496    memcpy(zHash1, zClass, iScope);
2497    sqlite3_free(zClass);
2498    zHash1[iScope] = 0;
2499    memcpy(zHash2, zHash1, iScope);
2500    zHash2[iScope] = 'Z';
2501    zHash2[iScope+1] = 0;
2502#if SPELLFIX_MX_RUN>1
2503    for(i=0; i<p->nRun; i++) {
2504        if( strcmp(p->azPrior[i], zHash1)==0 ) return;
2505    }
2506#endif
2507    assert( p->nRun<SPELLFIX_MX_RUN );
2508    memcpy(p->azPrior[p->nRun++], zHash1, iScope+1);
2509    if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
2510        || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
2511      ) {
2512        p->rc = SQLITE_NOMEM;
2513        return;
2514    }
2515#if SPELLFIX_MX_RUN>1
2516    for(i=0; i<pCur->nRow; i++) {
2517        if( pCur->a[i].iScore>iWorst ) {
2518            iWorst = pCur->a[i].iScore;
2519            idxWorst = i;
2520        }
2521    }
2522#endif
2523    while( sqlite3_step(pStmt)==SQLITE_ROW ) {
2524        int iMatchlen = -1;
2525        iRank = sqlite3_column_int(pStmt, 2);
2526        if( p->pMatchStr3 ) {
2527            int nWord = sqlite3_column_bytes(pStmt, 1);
2528            zWord = (const char*)sqlite3_column_text(pStmt, 1);
2529            iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
2530        } else {
2531            zK1 = (const char*)sqlite3_column_text(pStmt, 3);
2532            if( zK1==0 ) continue;
2533            iDist = editdist1(p->zPattern, zK1, 0);
2534        }
2535        if( iDist<0 ) {
2536            p->rc = SQLITE_NOMEM;
2537            break;
2538        }
2539        pCur->nSearch++;
2540
2541        /* If there is a "distance < $dist" or "distance <= $dist" constraint,
2542        ** check if this row meets it. If not, jump back up to the top of the
2543        ** loop to process the next row. Otherwise, if the row does match the
2544        ** distance constraint, check if the pCur->a[] array is already full.
2545        ** If it is and no explicit "top = ?" constraint was present in the
2546        ** query, grow the array to ensure there is room for the new entry. */
2547        assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) );
2548        if( p->iMaxDist>=0 ) {
2549            if( iDist>p->iMaxDist ) continue;
2550            if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ) {
2551                spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
2552                if( pCur->a==0 ) break;
2553            }
2554        }
2555
2556        iScore = spellfix1Score(iDist,iRank);
2557        if( pCur->nRow<pCur->nAlloc ) {
2558            idx = pCur->nRow;
2559        } else if( iScore<iWorst ) {
2560            idx = idxWorst;
2561            sqlite3_free(pCur->a[idx].zWord);
2562        } else {
2563            continue;
2564        }
2565
2566        pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
2567        if( pCur->a[idx].zWord==0 ) {
2568            p->rc = SQLITE_NOMEM;
2569            break;
2570        }
2571        pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
2572        pCur->a[idx].iRank = iRank;
2573        pCur->a[idx].iDistance = iDist;
2574        pCur->a[idx].iScore = iScore;
2575        pCur->a[idx].iMatchlen = iMatchlen;
2576        memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
2577        if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
2578        if( pCur->nRow==pCur->nAlloc ) {
2579            iWorst = pCur->a[0].iScore;
2580            idxWorst = 0;
2581            for(i=1; i<pCur->nRow; i++) {
2582                iScore = pCur->a[i].iScore;
2583                if( iWorst<iScore ) {
2584                    iWorst = iScore;
2585                    idxWorst = i;
2586                }
2587            }
2588        }
2589    }
2590    rc = sqlite3_reset(pStmt);
2591    if( rc ) p->rc = rc;
2592}
2593
2594/*
2595** This version of the xFilter method work if the MATCH term is present
2596** and we are doing a scan.
2597*/
2598static int spellfix1FilterForMatch(
2599    spellfix1_cursor *pCur,
2600    int argc,
2601    sqlite3_value **argv
2602)
2603{
2604    int idxNum = pCur->idxNum;
2605    const unsigned char *zMatchThis;   /* RHS of the MATCH operator */
2606    EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */
2607    char *zPattern;                    /* Transliteration of zMatchThis */
2608    int nPattern;                      /* Length of zPattern */
2609    int iLimit = 20;                   /* Max number of rows of output */
2610    int iScope = 3;                    /* Use this many characters of zClass */
2611    int iLang = 0;                     /* Language code */
2612    char *zSql;                        /* SQL of shadow table query */
2613    sqlite3_stmt *pStmt = 0;           /* Shadow table query */
2614    int rc;                            /* Result code */
2615    int idx = 1;                       /* Next available filter parameter */
2616    spellfix1_vtab *p = pCur->pVTab;   /* The virtual table that owns pCur */
2617    MatchQuery x;                      /* For passing info to RunQuery() */
2618
2619    /* Load the cost table if we have not already done so */
2620    if( p->zCostTable!=0 && p->pConfig3==0 ) {
2621        p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
2622        if( p->pConfig3==0 ) return SQLITE_NOMEM;
2623        memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
2624        rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
2625        if( rc ) return rc;
2626    }
2627    memset(&x, 0, sizeof(x));
2628    x.iScope = 3;  /* Default scope if none specified by "WHERE scope=N" */
2629    x.iMaxDist = -1;   /* Maximum allowed edit distance */
2630
2631    if( idxNum&2 ) {
2632        iLang = sqlite3_value_int(argv[idx++]);
2633    }
2634    if( idxNum&4 ) {
2635        iLimit = sqlite3_value_int(argv[idx++]);
2636        if( iLimit<1 ) iLimit = 1;
2637    }
2638    if( idxNum&8 ) {
2639        x.iScope = sqlite3_value_int(argv[idx++]);
2640        if( x.iScope<1 ) x.iScope = 1;
2641        if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2;
2642    }
2643    if( idxNum&(16|32) ) {
2644        x.iMaxDist = sqlite3_value_int(argv[idx++]);
2645        if( idxNum&16 ) x.iMaxDist--;
2646        if( x.iMaxDist<0 ) x.iMaxDist = 0;
2647    }
2648    spellfix1ResetCursor(pCur);
2649    spellfix1ResizeCursor(pCur, iLimit);
2650    zMatchThis = sqlite3_value_text(argv[0]);
2651    if( zMatchThis==0 ) return SQLITE_OK;
2652    if( p->pConfig3 ) {
2653        x.pLang = editDist3FindLang(p->pConfig3, iLang);
2654        pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
2655        if( pMatchStr3==0 ) {
2656            x.rc = SQLITE_NOMEM;
2657            goto filter_exit;
2658        }
2659    } else {
2660        x.pLang = 0;
2661    }
2662    zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
2663    sqlite3_free(pCur->zPattern);
2664    pCur->zPattern = zPattern;
2665    if( zPattern==0 ) {
2666        x.rc = SQLITE_NOMEM;
2667        goto filter_exit;
2668    }
2669    nPattern = (int)strlen(zPattern);
2670    if( zPattern[nPattern-1]=='*' ) nPattern--;
2671    zSql = sqlite3_mprintf(
2672               "SELECT id, word, rank, coalesce(k1,word)"
2673               "  FROM \"%w\".\"%w_vocab\""
2674               " WHERE langid=%d AND k2>=?1 AND k2<?2",
2675               p->zDbName, p->zTableName, iLang
2676           );
2677    if( zSql==0 ) {
2678        x.rc = SQLITE_NOMEM;
2679        pStmt = 0;
2680        goto filter_exit;
2681    }
2682    rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
2683    sqlite3_free(zSql);
2684    pCur->iLang = iLang;
2685    x.pCur = pCur;
2686    x.pStmt = pStmt;
2687    x.zPattern = zPattern;
2688    x.nPattern = nPattern;
2689    x.pMatchStr3 = pMatchStr3;
2690    x.iLang = iLang;
2691    x.rc = rc;
2692    x.pConfig3 = p->pConfig3;
2693    if( x.rc==SQLITE_OK ) {
2694        spellfix1RunQuery(&x, zPattern, nPattern);
2695    }
2696
2697    if( pCur->a ) {
2698        qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
2699        pCur->iTop = iLimit;
2700        pCur->iScope = iScope;
2701    } else {
2702        x.rc = SQLITE_NOMEM;
2703    }
2704
2705filter_exit:
2706    sqlite3_finalize(pStmt);
2707    editDist3FromStringDelete(pMatchStr3);
2708    return x.rc;
2709}
2710
2711/*
2712** This version of xFilter handles a full-table scan case
2713*/
2714static int spellfix1FilterForFullScan(
2715    spellfix1_cursor *pCur,
2716    int argc,
2717    sqlite3_value **argv
2718)
2719{
2720    int rc = SQLITE_OK;
2721    int idxNum = pCur->idxNum;
2722    char *zSql;
2723    spellfix1_vtab *pVTab = pCur->pVTab;
2724    spellfix1ResetCursor(pCur);
2725    assert( idxNum==0 || idxNum==64 );
2726    zSql = sqlite3_mprintf(
2727               "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s",
2728               pVTab->zDbName, pVTab->zTableName,
2729               ((idxNum & 64) ? " WHERE rowid=?" : "")
2730           );
2731    if( zSql==0 ) return SQLITE_NOMEM;
2732    rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0);
2733    sqlite3_free(zSql);
2734    if( rc==SQLITE_OK && (idxNum & 64) ) {
2735        assert( argc==1 );
2736        rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]);
2737    }
2738    pCur->nRow = pCur->iRow = 0;
2739    if( rc==SQLITE_OK ) {
2740        rc = sqlite3_step(pCur->pFullScan);
2741        if( rc==SQLITE_ROW ) {
2742            pCur->iRow = -1;
2743            rc = SQLITE_OK;
2744        }
2745        if( rc==SQLITE_DONE ) {
2746            rc = SQLITE_OK;
2747        }
2748    } else {
2749        pCur->iRow = 0;
2750    }
2751    return rc;
2752}
2753
2754/*
2755** Called to "rewind" a cursor back to the beginning so that
2756** it starts its output over again.  Always called at least once
2757** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call.
2758*/
2759static int spellfix1Filter(
2760    sqlite3_vtab_cursor *cur,
2761    int idxNum, const char *idxStr,
2762    int argc, sqlite3_value **argv
2763)
2764{
2765    spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2766    int rc;
2767    pCur->idxNum = idxNum;
2768    if( idxNum & 1 ) {
2769        rc = spellfix1FilterForMatch(pCur, argc, argv);
2770    } else {
2771        rc = spellfix1FilterForFullScan(pCur, argc, argv);
2772    }
2773    return rc;
2774}
2775
2776/*
2777** Advance a cursor to its next row of output
2778*/
2779static int spellfix1Next(sqlite3_vtab_cursor *cur)
2780{
2781    spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2782    int rc = SQLITE_OK;
2783    if( pCur->iRow < pCur->nRow ) {
2784        if( pCur->pFullScan ) {
2785            rc = sqlite3_step(pCur->pFullScan);
2786            if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow;
2787            if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK;
2788        } else {
2789            pCur->iRow++;
2790        }
2791    }
2792    return rc;
2793}
2794
2795/*
2796** Return TRUE if we are at the end-of-file
2797*/
2798static int spellfix1Eof(sqlite3_vtab_cursor *cur)
2799{
2800    spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2801    return pCur->iRow>=pCur->nRow;
2802}
2803
2804/*
2805** Return columns from the current row.
2806*/
2807static int spellfix1Column(
2808    sqlite3_vtab_cursor *cur,
2809    sqlite3_context *ctx,
2810    int i
2811)
2812{
2813    spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2814    if( pCur->pFullScan ) {
2815        if( i<=SPELLFIX_COL_LANGID ) {
2816            sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
2817        } else {
2818            sqlite3_result_null(ctx);
2819        }
2820        return SQLITE_OK;
2821    }
2822    switch( i ) {
2823    case SPELLFIX_COL_WORD: {
2824        sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
2825        break;
2826    }
2827    case SPELLFIX_COL_RANK: {
2828        sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
2829        break;
2830    }
2831    case SPELLFIX_COL_DISTANCE: {
2832        sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
2833        break;
2834    }
2835    case SPELLFIX_COL_LANGID: {
2836        sqlite3_result_int(ctx, pCur->iLang);
2837        break;
2838    }
2839    case SPELLFIX_COL_SCORE: {
2840        sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
2841        break;
2842    }
2843    case SPELLFIX_COL_MATCHLEN: {
2844        int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
2845        if( iMatchlen<0 ) {
2846            int nPattern = (int)strlen(pCur->zPattern);
2847            char *zWord = pCur->a[pCur->iRow].zWord;
2848            int nWord = (int)strlen(zWord);
2849
2850            if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ) {
2851                char *zTranslit;
2852                int res;
2853                zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
2854                if( !zTranslit ) return SQLITE_NOMEM;
2855                res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
2856                sqlite3_free(zTranslit);
2857                if( res<0 ) return SQLITE_NOMEM;
2858                iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
2859            } else {
2860                iMatchlen = utf8Charlen(zWord, nWord);
2861            }
2862        }
2863
2864        sqlite3_result_int(ctx, iMatchlen);
2865        break;
2866    }
2867    case SPELLFIX_COL_PHONEHASH: {
2868        sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
2869        break;
2870    }
2871    case SPELLFIX_COL_TOP: {
2872        sqlite3_result_int(ctx, pCur->iTop);
2873        break;
2874    }
2875    case SPELLFIX_COL_SCOPE: {
2876        sqlite3_result_int(ctx, pCur->iScope);
2877        break;
2878    }
2879    case SPELLFIX_COL_SRCHCNT: {
2880        sqlite3_result_int(ctx, pCur->nSearch);
2881        break;
2882    }
2883    default: {
2884        sqlite3_result_null(ctx);
2885        break;
2886    }
2887    }
2888    return SQLITE_OK;
2889}
2890
2891/*
2892** The rowid.
2893*/
2894static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid)
2895{
2896    spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2897    if( pCur->pFullScan ) {
2898        *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
2899    } else {
2900        *pRowid = pCur->a[pCur->iRow].iRowid;
2901    }
2902    return SQLITE_OK;
2903}
2904
2905/*
2906** This function is called by the xUpdate() method. It returns a string
2907** containing the conflict mode that xUpdate() should use for the current
2908** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE".
2909*/
2910static const char *spellfix1GetConflict(sqlite3 *db)
2911{
2912    static const char *azConflict[] = {
2913        /* Note: Instead of "FAIL" - "ABORT". */
2914        "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE"
2915    };
2916    int eConflict = sqlite3_vtab_on_conflict(db);
2917
2918    assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE
2919            || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT
2920            || eConflict==SQLITE_REPLACE
2921          );
2922    assert( SQLITE_ROLLBACK==1 );
2923    assert( SQLITE_IGNORE==2 );
2924    assert( SQLITE_FAIL==3 );
2925    assert( SQLITE_ABORT==4 );
2926    assert( SQLITE_REPLACE==5 );
2927
2928    return azConflict[eConflict-1];
2929}
2930
2931/*
2932** The xUpdate() method.
2933*/
2934static int spellfix1Update(
2935    sqlite3_vtab *pVTab,
2936    int argc,
2937    sqlite3_value **argv,
2938    sqlite_int64 *pRowid
2939)
2940{
2941    int rc = SQLITE_OK;
2942    sqlite3_int64 rowid, newRowid;
2943    spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2944    sqlite3 *db = p->db;
2945
2946    if( argc==1 ) {
2947        /* A delete operation on the rowid given by argv[0] */
2948        rowid = *pRowid = sqlite3_value_int64(argv[0]);
2949        spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
2950                        " WHERE id=%lld",
2951                        p->zDbName, p->zTableName, rowid);
2952    } else {
2953        const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
2954        int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
2955        int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
2956        int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
2957        const unsigned char *zSoundslike =
2958            sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2959        int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2960        char *zK1, *zK2;
2961        int i;
2962        char c;
2963        const char *zConflict = spellfix1GetConflict(db);
2964
2965        if( zWord==0 ) {
2966            /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy');
2967            ** cause zWord to be NULL, so we look at the "command" column to see
2968            ** what special actions to take */
2969            const char *zCmd =
2970                (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
2971            if( zCmd==0 ) {
2972                pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word",
2973                                                 p->zTableName);
2974                return SQLITE_CONSTRAINT_NOTNULL;
2975            }
2976            if( strcmp(zCmd,"reset")==0 ) {
2977                /* Reset the  edit cost table (if there is one). */
2978                editDist3ConfigDelete(p->pConfig3);
2979                p->pConfig3 = 0;
2980                return SQLITE_OK;
2981            }
2982            if( strncmp(zCmd,"edit_cost_table=",16)==0 ) {
2983                editDist3ConfigDelete(p->pConfig3);
2984                p->pConfig3 = 0;
2985                sqlite3_free(p->zCostTable);
2986                p->zCostTable = spellfix1Dequote(zCmd+16);
2987                if( p->zCostTable==0 ) return SQLITE_NOMEM;
2988                if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ) {
2989                    sqlite3_free(p->zCostTable);
2990                    p->zCostTable = 0;
2991                }
2992                return SQLITE_OK;
2993            }
2994            pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
2995                                             p->zTableName, zCmd);
2996            return SQLITE_ERROR;
2997        }
2998        if( iRank<1 ) iRank = 1;
2999        if( zSoundslike ) {
3000            zK1 = (char*)transliterate(zSoundslike, nSoundslike);
3001        } else {
3002            zK1 = (char*)transliterate(zWord, nWord);
3003        }
3004        if( zK1==0 ) return SQLITE_NOMEM;
3005        for(i=0; (c = zK1[i])!=0; i++) {
3006            if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
3007        }
3008        zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
3009        if( zK2==0 ) {
3010            sqlite3_free(zK1);
3011            return SQLITE_NOMEM;
3012        }
3013        if( sqlite3_value_type(argv[0])==SQLITE_NULL ) {
3014            if( sqlite3_value_type(argv[1])==SQLITE_NULL ) {
3015                spellfix1DbExec(&rc, db,
3016                                "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
3017                                "VALUES(%d,%d,%Q,nullif(%Q,%Q),%Q)",
3018                                p->zDbName, p->zTableName,
3019                                iRank, iLang, zWord, zK1, zWord, zK2
3020                               );
3021            } else {
3022                newRowid = sqlite3_value_int64(argv[1]);
3023                spellfix1DbExec(&rc, db,
3024                                "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) "
3025                                "VALUES(%lld,%d,%d,%Q,nullif(%Q,%Q),%Q)",
3026                                zConflict, p->zDbName, p->zTableName,
3027                                newRowid, iRank, iLang, zWord, zK1, zWord, zK2
3028                               );
3029            }
3030            *pRowid = sqlite3_last_insert_rowid(db);
3031        } else {
3032            rowid = sqlite3_value_int64(argv[0]);
3033            newRowid = *pRowid = sqlite3_value_int64(argv[1]);
3034            spellfix1DbExec(&rc, db,
3035                            "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
3036                            " word=%Q, k1=nullif(%Q,%Q), k2=%Q WHERE id=%lld",
3037                            zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang,
3038                            zWord, zK1, zWord, zK2, rowid
3039                           );
3040        }
3041        sqlite3_free(zK1);
3042        sqlite3_free(zK2);
3043    }
3044    return rc;
3045}
3046
3047/*
3048** Rename the spellfix1 table.
3049*/
3050static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew)
3051{
3052    spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
3053    sqlite3 *db = p->db;
3054    int rc = SQLITE_OK;
3055    char *zNewName = sqlite3_mprintf("%s", zNew);
3056    if( zNewName==0 ) {
3057        return SQLITE_NOMEM;
3058    }
3059    spellfix1DbExec(&rc, db,
3060                    "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
3061                    p->zDbName, p->zTableName, zNewName
3062                   );
3063    if( rc==SQLITE_OK ) {
3064        sqlite3_free(p->zTableName);
3065        p->zTableName = zNewName;
3066    } else {
3067        sqlite3_free(zNewName);
3068    }
3069    return rc;
3070}
3071
3072/*
3073** A virtual table module that provides fuzzy search.
3074*/
3075static sqlite3_module spellfix1Module = {
3076    0,                       /* iVersion */
3077    spellfix1Create,         /* xCreate - handle CREATE VIRTUAL TABLE */
3078    spellfix1Connect,        /* xConnect - reconnected to an existing table */
3079    spellfix1BestIndex,      /* xBestIndex - figure out how to do a query */
3080    spellfix1Disconnect,     /* xDisconnect - close a connection */
3081    spellfix1Destroy,        /* xDestroy - handle DROP TABLE */
3082    spellfix1Open,           /* xOpen - open a cursor */
3083    spellfix1Close,          /* xClose - close a cursor */
3084    spellfix1Filter,         /* xFilter - configure scan constraints */
3085    spellfix1Next,           /* xNext - advance a cursor */
3086    spellfix1Eof,            /* xEof - check for end of scan */
3087    spellfix1Column,         /* xColumn - read data */
3088    spellfix1Rowid,          /* xRowid - read data */
3089    spellfix1Update,         /* xUpdate */
3090    0,                       /* xBegin */
3091    0,                       /* xSync */
3092    0,                       /* xCommit */
3093    0,                       /* xRollback */
3094    0,                       /* xFindMethod */
3095    spellfix1Rename,         /* xRename */
3096    0,                       /* xSavepoint */
3097    0,                       /* xRelease */
3098    0,                       /* xRollbackTo */
3099    0,                       /* xShadowName */
3100    0                        /* xIntegrity */
3101};
3102
3103/*
3104** Register the various functions and the virtual table.
3105*/
3106static int spellfix1Register(sqlite3 *db)
3107{
3108    int rc = SQLITE_OK;
3109    int i;
3110    rc = sqlite3_create_function(db, "spellfix1_translit", 1,
3111                                 SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3112                                 transliterateSqlFunc, 0, 0);
3113    if( rc==SQLITE_OK ) {
3114        rc = sqlite3_create_function(db, "spellfix1_editdist", 2,
3115                                     SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3116                                     editdistSqlFunc, 0, 0);
3117    }
3118    if( rc==SQLITE_OK ) {
3119        rc = sqlite3_create_function(db, "spellfix1_phonehash", 1,
3120                                     SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3121                                     phoneticHashSqlFunc, 0, 0);
3122    }
3123    if( rc==SQLITE_OK ) {
3124        rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1,
3125                                     SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3126                                     scriptCodeSqlFunc, 0, 0);
3127    }
3128    if( rc==SQLITE_OK ) {
3129        rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
3130    }
3131    if( rc==SQLITE_OK ) {
3132        rc = editDist3Install(db);
3133    }
3134
3135    /* Verify sanity of the translit[] table */
3136    for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++) {
3137        assert( translit[i].cFrom<translit[i+1].cFrom );
3138    }
3139
3140    return rc;
3141}
3142
3143#endif /* SQLITE_OMIT_VIRTUALTABLE */
3144
3145/*
3146** Extension load function.
3147*/
3148#ifdef _WIN32
3149__declspec(dllexport)
3150#endif
3151int sqlite3_spellfix_init(
3152    sqlite3 *db,
3153    char **pzErrMsg,
3154    const sqlite3_api_routines *pApi
3155)
3156{
3157    SQLITE_EXTENSION_INIT2(pApi);
3158#ifndef SQLITE_OMIT_VIRTUALTABLE
3159    return spellfix1Register(db);
3160#endif
3161    return SQLITE_OK;
3162}