dict @ 78b0ba12073b0940541d91a7568e8b7ada572848

   1/*
   2** 2012 April 10
   3**
   4** The author disclaims copyright to this source code.  In place of
   5** a legal notice, here is a blessing:
   6**
   7**    May you do good and not evil.
   8**    May you find forgiveness for yourself and forgive others.
   9**    May you share freely, never taking more than you give.
  10**
  11*************************************************************************
  12**
  13** This module implements the spellfix1 VIRTUAL TABLE that can be used
  14** to search a large vocabulary for close matches.  See separate
  15** documentation (http://www.sqlite.org/spellfix1.html) for details.
  16*/
  17#include "sqlite3ext.h"
  18SQLITE_EXTENSION_INIT1
  19
  20#ifndef SQLITE_AMALGAMATION
  21# if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
  22#  define NDEBUG 1
  23# endif
  24# if defined(NDEBUG) && defined(SQLITE_DEBUG)
  25#  undef NDEBUG
  26# endif
  27# include <string.h>
  28# include <stdio.h>
  29# include <stdlib.h>
  30# include <assert.h>
  31# define ALWAYS(X)  1
  32# define NEVER(X)   0
  33  typedef unsigned char u8;
  34  typedef unsigned short u16;
  35#endif
  36#include <ctype.h>
  37
  38#ifndef SQLITE_OMIT_VIRTUALTABLE
  39
/*
** Character classes for ASCII characters:
**
**   0   ''        Silent letters:   H W
**   1   'A'       Any vowel:   A E I O U (Y)
**   2   'B'       A bilabial stop or fricative:  B F P V W
**   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
**   4   'D'       Alveolar stops:  D T
**   5   'H'       Letter H at the beginning of a word
**   6   'L'       Glide:  L
**   7   'R'       Semivowel:  R
**   8   'M'       Nasals:  M N
**   9   'Y'       Letter Y at the beginning of a word.
**   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
**   11  ' '       White space
**   12  '?'       Other.
**
** Each class number is also an index into className[], which supplies the
** one-character symbol emitted for that class by phoneticHash().
**
** NOTE(review): the list above does not match the lookup tables in this
** file exactly.  As written, midClass[] and initClass[] both map W to
** CCLASS_B and H to CCLASS_SILENT, so neither table ever yields CCLASS_H,
** and className[0] is '.' rather than an empty symbol.
*/
#define CCLASS_SILENT         0
#define CCLASS_VOWEL          1
#define CCLASS_B              2
#define CCLASS_C              3
#define CCLASS_D              4
#define CCLASS_H              5
#define CCLASS_L              6
#define CCLASS_R              7
#define CCLASS_M              8
#define CCLASS_Y              9
#define CCLASS_DIGIT         10
#define CCLASS_SPACE         11
#define CCLASS_OTHER         12
  70
/*
** The following table gives the character class for non-initial ASCII
** characters.
**
** The table has 128 entries and is indexed by the low 7 bits of the
** character (callers use "c&0x7f").  Entries 0x00 through 0x1f are the
** ASCII control characters; of those only TAB, FF and CR are classified
** as CCLASS_SPACE.  LF and VT are CCLASS_OTHER in this table.
*/
static const unsigned char midClass[] = {
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*TAB*/ CCLASS_SPACE,    /*LF */ CCLASS_OTHER,   /*VT */ CCLASS_OTHER,
 /*FF */ CCLASS_SPACE,    /*CR */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*SPC*/ CCLASS_SPACE,
 /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
 /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
 /* ' */ CCLASS_SILENT,   /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
 /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
 /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
 /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
 /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
 /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
 /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
 /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
 /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
 /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
 /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
 /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
 /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
 /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
 /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
 /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
 /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
 /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
 /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
 /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
 /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
 /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
 /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
 /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
 /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
 /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
 /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
 /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
 /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
 /* ~ */ CCLASS_OTHER,    /*DEL*/ CCLASS_OTHER,   
};
/* 
** This table gives the character class for ASCII characters that form the
** initial character of a word.  It has the same 128-entry layout as
** midClass[] and is indexed the same way (low 7 bits of the character).
**
** As the two tables are written, the entries that differ from midClass[]
** are:
**
**   *  The apostrophe:     CCLASS_OTHER here, CCLASS_SILENT in midClass[]
**   *  The letters Y, y:   CCLASS_Y here, CCLASS_VOWEL in midClass[]
**
** H and W have the same class in both tables (CCLASS_SILENT and CCLASS_B
** respectively).
*/
static const unsigned char initClass[] = {
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*TAB*/ CCLASS_SPACE,    /*LF */ CCLASS_OTHER,   /*VT */ CCLASS_OTHER,
 /*FF */ CCLASS_SPACE,    /*CR */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
 /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*SPC*/ CCLASS_SPACE,
 /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
 /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
 /* ' */ CCLASS_OTHER,    /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
 /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
 /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
 /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
 /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
 /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
 /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
 /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
 /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
 /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
 /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
 /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
 /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
 /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
 /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
 /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
 /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
 /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
 /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
 /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
 /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
 /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
 /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
 /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
 /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
 /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
 /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
 /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
 /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
 /* ~ */ CCLASS_OTHER,    /*DEL*/ CCLASS_OTHER,   
};
 170
/*
** Mapping from the character class number (0-12) to a symbol for each
** character class.  className[k] is the symbol for class k, so the string
** holds exactly one character per CCLASS_* value.
**
** initClass[] maps most of these symbols back to their class number.
** As the tables are written, the exceptions are '.' (which initClass[]
** maps to CCLASS_OTHER, not CCLASS_SILENT) and 'H' (which initClass[]
** maps to CCLASS_SILENT, not CCLASS_H).
*/
static const unsigned char className[] = ".ABCDHLRMY9 ?";
 177
 178/*
 179** Generate a "phonetic hash" from a string of ASCII characters
 180** in zIn[0..nIn-1].
 181**
 182**   * Map characters by character class as defined above.
 183**   * Omit double-letters
 184**   * Omit vowels beside R and L
 185**   * Omit T when followed by CH
 186**   * Omit W when followed by R
 187**   * Omit D when followed by J or G
 188**   * Omit K in KN or G in GN at the beginning of a word
 189**
 190** Space to hold the result is obtained from sqlite3_malloc()
 191**
 192** Return NULL if memory allocation fails.  
 193*/
 194static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){
 195  unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
 196  int i;
 197  int nOut = 0;
 198  char cPrev = 0x77;
 199  char cPrevX = 0x77;
 200  const unsigned char *aClass = initClass;
 201
 202  if( zOut==0 ) return 0;
 203  if( nIn>2 ){
 204    switch( zIn[0] ){
 205      case 'g': 
 206      case 'k': {
 207        if( zIn[1]=='n' ){ zIn++; nIn--; }
 208        break;
 209      }
 210    }
 211  }
 212  for(i=0; i<nIn; i++){
 213    unsigned char c = zIn[i];
 214    if( i+1<nIn ){
 215      if( c=='w' && zIn[i+1]=='r' ) continue;
 216      if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue;
 217      if( i+2<nIn ){
 218        if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
 219      }
 220    }
 221    c = aClass[c&0x7f];
 222    if( c==CCLASS_SPACE ) continue;
 223    if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
 224    aClass = midClass;
 225    if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
 226       continue; /* No vowels beside L or R */ 
 227    }
 228    if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
 229       nOut--;   /* No vowels beside L or R */
 230    }
 231    cPrev = c;
 232    if( c==CCLASS_SILENT ) continue;
 233    cPrevX = c;
 234    c = className[c];
 235    assert( nOut>=0 );
 236    if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c;
 237  }
 238  zOut[nOut] = 0;
 239  return zOut;
 240}
 241
 242/*
 243** This is an SQL function wrapper around phoneticHash().  See
 244** the description of phoneticHash() for additional information.
 245*/
 246static void phoneticHashSqlFunc(
 247  sqlite3_context *context,
 248  int argc,
 249  sqlite3_value **argv
 250){
 251  const unsigned char *zIn;
 252  unsigned char *zOut;
 253
 254  zIn = sqlite3_value_text(argv[0]);
 255  if( zIn==0 ) return;
 256  zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0]));
 257  if( zOut==0 ){
 258    sqlite3_result_error_nomem(context);
 259  }else{
 260    sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
 261  }
 262}
 263
 264/*
 265** Return the character class number for a character given its
 266** context.
 267*/
 268static char characterClass(char cPrev, char c){
 269  return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f];
 270}
 271
 272/*
 273** Return the cost of inserting or deleting character c immediately
 274** following character cPrev.  If cPrev==0, that means c is the first
 275** character of the word.
 276*/
 277static int insertOrDeleteCost(char cPrev, char c, char cNext){
 278  char classC = characterClass(cPrev, c);
 279  char classCprev;
 280
 281  if( classC==CCLASS_SILENT ){
 282    /* Insert or delete "silent" characters such as H or W */
 283    return 1;
 284  }
 285  if( cPrev==c ){
 286    /* Repeated characters, or miss a repeat */
 287    return 10;
 288  }
 289  if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){
 290    return 20;  /* Insert a vowel before or after 'r' */
 291  }
 292  classCprev = characterClass(cPrev, cPrev);
 293  if( classC==classCprev ){
 294    if( classC==CCLASS_VOWEL ){
 295      /* Remove or add a new vowel to a vowel cluster */
 296      return 15;
 297    }else{
 298      /* Remove or add a consonant not in the same class */
 299      return 50;
 300    }
 301  }
 302
 303  /* any other character insertion or deletion */
 304  return 100;
 305}
 306
/*
** Divide the insertion cost by this factor when appending to the
** end of the word.
**
** editdist1() applies the divisor in two places: to every insertion made
** while processing the final character of zA, and to all insertions when
** nothing of zA remains after the common prefix has been skipped.
*/
#define FINAL_INS_COST_DIV  4
 312
 313/*
 314** Return the cost of substituting cTo in place of cFrom assuming
 315** the previous character is cPrev.  If cPrev==0 then cTo is the first
 316** character of the word.
 317*/
 318static int substituteCost(char cPrev, char cFrom, char cTo){
 319  char classFrom, classTo;
 320  if( cFrom==cTo ){
 321    /* Exact match */
 322    return 0;
 323  }
 324  if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){
 325    /* differ only in case */
 326    return 0;
 327  }
 328  classFrom = characterClass(cPrev, cFrom);
 329  classTo = characterClass(cPrev, cTo);
 330  if( classFrom==classTo ){
 331    /* Same character class */
 332    return 40;
 333  }
 334  if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y
 335      && classTo>=CCLASS_B && classTo<=CCLASS_Y ){
 336    /* Convert from one consonant to another, but in a different class */
 337    return 75;
 338  }
 339  /* Any other subsitution */
 340  return 100;
 341}
 342
 343/*
 344** Given two strings zA and zB which are pure ASCII, return the cost
 345** of transforming zA into zB.  If zA ends with '*' assume that it is
 346** a prefix of zB and give only minimal penalty for extra characters
 347** on the end of zB.
 348**
 349** Smaller numbers mean a closer match.
 350**
 351** Negative values indicate an error:
 352**    -1  One of the inputs is NULL
 353**    -2  Non-ASCII characters on input
 354**    -3  Unable to allocate memory 
 355**
 356** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
 357** of zB that matched the pattern in zA. If zA does not end with a '*',
 358** then this value is always the number of bytes in zB (i.e. strlen(zB)).
 359** If zA does end in a '*', then it is the number of bytes in the prefix
 360** of zB that was deemed to match zA.
 361*/
 362static int editdist1(const char *zA, const char *zB, int *pnMatch){
 363  int nA, nB;            /* Number of characters in zA[] and zB[] */
 364  int xA, xB;            /* Loop counters for zA[] and zB[] */
 365  char cA = 0, cB;       /* Current character of zA and zB */
 366  char cAprev, cBprev;   /* Previous character of zA and zB */
 367  char cAnext, cBnext;   /* Next character in zA and zB */
 368  int d;                 /* North-west cost value */
 369  int dc = 0;            /* North-west character value */
 370  int res;               /* Final result */
 371  int *m;                /* The cost matrix */
 372  char *cx;              /* Corresponding character values */
 373  int *toFree = 0;       /* Malloced space */
 374  int nMatch = 0;
 375  int mStack[60+15];     /* Stack space to use if not too much is needed */
 376
 377  /* Early out if either input is NULL */
 378  if( zA==0 || zB==0 ) return -1;
 379
 380  /* Skip any common prefix */
 381  while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
 382  if( pnMatch ) *pnMatch = nMatch;
 383  if( zA[0]==0 && zB[0]==0 ) return 0;
 384
 385#if 0
 386  printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
 387#endif
 388
 389  /* Verify input strings and measure their lengths */
 390  for(nA=0; zA[nA]; nA++){
 391    if( zA[nA]&0x80 ) return -2;
 392  }
 393  for(nB=0; zB[nB]; nB++){
 394    if( zB[nB]&0x80 ) return -2;
 395  }
 396
 397  /* Special processing if either string is empty */
 398  if( nA==0 ){
 399    cBprev = (char)dc;
 400    for(xB=res=0; (cB = zB[xB])!=0; xB++){
 401      res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
 402      cBprev = cB;
 403    }
 404    return res;
 405  }
 406  if( nB==0 ){
 407    cAprev = (char)dc;
 408    for(xA=res=0; (cA = zA[xA])!=0; xA++){
 409      res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
 410      cAprev = cA;
 411    }
 412    return res;
 413  }
 414
 415  /* A is a prefix of B */
 416  if( zA[0]=='*' && zA[1]==0 ) return 0;
 417
 418  /* Allocate and initialize the Wagner matrix */
 419  if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
 420    m = mStack;
 421  }else{
 422    m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
 423    if( m==0 ) return -3;
 424  }
 425  cx = (char*)&m[nB+1];
 426
 427  /* Compute the Wagner edit distance */
 428  m[0] = 0;
 429  cx[0] = (char)dc;
 430  cBprev = (char)dc;
 431  for(xB=1; xB<=nB; xB++){
 432    cBnext = zB[xB];
 433    cB = zB[xB-1];
 434    cx[xB] = cB;
 435    m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
 436    cBprev = cB;
 437  }
 438  cAprev = (char)dc;
 439  for(xA=1; xA<=nA; xA++){
 440    int lastA = (xA==nA);
 441    cA = zA[xA-1];
 442    cAnext = zA[xA];
 443    if( cA=='*' && lastA ) break;
 444    d = m[0];
 445    dc = cx[0];
 446    m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
 447    cBprev = 0;
 448    for(xB=1; xB<=nB; xB++){
 449      int totalCost, insCost, delCost, subCost, ncx;
 450      cB = zB[xB-1];
 451      cBnext = zB[xB];
 452
 453      /* Cost to insert cB */
 454      insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
 455      if( lastA ) insCost /= FINAL_INS_COST_DIV;
 456
 457      /* Cost to delete cA */
 458      delCost = insertOrDeleteCost(cx[xB], cA, cBnext);
 459
 460      /* Cost to substitute cA->cB */
 461      subCost = substituteCost(cx[xB-1], cA, cB);
 462
 463      /* Best cost */
 464      totalCost = insCost + m[xB-1];
 465      ncx = cB;
 466      if( (delCost + m[xB])<totalCost ){
 467        totalCost = delCost + m[xB];
 468        ncx = cA;
 469      }
 470      if( (subCost + d)<totalCost ){
 471        totalCost = subCost + d;
 472      }
 473
 474#if 0
 475      printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
 476             " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
 477             xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
 478             insCost, delCost, subCost, totalCost, ncx?ncx:' ');
 479#endif
 480
 481      /* Update the matrix */
 482      d = m[xB];
 483      dc = cx[xB];
 484      m[xB] = totalCost;
 485      cx[xB] = (char)ncx;
 486      cBprev = cB;
 487    }
 488    cAprev = cA;
 489  }
 490
 491  /* Free the wagner matrix and return the result */
 492  if( cA=='*' ){
 493    res = m[1];
 494    for(xB=1; xB<=nB; xB++){
 495      if( m[xB]<res ){
 496        res = m[xB];
 497        if( pnMatch ) *pnMatch = xB+nMatch;
 498      }
 499    }
 500  }else{
 501    res = m[nB];
 502    /* In the current implementation, pnMatch is always NULL if zA does
 503    ** not end in "*" */
 504    assert( pnMatch==0 );
 505  }
 506  sqlite3_free(toFree);
 507  return res;
 508}
 509
 510/*
 511** Function:    editdist(A,B)
 512**
 513** Return the cost of transforming string A into string B.  Both strings
 514** must be pure ASCII text.  If A ends with '*' then it is assumed to be
 515** a prefix of B and extra characters on the end of B have minimal additional
 516** cost.
 517*/
 518static void editdistSqlFunc(
 519  sqlite3_context *context,
 520  int argc,
 521  sqlite3_value **argv
 522){
 523  int res = editdist1(
 524                    (const char*)sqlite3_value_text(argv[0]),
 525                    (const char*)sqlite3_value_text(argv[1]),
 526                    0);
 527  if( res<0 ){
 528    if( res==(-3) ){
 529      sqlite3_result_error_nomem(context);
 530    }else if( res==(-2) ){
 531      sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
 532    }else{
 533      sqlite3_result_error(context, "NULL input to editdist()", -1);
 534    }
 535  }else{ 
 536    sqlite3_result_int(context, res);
 537  }
 538}
 539
 540/* End of the fixed-cost edit distance implementation
 541******************************************************************************
 542*****************************************************************************
 543** Begin: Configurable cost unicode edit distance routines
 544*/
/* Forward declaration of structures.  Declaring the typedef names first
** lets the structure definitions that follow refer to one another (and to
** themselves) by their short names. */
typedef struct EditDist3Cost EditDist3Cost;
typedef struct EditDist3Config EditDist3Config;
typedef struct EditDist3Point EditDist3Point;
typedef struct EditDist3From EditDist3From;
typedef struct EditDist3FromString EditDist3FromString;
typedef struct EditDist3To EditDist3To;
typedef struct EditDist3ToString EditDist3ToString;
typedef struct EditDist3Lang EditDist3Lang;
 554
 555
/*
** An entry in the edit cost table.
**
** Entries for one language form a singly-linked list through pNext.
** The FROM text occupies a[0..nFrom-1] and the TO text follows it at
** a[nFrom..nFrom+nTo-1]; neither is zero-terminated.  When the two
** strings together need more than the 4 bytes declared for a[],
** editDist3ConfigLoad() allocates the object with the extra bytes
** appended.
*/
struct EditDist3Cost {
  EditDist3Cost *pNext;     /* Next cost element */
  u8 nFrom;                 /* Number of bytes in aFrom */
  u8 nTo;                   /* Number of bytes in aTo */
  u16 iCost;                /* Cost of this transformation */
  char a[4]    ;            /* FROM string followed by TO string */
  /* Additional TO and FROM string bytes appended as necessary */
};
 567
/*
** Edit costs for a particular language ID.
**
** The three default costs apply when no entry on the pCost list covers
** an edit.  editDist3ConfigLoad() starts each language with defaults of
** 100, 100 and 150 and overrides them from rows that use "?" as a
** wildcard for the FROM and/or TO text.
*/
struct EditDist3Lang {
  int iLang;             /* Language ID */
  int iInsCost;          /* Default insertion cost */
  int iDelCost;          /* Default deletion cost */
  int iSubCost;          /* Default substitution cost */
  EditDist3Cost *pCost;  /* Costs */
};
 578
 579
/*
** The default EditDist3Lang object, with default costs: language ID 0,
** insertion cost 100, deletion cost 100, substitution cost 150, and an
** empty list of specific costs.
*/
static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
 584
/*
** Complete configuration: the edit costs of every language that has been
** loaded.  The a[] array is heap memory grown one element at a time by
** editDist3ConfigLoad() and released by editDist3ConfigClear().
*/
struct EditDist3Config {
  int nLang;             /* Number of language IDs.  Size of a[] */
  EditDist3Lang *a;      /* One for each distinct language ID */
};
 592
/*
** Extra information about each character in the FROM string.
**
** editDist3FromStringNew() fills in one of these for each byte offset of
** the FROM string.  Cost entries whose FROM text matches at that offset
** are collected in apDel[] when their TO text is empty, and in apSubst[]
** otherwise.  Both arrays hold pointers to cost entries belonging to the
** language configuration; only the arrays themselves are freed by
** editDist3FromStringDelete().
*/
struct EditDist3From {
  int nSubst;              /* Number of substitution cost entries */
  int nDel;                /* Number of deletion cost entries */
  int nByte;               /* Number of bytes in this character */
  EditDist3Cost **apSubst; /* Array of substitution costs for this element */
  EditDist3Cost **apDel;   /* Array of deletion cost entries */
};
 603
/*
** A precompiled FROM string.
**
** In the common case we expect the FROM string to be reused multiple times.
** In other words, the common case will be to measure the edit distance
** from a single origin string to multiple target strings.
**
** editDist3FromStringNew() sets n to the length of z[] in bytes and
** allocates a[] with one entry per byte.  If the original text ended in
** '*', that character is removed from z[] and is not counted in n.
*/
struct EditDist3FromString {
  char *z;                 /* The complete text of the FROM string */
  int n;                   /* Number of bytes in z[]; also the size of a[] */
  int isPrefix;            /* True if ends with '*' character */
  EditDist3From *a;        /* Extra info about each char of the FROM string */
};
 617
/*
** Extra information about each character in the TO string.
*/
struct EditDist3To {
  int nIns;                /* Number of insertion cost entries */
  int nByte;               /* Number of bytes in this character */
  EditDist3Cost **apIns;   /* Array of insertion cost entries */
};
 626
/*
** A precompiled TO string
*/
struct EditDist3ToString {
  char *z;                 /* The complete text of the TO string */
  int n;                   /* Number of characters in the TO string */
  EditDist3To *a;          /* Extra info about each char of the TO string */
};
 635
 636/*
 637** Clear or delete an instance of the object that records all edit-distance
 638** weights.
 639*/
 640static void editDist3ConfigClear(EditDist3Config *p){
 641  int i;
 642  if( p==0 ) return;
 643  for(i=0; i<p->nLang; i++){
 644    EditDist3Cost *pCost, *pNext;
 645    pCost = p->a[i].pCost;
 646    while( pCost ){
 647      pNext = pCost->pNext;
 648      sqlite3_free(pCost);
 649      pCost = pNext;
 650    }
 651  }
 652  sqlite3_free(p->a);
 653  memset(p, 0, sizeof(*p));
 654}
 655static void editDist3ConfigDelete(void *pIn){
 656  EditDist3Config *p = (EditDist3Config*)pIn;
 657  editDist3ConfigClear(p);
 658  sqlite3_free(p);
 659}
 660
 661/* Compare the FROM values of two EditDist3Cost objects, for sorting.
 662** Return negative, zero, or positive if the A is less than, equal to,
 663** or greater than B.
 664*/
 665static int editDist3CostCompare(EditDist3Cost *pA, EditDist3Cost *pB){
 666  int n = pA->nFrom;
 667  int rc;
 668  if( n>pB->nFrom ) n = pB->nFrom;
 669  rc = strncmp(pA->a, pB->a, n);
 670  if( rc==0 ) rc = pA->nFrom - pB->nFrom;
 671  return rc;
 672}
 673
 674/*
 675** Merge together two sorted lists of EditDist3Cost objects, in order
 676** of increasing FROM.
 677*/
 678static EditDist3Cost *editDist3CostMerge(
 679  EditDist3Cost *pA,
 680  EditDist3Cost *pB
 681){
 682  EditDist3Cost *pHead = 0;
 683  EditDist3Cost **ppTail = &pHead;
 684  EditDist3Cost *p;
 685  while( pA && pB ){
 686    if( editDist3CostCompare(pA,pB)<=0 ){
 687      p = pA;
 688      pA = pA->pNext;
 689    }else{
 690      p = pB;
 691      pB = pB->pNext;
 692    }
 693    *ppTail = p;
 694    ppTail =  &p->pNext;
 695  }
 696  if( pA ){
 697    *ppTail = pA;
 698  }else{
 699    *ppTail = pB;
 700  }
 701  return pHead;
 702}
 703
 704/*
 705** Sort a list of EditDist3Cost objects into order of increasing FROM
 706*/
 707static EditDist3Cost *editDist3CostSort(EditDist3Cost *pList){
 708  EditDist3Cost *ap[60], *p;
 709  int i;
 710  int mx = 0;
 711  ap[0] = 0;
 712  ap[1] = 0;
 713  while( pList ){
 714    p = pList;
 715    pList = p->pNext;
 716    p->pNext = 0;
 717    for(i=0; ap[i]; i++){
 718      p = editDist3CostMerge(ap[i],p);
 719      ap[i] = 0;
 720    }
 721    ap[i] = p;
 722    if( i>mx ){
 723      mx = i;
 724      ap[i+1] = 0;
 725    }
 726  }
 727  p = 0;
 728  for(i=0; i<=mx; i++){
 729    if( ap[i] ) p = editDist3CostMerge(p,ap[i]);
 730  }
 731  return p;
 732}
 733
 734/*
 735** Load all edit-distance weights from a table.
 736*/
 737static int editDist3ConfigLoad(
 738  EditDist3Config *p,      /* The edit distance configuration to load */
 739  sqlite3 *db,            /* Load from this database */
 740  const char *zTable      /* Name of the table from which to load */
 741){
 742  sqlite3_stmt *pStmt;
 743  int rc, rc2;
 744  char *zSql;
 745  int iLangPrev = -9999;
 746  EditDist3Lang *pLang = 0;
 747
 748  zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost"
 749                         " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable);
 750  if( zSql==0 ) return SQLITE_NOMEM;
 751  rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
 752  sqlite3_free(zSql);
 753  if( rc ) return rc;
 754  editDist3ConfigClear(p);
 755  while( sqlite3_step(pStmt)==SQLITE_ROW ){
 756    int iLang = sqlite3_column_int(pStmt, 0);
 757    const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1);
 758    int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0;
 759    const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
 760    int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
 761    int iCost = sqlite3_column_int(pStmt, 3);
 762
 763    assert( zFrom!=0 || nFrom==0 );
 764    assert( zTo!=0 || nTo==0 );
 765    if( nFrom>100 || nTo>100 ) continue;
 766    if( iCost<0 ) continue;
 767    if( iCost>=10000 ) continue;  /* Costs above 10K are considered infinite */
 768    if( pLang==0 || iLang!=iLangPrev ){
 769      EditDist3Lang *pNew;
 770      pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
 771      if( pNew==0 ){ rc = SQLITE_NOMEM; break; }
 772      p->a = pNew;
 773      pLang = &p->a[p->nLang];
 774      p->nLang++;
 775      pLang->iLang = iLang;
 776      pLang->iInsCost = 100;
 777      pLang->iDelCost = 100;
 778      pLang->iSubCost = 150;
 779      pLang->pCost = 0;
 780      iLangPrev = iLang;
 781    }
 782    if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){
 783      pLang->iDelCost = iCost;
 784    }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){
 785      pLang->iInsCost = iCost;
 786    }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){
 787      pLang->iSubCost = iCost;
 788    }else{
 789      EditDist3Cost *pCost;
 790      int nExtra = nFrom + nTo - 4;
 791      if( nExtra<0 ) nExtra = 0;
 792      pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
 793      if( pCost==0 ){ rc = SQLITE_NOMEM; break; }
 794      pCost->nFrom = (u8)nFrom;
 795      pCost->nTo = (u8)nTo;
 796      pCost->iCost = (u16)iCost;
 797      memcpy(pCost->a, zFrom, nFrom);
 798      memcpy(pCost->a + nFrom, zTo, nTo);
 799      pCost->pNext = pLang->pCost;
 800      pLang->pCost = pCost; 
 801    }
 802  }
 803  rc2 = sqlite3_finalize(pStmt);
 804  if( rc==SQLITE_OK ) rc = rc2;
 805  if( rc==SQLITE_OK ){
 806    int iLang;
 807    for(iLang=0; iLang<p->nLang; iLang++){
 808      p->a[iLang].pCost = editDist3CostSort(p->a[iLang].pCost);
 809    }
 810  }
 811  return rc;
 812}
 813
 814/*
 815** Return the length (in bytes) of a utf-8 character.  Or return a maximum
 816** of N.
 817*/
 818static int utf8Len(unsigned char c, int N){
 819  int len = 1;
 820  if( c>0x7f ){
 821    if( (c&0xe0)==0xc0 ){
 822      len = 2;
 823    }else if( (c&0xf0)==0xe0 ){
 824      len = 3;
 825    }else{
 826      len = 4;
 827    }
 828  }
 829  if( len>N ) len = N;
 830  return len;
 831}
 832
 833/*
 834** Return TRUE (non-zero) if the To side of the given cost matches
 835** the given string.
 836*/
 837static int matchTo(EditDist3Cost *p, const char *z, int n){
 838  assert( n>0 );
 839  if( p->a[p->nFrom]!=z[0] ) return 0;
 840  if( p->nTo>n ) return 0;
 841  if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0;
 842  return 1;
 843}
 844
 845/*
 846** Return TRUE (non-zero) if the From side of the given cost matches
 847** the given string.
 848*/
 849static int matchFrom(EditDist3Cost *p, const char *z, int n){
 850  assert( p->nFrom<=n );
 851  if( p->nFrom ){
 852    if( p->a[0]!=z[0] ) return 0;
 853    if( strncmp(p->a, z, p->nFrom)!=0 ) return 0;
 854  }
 855  return 1;
 856}
 857
 858/*
 859** Return TRUE (non-zero) of the next FROM character and the next TO
 860** character are the same.
 861*/
 862static int matchFromTo(
 863  EditDist3FromString *pStr,  /* Left hand string */
 864  int n1,                     /* Index of comparison character on the left */
 865  const char *z2,             /* Right-handl comparison character */
 866  int n2                      /* Bytes remaining in z2[] */
 867){
 868  int b1 = pStr->a[n1].nByte;
 869  if( b1>n2 ) return 0;
 870  assert( b1>0 );
 871  if( pStr->z[n1]!=z2[0] ) return 0;
 872  if( strncmp(pStr->z+n1, z2, b1)!=0 ) return 0;
 873  return 1;
 874}
 875
 876/*
 877** Delete an EditDist3FromString objecct
 878*/
 879static void editDist3FromStringDelete(EditDist3FromString *p){
 880  int i;
 881  if( p ){
 882    for(i=0; i<p->n; i++){
 883      sqlite3_free(p->a[i].apDel);
 884      sqlite3_free(p->a[i].apSubst);
 885    }
 886    sqlite3_free(p);
 887  }
 888}
 889
 890/*
 891** Create a EditDist3FromString object.
 892*/
 893static EditDist3FromString *editDist3FromStringNew(
 894  const EditDist3Lang *pLang,
 895  const char *z,
 896  int n
 897){
 898  EditDist3FromString *pStr;
 899  EditDist3Cost *p;
 900  int i;
 901
 902  if( z==0 ) return 0;
 903  if( n<0 ) n = (int)strlen(z);
 904  pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
 905  if( pStr==0 ) return 0;
 906  pStr->a = (EditDist3From*)&pStr[1];
 907  memset(pStr->a, 0, sizeof(pStr->a[0])*n);
 908  pStr->n = n;
 909  pStr->z = (char*)&pStr->a[n];
 910  memcpy(pStr->z, z, n+1);
 911  if( n && z[n-1]=='*' ){
 912    pStr->isPrefix = 1;
 913    n--;
 914    pStr->n--;
 915    pStr->z[n] = 0;
 916  }else{
 917    pStr->isPrefix = 0;
 918  }
 919
 920  for(i=0; i<n; i++){
 921    EditDist3From *pFrom = &pStr->a[i];
 922    memset(pFrom, 0, sizeof(*pFrom));
 923    pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
 924    for(p=pLang->pCost; p; p=p->pNext){
 925      EditDist3Cost **apNew;
 926      if( i+p->nFrom>n ) continue;
 927      if( matchFrom(p, z+i, n-i)==0 ) continue;
 928      if( p->nTo==0 ){
 929        apNew = sqlite3_realloc64(pFrom->apDel,
 930                                sizeof(*apNew)*(pFrom->nDel+1));
 931        if( apNew==0 ) break;
 932        pFrom->apDel = apNew;
 933        apNew[pFrom->nDel++] = p;
 934      }else{
 935        apNew = sqlite3_realloc64(pFrom->apSubst,
 936                                sizeof(*apNew)*(pFrom->nSubst+1));
 937        if( apNew==0 ) break;
 938        pFrom->apSubst = apNew;
 939        apNew[pFrom->nSubst++] = p;
 940      }
 941    }
 942    if( p ){
 943      editDist3FromStringDelete(pStr);
 944      pStr = 0;
 945      break;
 946    }
 947  }
 948  return pStr;
 949}
 950
 951/*
 952** Update entry m[i] such that it is the minimum of its current value
 953** and m[j]+iCost.
 954*/
 955static void updateCost(
 956  unsigned int *m,
 957  int i,
 958  int j,
 959  int iCost
 960){
 961  unsigned int b;
 962  assert( iCost>=0 );
 963  assert( iCost<10000 );
 964  b = m[j] + iCost;
 965  if( b<m[i] ) m[i] = b;
 966}
 967
/*
** How much stack space (in bytes) to use for the Wagner matrix in
** editDist3Core().  If more space than this is required, the entire
** matrix is taken from the heap.  To reduce the load on the memory
** allocator, make this value as large as practical for the
** architecture in use.
*/
 975#ifndef SQLITE_SPELLFIX_STACKALLOC_SZ
 976# define SQLITE_SPELLFIX_STACKALLOC_SZ  (1024)
 977#endif
 978
 979/* Compute the edit distance between two strings.
 980**
 981** If an error occurs, return a negative number which is the error code.
 982**
 983** If pnMatch is not NULL, then *pnMatch is set to the number of characters
 984** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
 985** not contain the pattern for a prefix-search, then this is always the number
 986** of characters in z2. If pFrom does contain a prefix search pattern, then
 987** it is the number of characters in the prefix of z2 that was deemed to 
 988** match pFrom.
 989*/
 990static int editDist3Core(
 991  EditDist3FromString *pFrom,  /* The FROM string */
 992  const char *z2,              /* The TO string */
 993  int n2,                      /* Length of the TO string */
 994  const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
 995  int *pnMatch                 /* OUT: Characters in matched prefix */
 996){
 997  int k, n;
 998  int i1, b1;
 999  int i2, b2;
1000  EditDist3FromString f = *pFrom;
1001  EditDist3To *a2;
1002  unsigned int *m;
1003  unsigned int *pToFree;
1004  int szRow;
1005  EditDist3Cost *p;
1006  int res;
1007  sqlite3_uint64 nByte;
1008  unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];
1009
1010  /* allocate the Wagner matrix and the aTo[] array for the TO string */
1011  n = (f.n+1)*(n2+1);
1012  n = (n+1)&~1;
1013  nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
1014  if( nByte<=sizeof(stackSpace) ){
1015    m = stackSpace;
1016    pToFree = 0;
1017  }else{
1018    m = pToFree = sqlite3_malloc64( nByte );
1019    if( m==0 ) return -1;            /* Out of memory */
1020  }
1021  a2 = (EditDist3To*)&m[n];
1022  memset(a2, 0, sizeof(a2[0])*n2);
1023
1024  /* Fill in the a1[] matrix for all characters of the TO string */
1025  for(i2=0; i2<n2; i2++){
1026    a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
1027    for(p=pLang->pCost; p; p=p->pNext){
1028      EditDist3Cost **apNew;
1029      if( p->nFrom>0 ) break;
1030      if( i2+p->nTo>n2 ) continue;
1031      if( p->a[0]>z2[i2] ) break;
1032      if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
1033      a2[i2].nIns++;
1034      apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
1035      if( apNew==0 ){
1036        res = -1;  /* Out of memory */
1037        goto editDist3Abort;
1038      }
1039      a2[i2].apIns = apNew;
1040      a2[i2].apIns[a2[i2].nIns-1] = p;
1041    }
1042  }
1043
1044  /* Prepare to compute the minimum edit distance */
1045  szRow = f.n+1;
1046  memset(m, 0x01, (n2+1)*szRow*sizeof(m[0]));
1047  m[0] = 0;
1048
1049  /* First fill in the top-row of the matrix with FROM deletion costs */
1050  for(i1=0; i1<f.n; i1 += b1){
1051    b1 = f.a[i1].nByte;
1052    updateCost(m, i1+b1, i1, pLang->iDelCost);
1053    for(k=0; k<f.a[i1].nDel; k++){
1054      p = f.a[i1].apDel[k];
1055      updateCost(m, i1+p->nFrom, i1, p->iCost);
1056    }
1057  }
1058
1059  /* Fill in all subsequent rows, top-to-bottom, left-to-right */
1060  for(i2=0; i2<n2; i2 += b2){
1061    int rx;      /* Starting index for current row */
1062    int rxp;     /* Starting index for previous row */
1063    b2 = a2[i2].nByte;
1064    rx = szRow*(i2+b2);
1065    rxp = szRow*i2;
1066    updateCost(m, rx, rxp, pLang->iInsCost);
1067    for(k=0; k<a2[i2].nIns; k++){
1068      p = a2[i2].apIns[k];
1069      updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost);
1070    }
1071    for(i1=0; i1<f.n; i1+=b1){
1072      int cx;    /* Index of current cell */
1073      int cxp;   /* Index of cell immediately to the left */
1074      int cxd;   /* Index of cell to the left and one row above */
1075      int cxu;   /* Index of cell immediately above */
1076      b1 = f.a[i1].nByte;
1077      cxp = rx + i1;
1078      cx = cxp + b1;
1079      cxd = rxp + i1;
1080      cxu = cxd + b1;
1081      updateCost(m, cx, cxp, pLang->iDelCost);
1082      for(k=0; k<f.a[i1].nDel; k++){
1083        p = f.a[i1].apDel[k];
1084        updateCost(m, cxp+p->nFrom, cxp, p->iCost);
1085      }
1086      updateCost(m, cx, cxu, pLang->iInsCost);
1087      if( matchFromTo(&f, i1, z2+i2, n2-i2) ){
1088        updateCost(m, cx, cxd, 0);
1089      }
1090      updateCost(m, cx, cxd, pLang->iSubCost);
1091      for(k=0; k<f.a[i1].nSubst; k++){
1092        p = f.a[i1].apSubst[k];
1093        if( matchTo(p, z2+i2, n2-i2) ){
1094          updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost);
1095        }
1096      }
1097    }
1098  }
1099
1100#if 0  /* Enable for debugging */
1101  printf("         ^");
1102  for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff);
1103  printf("\n   ^:");
1104  for(i1=0; i1<szRow; i1++){
1105    int v = m[i1];
1106    if( v>9999 ) printf(" ****");
1107    else         printf(" %4d", v);
1108  }
1109  printf("\n");
1110  for(i2=0; i2<n2; i2++){
1111    printf("%c-%02x:", z2[i2], z2[i2]&0xff);
1112    for(i1=0; i1<szRow; i1++){
1113      int v = m[(i2+1)*szRow+i1];
1114      if( v>9999 ) printf(" ****");
1115      else         printf(" %4d", v);
1116    }
1117    printf("\n");
1118  }
1119#endif
1120
1121  /* Free memory allocations and return the result */
1122  res = (int)m[szRow*(n2+1)-1];
1123  n = n2;
1124  if( f.isPrefix ){
1125    for(i2=1; i2<=n2; i2++){
1126      int b = m[szRow*i2-1];
1127      if( b<=res ){ 
1128        res = b;
1129        n = i2 - 1;
1130      }
1131    }
1132  }
1133  if( pnMatch ){
1134    int nExtra = 0;
1135    for(k=0; k<n; k++){
1136      if( (z2[k] & 0xc0)==0x80 ) nExtra++;
1137    }
1138    *pnMatch = n - nExtra;
1139  }
1140
1141editDist3Abort:
1142  for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
1143  sqlite3_free(pToFree);
1144  return res;
1145}
1146
1147/*
1148** Get an appropriate EditDist3Lang object.
1149*/
1150static const EditDist3Lang *editDist3FindLang(
1151  EditDist3Config *pConfig,
1152  int iLang
1153){
1154  int i;
1155  for(i=0; i<pConfig->nLang; i++){
1156    if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i];
1157  }
1158  return &editDist3Lang;
1159}
1160
1161/*
1162** Function:    editdist3(A,B,iLang)
1163**              editdist3(tablename)
1164**
1165** Return the cost of transforming string A into string B using edit
1166** weights for iLang.
1167**
1168** The second form loads edit weights into memory from a table.
1169*/
1170static void editDist3SqlFunc(
1171  sqlite3_context *context,
1172  int argc,
1173  sqlite3_value **argv
1174){
1175  EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context);
1176  sqlite3 *db = sqlite3_context_db_handle(context);
1177  int rc;
1178  if( argc==1 ){
1179    const char *zTable = (const char*)sqlite3_value_text(argv[0]);
1180    rc = editDist3ConfigLoad(pConfig, db, zTable);
1181    if( rc ) sqlite3_result_error_code(context, rc);
1182  }else{
1183    const char *zA = (const char*)sqlite3_value_text(argv[0]);
1184    const char *zB = (const char*)sqlite3_value_text(argv[1]);
1185    int nA = sqlite3_value_bytes(argv[0]);
1186    int nB = sqlite3_value_bytes(argv[1]);
1187    int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0;
1188    const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang);
1189    EditDist3FromString *pFrom;
1190    int dist;
1191
1192    pFrom = editDist3FromStringNew(pLang, zA, nA);
1193    if( pFrom==0 ){
1194      sqlite3_result_error_nomem(context);
1195      return;
1196    }
1197    dist = editDist3Core(pFrom, zB, nB, pLang, 0);
1198    editDist3FromStringDelete(pFrom);
1199    if( dist==(-1) ){
1200      sqlite3_result_error_nomem(context);
1201    }else{
1202      sqlite3_result_int(context, dist);
1203    }
1204  } 
1205}
1206
1207/*
1208** Register the editDist3 function with SQLite
1209*/
1210static int editDist3Install(sqlite3 *db){
1211  int rc;
1212  EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
1213  if( pConfig==0 ) return SQLITE_NOMEM;
1214  memset(pConfig, 0, sizeof(*pConfig));
1215  rc = sqlite3_create_function_v2(db, "editdist3",
1216              2, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1217              editDist3SqlFunc, 0, 0, 0);
1218  if( rc==SQLITE_OK ){
1219    rc = sqlite3_create_function_v2(db, "editdist3",
1220                3, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1221                editDist3SqlFunc, 0, 0, 0);
1222  }
1223  if( rc==SQLITE_OK ){
1224    rc = sqlite3_create_function_v2(db, "editdist3",
1225                1, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1226                editDist3SqlFunc, 0, 0, editDist3ConfigDelete);
1227  }else{
1228    sqlite3_free(pConfig);
1229  }
1230  return rc;
1231}
1232/* End configurable cost unicode edit distance routines
1233******************************************************************************
1234******************************************************************************
1235** Begin transliterate unicode-to-ascii implementation
1236*/
1237
#if !SQLITE_AMALGAMATION
/*
** Lookup table used to decode the first byte of a multi-byte UTF-8
** character.  The table is indexed by (lead byte - 0xc0) and yields the
** payload bits that the lead byte contributes to the character value.
*/
static const unsigned char sqlite3Utf8Trans1[] = {
  /* Lead bytes 0xc0..0xdf: the low five bits */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,

  /* Lead bytes 0xe0..0xef: the low four bits */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,

  /* Lead bytes 0xf0..0xf7: the low three bits */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,

  /* Lead bytes 0xf8..0xfb: the low two bits */
  0x00, 0x01, 0x02, 0x03,

  /* Lead bytes 0xfc..0xfd: the low bit */
  0x00, 0x01,

  /* Bytes 0xfe and 0xff */
  0x00, 0x00,
};
#endif
1254
1255/*
1256** Return the value of the first UTF-8 character in the string.
1257*/
1258static int utf8Read(const unsigned char *z, int n, int *pSize){
1259  int c, i;
1260
1261  /* All callers to this routine (in the current implementation)
1262  ** always have n>0. */
1263  if( NEVER(n==0) ){
1264    c = i = 0;
1265  }else{
1266    c = z[0];
1267    i = 1;
1268    if( c>=0xc0 ){
1269      c = sqlite3Utf8Trans1[c-0xc0];
1270      while( i<n && (z[i] & 0xc0)==0x80 ){
1271        c = (c<<6) + (0x3f & z[i++]);
1272      }
1273    }
1274  }
1275  *pSize = i;
1276  return c;
1277}
1278
/*
** Count the characters in the UTF-8 string held in the nIn-byte buffer
** zIn.  The buffer is walked one character at a time with utf8Read();
** only the size reported by utf8Read() is used, the decoded value is
** discarded.
*/
static int utf8Charlen(const char *zIn, int nIn){
  const unsigned char *z = (const unsigned char*)zIn;
  int nChar = 0;   /* Characters seen so far */
  int iOfst = 0;   /* Byte offset of the next character */
  while( iOfst<nIn ){
    int sz;
    utf8Read(&z[iOfst], nIn-iOfst, &sz);
    iOfst += sz;
    nChar++;
  }
  return nChar;
}
1293
/*
** One entry of the unicode-to-ASCII transliteration table.  cFrom is
** the unicode code point.  cTo0, cTo1, ... hold the replacement
** characters in order; entries in the table set the unused trailing
** slots to 0x00.
*/
typedef struct Transliteration {
  unsigned short cFrom;      /* Unicode code point to translate */
  unsigned char cTo0;        /* First replacement character */
  unsigned char cTo1;        /* Second replacement character, or 0x00 */
  unsigned char cTo2;        /* Third replacement character, or 0x00 */
  unsigned char cTo3;        /* Fourth replacement character, or 0x00 */
#ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
  unsigned char cTo4;        /* Fifth replacement character, or 0x00 */
#endif
} Transliteration;
1302
1303/*
1304** Table of translations from unicode characters into ASCII.
1305*/
1306static const Transliteration translit[] = {
1307  { 0x00A0,  0x20, 0x00, 0x00, 0x00 },  /*   to   */
1308  { 0x00B5,  0x75, 0x00, 0x00, 0x00 },  /* µ to u */
1309  { 0x00C0,  0x41, 0x00, 0x00, 0x00 },  /* À to A */
1310  { 0x00C1,  0x41, 0x00, 0x00, 0x00 },  /* Á to A */
1311  { 0x00C2,  0x41, 0x00, 0x00, 0x00 },  /* Â to A */
1312  { 0x00C3,  0x41, 0x00, 0x00, 0x00 },  /* Ã to A */
1313  { 0x00C4,  0x41, 0x65, 0x00, 0x00 },  /* Ä to Ae */
1314  { 0x00C5,  0x41, 0x61, 0x00, 0x00 },  /* Å to Aa */
1315  { 0x00C6,  0x41, 0x45, 0x00, 0x00 },  /* Æ to AE */
1316  { 0x00C7,  0x43, 0x00, 0x00, 0x00 },  /* Ç to C */
1317  { 0x00C8,  0x45, 0x00, 0x00, 0x00 },  /* È to E */
1318  { 0x00C9,  0x45, 0x00, 0x00, 0x00 },  /* É to E */
1319  { 0x00CA,  0x45, 0x00, 0x00, 0x00 },  /* Ê to E */
1320  { 0x00CB,  0x45, 0x00, 0x00, 0x00 },  /* Ë to E */
1321  { 0x00CC,  0x49, 0x00, 0x00, 0x00 },  /* Ì to I */
1322  { 0x00CD,  0x49, 0x00, 0x00, 0x00 },  /* Í to I */
1323  { 0x00CE,  0x49, 0x00, 0x00, 0x00 },  /* Î to I */
1324  { 0x00CF,  0x49, 0x00, 0x00, 0x00 },  /* Ï to I */
1325  { 0x00D0,  0x44, 0x00, 0x00, 0x00 },  /* Ð to D */
1326  { 0x00D1,  0x4E, 0x00, 0x00, 0x00 },  /* Ñ to N */
1327  { 0x00D2,  0x4F, 0x00, 0x00, 0x00 },  /* Ò to O */
1328  { 0x00D3,  0x4F, 0x00, 0x00, 0x00 },  /* Ó to O */
1329  { 0x00D4,  0x4F, 0x00, 0x00, 0x00 },  /* Ô to O */
1330  { 0x00D5,  0x4F, 0x00, 0x00, 0x00 },  /* Õ to O */
1331  { 0x00D6,  0x4F, 0x65, 0x00, 0x00 },  /* Ö to Oe */
1332  { 0x00D7,  0x78, 0x00, 0x00, 0x00 },  /* × to x */
1333  { 0x00D8,  0x4F, 0x00, 0x00, 0x00 },  /* Ø to O */
1334  { 0x00D9,  0x55, 0x00, 0x00, 0x00 },  /* Ù to U */
1335  { 0x00DA,  0x55, 0x00, 0x00, 0x00 },  /* Ú to U */
1336  { 0x00DB,  0x55, 0x00, 0x00, 0x00 },  /* Û to U */
1337  { 0x00DC,  0x55, 0x65, 0x00, 0x00 },  /* Ü to Ue */
1338  { 0x00DD,  0x59, 0x00, 0x00, 0x00 },  /* Ý to Y */
1339  { 0x00DE,  0x54, 0x68, 0x00, 0x00 },  /* Þ to Th */
1340  { 0x00DF,  0x73, 0x73, 0x00, 0x00 },  /* ß to ss */
1341  { 0x00E0,  0x61, 0x00, 0x00, 0x00 },  /* à to a */
1342  { 0x00E1,  0x61, 0x00, 0x00, 0x00 },  /* á to a */
1343  { 0x00E2,  0x61, 0x00, 0x00, 0x00 },  /* â to a */
1344  { 0x00E3,  0x61, 0x00, 0x00, 0x00 },  /* ã to a */
1345  { 0x00E4,  0x61, 0x65, 0x00, 0x00 },  /* ä to ae */
1346  { 0x00E5,  0x61, 0x61, 0x00, 0x00 },  /* å to aa */
1347  { 0x00E6,  0x61, 0x65, 0x00, 0x00 },  /* æ to ae */
1348  { 0x00E7,  0x63, 0x00, 0x00, 0x00 },  /* ç to c */
1349  { 0x00E8,  0x65, 0x00, 0x00, 0x00 },  /* è to e */
1350  { 0x00E9,  0x65, 0x00, 0x00, 0x00 },  /* é to e */
1351  { 0x00EA,  0x65, 0x00, 0x00, 0x00 },  /* ê to e */
1352  { 0x00EB,  0x65, 0x00, 0x00, 0x00 },  /* ë to e */
1353  { 0x00EC,  0x69, 0x00, 0x00, 0x00 },  /* ì to i */
1354  { 0x00ED,  0x69, 0x00, 0x00, 0x00 },  /* í to i */
1355  { 0x00EE,  0x69, 0x00, 0x00, 0x00 },  /* î to i */
1356  { 0x00EF,  0x69, 0x00, 0x00, 0x00 },  /* ï to i */
1357  { 0x00F0,  0x64, 0x00, 0x00, 0x00 },  /* ð to d */
1358  { 0x00F1,  0x6E, 0x00, 0x00, 0x00 },  /* ñ to n */
1359  { 0x00F2,  0x6F, 0x00, 0x00, 0x00 },  /* ò to o */
1360  { 0x00F3,  0x6F, 0x00, 0x00, 0x00 },  /* ó to o */
1361  { 0x00F4,  0x6F, 0x00, 0x00, 0x00 },  /* ô to o */
1362  { 0x00F5,  0x6F, 0x00, 0x00, 0x00 },  /* õ to o */
1363  { 0x00F6,  0x6F, 0x65, 0x00, 0x00 },  /* ö to oe */
1364  { 0x00F7,  0x3A, 0x00, 0x00, 0x00 },  /* ÷ to : */
1365  { 0x00F8,  0x6F, 0x00, 0x00, 0x00 },  /* ø to o */
1366  { 0x00F9,  0x75, 0x00, 0x00, 0x00 },  /* ù to u */
1367  { 0x00FA,  0x75, 0x00, 0x00, 0x00 },  /* ú to u */
1368  { 0x00FB,  0x75, 0x00, 0x00, 0x00 },  /* û to u */
1369  { 0x00FC,  0x75, 0x65, 0x00, 0x00 },  /* ü to ue */
1370  { 0x00FD,  0x79, 0x00, 0x00, 0x00 },  /* ý to y */
1371  { 0x00FE,  0x74, 0x68, 0x00, 0x00 },  /* þ to th */
1372  { 0x00FF,  0x79, 0x00, 0x00, 0x00 },  /* ÿ to y */
1373  { 0x0100,  0x41, 0x00, 0x00, 0x00 },  /* Ā to A */
1374  { 0x0101,  0x61, 0x00, 0x00, 0x00 },  /* ā to a */
1375  { 0x0102,  0x41, 0x00, 0x00, 0x00 },  /* Ă to A */
1376  { 0x0103,  0x61, 0x00, 0x00, 0x00 },  /* ă to a */
1377  { 0x0104,  0x41, 0x00, 0x00, 0x00 },  /* Ą to A */
1378  { 0x0105,  0x61, 0x00, 0x00, 0x00 },  /* ą to a */
1379  { 0x0106,  0x43, 0x00, 0x00, 0x00 },  /* Ć to C */
1380  { 0x0107,  0x63, 0x00, 0x00, 0x00 },  /* ć to c */
1381  { 0x0108,  0x43, 0x68, 0x00, 0x00 },  /* Ĉ to Ch */
1382  { 0x0109,  0x63, 0x68, 0x00, 0x00 },  /* ĉ to ch */
1383  { 0x010A,  0x43, 0x00, 0x00, 0x00 },  /* Ċ to C */
1384  { 0x010B,  0x63, 0x00, 0x00, 0x00 },  /* ċ to c */
1385  { 0x010C,  0x43, 0x00, 0x00, 0x00 },  /* Č to C */
1386  { 0x010D,  0x63, 0x00, 0x00, 0x00 },  /* č to c */
1387  { 0x010E,  0x44, 0x00, 0x00, 0x00 },  /* Ď to D */
1388  { 0x010F,  0x64, 0x00, 0x00, 0x00 },  /* ď to d */
1389  { 0x0110,  0x44, 0x00, 0x00, 0x00 },  /* Đ to D */
1390  { 0x0111,  0x64, 0x00, 0x00, 0x00 },  /* đ to d */
1391  { 0x0112,  0x45, 0x00, 0x00, 0x00 },  /* Ē to E */
1392  { 0x0113,  0x65, 0x00, 0x00, 0x00 },  /* ē to e */
1393  { 0x0114,  0x45, 0x00, 0x00, 0x00 },  /* Ĕ to E */
1394  { 0x0115,  0x65, 0x00, 0x00, 0x00 },  /* ĕ to e */
1395  { 0x0116,  0x45, 0x00, 0x00, 0x00 },  /* Ė to E */
1396  { 0x0117,  0x65, 0x00, 0x00, 0x00 },  /* ė to e */
1397  { 0x0118,  0x45, 0x00, 0x00, 0x00 },  /* Ę to E */
1398  { 0x0119,  0x65, 0x00, 0x00, 0x00 },  /* ę to e */
1399  { 0x011A,  0x45, 0x00, 0x00, 0x00 },  /* Ě to E */
1400  { 0x011B,  0x65, 0x00, 0x00, 0x00 },  /* ě to e */
1401  { 0x011C,  0x47, 0x68, 0x00, 0x00 },  /* Ĝ to Gh */
1402  { 0x011D,  0x67, 0x68, 0x00, 0x00 },  /* ĝ to gh */
1403  { 0x011E,  0x47, 0x00, 0x00, 0x00 },  /* Ğ to G */
1404  { 0x011F,  0x67, 0x00, 0x00, 0x00 },  /* ğ to g */
1405  { 0x0120,  0x47, 0x00, 0x00, 0x00 },  /* Ġ to G */
1406  { 0x0121,  0x67, 0x00, 0x00, 0x00 },  /* ġ to g */
1407  { 0x0122,  0x47, 0x00, 0x00, 0x00 },  /* Ģ to G */
1408  { 0x0123,  0x67, 0x00, 0x00, 0x00 },  /* ģ to g */
1409  { 0x0124,  0x48, 0x68, 0x00, 0x00 },  /* Ĥ to Hh */
1410  { 0x0125,  0x68, 0x68, 0x00, 0x00 },  /* ĥ to hh */
1411  { 0x0126,  0x48, 0x00, 0x00, 0x00 },  /* Ħ to H */
1412  { 0x0127,  0x68, 0x00, 0x00, 0x00 },  /* ħ to h */
1413  { 0x0128,  0x49, 0x00, 0x00, 0x00 },  /* Ĩ to I */
1414  { 0x0129,  0x69, 0x00, 0x00, 0x00 },  /* ĩ to i */
1415  { 0x012A,  0x49, 0x00, 0x00, 0x00 },  /* Ī to I */
1416  { 0x012B,  0x69, 0x00, 0x00, 0x00 },  /* ī to i */
1417  { 0x012C,  0x49, 0x00, 0x00, 0x00 },  /* Ĭ to I */
1418  { 0x012D,  0x69, 0x00, 0x00, 0x00 },  /* ĭ to i */
1419  { 0x012E,  0x49, 0x00, 0x00, 0x00 },  /* Į to I */
1420  { 0x012F,  0x69, 0x00, 0x00, 0x00 },  /* į to i */
1421  { 0x0130,  0x49, 0x00, 0x00, 0x00 },  /* İ to I */
1422  { 0x0131,  0x69, 0x00, 0x00, 0x00 },  /* ı to i */
1423  { 0x0132,  0x49, 0x4A, 0x00, 0x00 },  /* IJ to IJ */
1424  { 0x0133,  0x69, 0x6A, 0x00, 0x00 },  /* ij to ij */
1425  { 0x0134,  0x4A, 0x68, 0x00, 0x00 },  /* Ĵ to Jh */
1426  { 0x0135,  0x6A, 0x68, 0x00, 0x00 },  /* ĵ to jh */
1427  { 0x0136,  0x4B, 0x00, 0x00, 0x00 },  /* Ķ to K */
1428  { 0x0137,  0x6B, 0x00, 0x00, 0x00 },  /* ķ to k */
1429  { 0x0138,  0x6B, 0x00, 0x00, 0x00 },  /* ĸ to k */
1430  { 0x0139,  0x4C, 0x00, 0x00, 0x00 },  /* Ĺ to L */
1431  { 0x013A,  0x6C, 0x00, 0x00, 0x00 },  /* ĺ to l */
1432  { 0x013B,  0x4C, 0x00, 0x00, 0x00 },  /* Ļ to L */
1433  { 0x013C,  0x6C, 0x00, 0x00, 0x00 },  /* ļ to l */
1434  { 0x013D,  0x4C, 0x00, 0x00, 0x00 },  /* Ľ to L */
1435  { 0x013E,  0x6C, 0x00, 0x00, 0x00 },  /* ľ to l */
1436  { 0x013F,  0x4C, 0x2E, 0x00, 0x00 },  /* Ŀ to L. */
1437  { 0x0140,  0x6C, 0x2E, 0x00, 0x00 },  /* ŀ to l. */
1438  { 0x0141,  0x4C, 0x00, 0x00, 0x00 },  /* Ł to L */
1439  { 0x0142,  0x6C, 0x00, 0x00, 0x00 },  /* ł to l */
1440  { 0x0143,  0x4E, 0x00, 0x00, 0x00 },  /* Ń to N */
1441  { 0x0144,  0x6E, 0x00, 0x00, 0x00 },  /* ń to n */
1442  { 0x0145,  0x4E, 0x00, 0x00, 0x00 },  /* Ņ to N */
1443  { 0x0146,  0x6E, 0x00, 0x00, 0x00 },  /* ņ to n */
1444  { 0x0147,  0x4E, 0x00, 0x00, 0x00 },  /* Ň to N */
1445  { 0x0148,  0x6E, 0x00, 0x00, 0x00 },  /* ň to n */
1446  { 0x0149,  0x27, 0x6E, 0x00, 0x00 },  /* ʼn to 'n */
1447  { 0x014A,  0x4E, 0x47, 0x00, 0x00 },  /* Ŋ to NG */
1448  { 0x014B,  0x6E, 0x67, 0x00, 0x00 },  /* ŋ to ng */
1449  { 0x014C,  0x4F, 0x00, 0x00, 0x00 },  /* Ō to O */
1450  { 0x014D,  0x6F, 0x00, 0x00, 0x00 },  /* ō to o */
1451  { 0x014E,  0x4F, 0x00, 0x00, 0x00 },  /* Ŏ to O */
1452  { 0x014F,  0x6F, 0x00, 0x00, 0x00 },  /* ŏ to o */
1453  { 0x0150,  0x4F, 0x00, 0x00, 0x00 },  /* Ő to O */
1454  { 0x0151,  0x6F, 0x00, 0x00, 0x00 },  /* ő to o */
1455  { 0x0152,  0x4F, 0x45, 0x00, 0x00 },  /* Œ to OE */
1456  { 0x0153,  0x6F, 0x65, 0x00, 0x00 },  /* œ to oe */
1457  { 0x0154,  0x52, 0x00, 0x00, 0x00 },  /* Ŕ to R */
1458  { 0x0155,  0x72, 0x00, 0x00, 0x00 },  /* ŕ to r */
1459  { 0x0156,  0x52, 0x00, 0x00, 0x00 },  /* Ŗ to R */
1460  { 0x0157,  0x72, 0x00, 0x00, 0x00 },  /* ŗ to r */
1461  { 0x0158,  0x52, 0x00, 0x00, 0x00 },  /* Ř to R */
1462  { 0x0159,  0x72, 0x00, 0x00, 0x00 },  /* ř to r */
1463  { 0x015A,  0x53, 0x00, 0x00, 0x00 },  /* Ś to S */
1464  { 0x015B,  0x73, 0x00, 0x00, 0x00 },  /* ś to s */
1465  { 0x015C,  0x53, 0x68, 0x00, 0x00 },  /* Ŝ to Sh */
1466  { 0x015D,  0x73, 0x68, 0x00, 0x00 },  /* ŝ to sh */
1467  { 0x015E,  0x53, 0x00, 0x00, 0x00 },  /* Ş to S */
1468  { 0x015F,  0x73, 0x00, 0x00, 0x00 },  /* ş to s */
1469  { 0x0160,  0x53, 0x00, 0x00, 0x00 },  /* Š to S */
1470  { 0x0161,  0x73, 0x00, 0x00, 0x00 },  /* š to s */
1471  { 0x0162,  0x54, 0x00, 0x00, 0x00 },  /* Ţ to T */
1472  { 0x0163,  0x74, 0x00, 0x00, 0x00 },  /* ţ to t */
1473  { 0x0164,  0x54, 0x00, 0x00, 0x00 },  /* Ť to T */
1474  { 0x0165,  0x74, 0x00, 0x00, 0x00 },  /* ť to t */
1475  { 0x0166,  0x54, 0x00, 0x00, 0x00 },  /* Ŧ to T */
1476  { 0x0167,  0x74, 0x00, 0x00, 0x00 },  /* ŧ to t */
1477  { 0x0168,  0x55, 0x00, 0x00, 0x00 },  /* Ũ to U */
1478  { 0x0169,  0x75, 0x00, 0x00, 0x00 },  /* ũ to u */
1479  { 0x016A,  0x55, 0x00, 0x00, 0x00 },  /* Ū to U */
1480  { 0x016B,  0x75, 0x00, 0x00, 0x00 },  /* ū to u */
1481  { 0x016C,  0x55, 0x00, 0x00, 0x00 },  /* Ŭ to U */
1482  { 0x016D,  0x75, 0x00, 0x00, 0x00 },  /* ŭ to u */
1483  { 0x016E,  0x55, 0x00, 0x00, 0x00 },  /* Ů to U */
1484  { 0x016F,  0x75, 0x00, 0x00, 0x00 },  /* ů to u */
1485  { 0x0170,  0x55, 0x00, 0x00, 0x00 },  /* Ű to U */
1486  { 0x0171,  0x75, 0x00, 0x00, 0x00 },  /* ű to u */
1487  { 0x0172,  0x55, 0x00, 0x00, 0x00 },  /* Ų to U */
1488  { 0x0173,  0x75, 0x00, 0x00, 0x00 },  /* ų to u */
1489  { 0x0174,  0x57, 0x00, 0x00, 0x00 },  /* Ŵ to W */
1490  { 0x0175,  0x77, 0x00, 0x00, 0x00 },  /* ŵ to w */
1491  { 0x0176,  0x59, 0x00, 0x00, 0x00 },  /* Ŷ to Y */
1492  { 0x0177,  0x79, 0x00, 0x00, 0x00 },  /* ŷ to y */
1493  { 0x0178,  0x59, 0x00, 0x00, 0x00 },  /* Ÿ to Y */
1494  { 0x0179,  0x5A, 0x00, 0x00, 0x00 },  /* Ź to Z */
1495  { 0x017A,  0x7A, 0x00, 0x00, 0x00 },  /* ź to z */
1496  { 0x017B,  0x5A, 0x00, 0x00, 0x00 },  /* Ż to Z */
1497  { 0x017C,  0x7A, 0x00, 0x00, 0x00 },  /* ż to z */
1498  { 0x017D,  0x5A, 0x00, 0x00, 0x00 },  /* Ž to Z */
1499  { 0x017E,  0x7A, 0x00, 0x00, 0x00 },  /* ž to z */
1500  { 0x017F,  0x73, 0x00, 0x00, 0x00 },  /* ſ to s */
1501  { 0x0192,  0x66, 0x00, 0x00, 0x00 },  /* ƒ to f */
1502  { 0x0218,  0x53, 0x00, 0x00, 0x00 },  /* Ș to S */
1503  { 0x0219,  0x73, 0x00, 0x00, 0x00 },  /* ș to s */
1504  { 0x021A,  0x54, 0x00, 0x00, 0x00 },  /* Ț to T */
1505  { 0x021B,  0x74, 0x00, 0x00, 0x00 },  /* ț to t */
1506  { 0x0386,  0x41, 0x00, 0x00, 0x00 },  /* Ά to A */
1507  { 0x0388,  0x45, 0x00, 0x00, 0x00 },  /* Έ to E */
1508  { 0x0389,  0x49, 0x00, 0x00, 0x00 },  /* Ή to I */
1509  { 0x038A,  0x49, 0x00, 0x00, 0x00 },  /* Ί to I */
1510  { 0x038C,  0x4f, 0x00, 0x00, 0x00 },  /* Ό to O */
1511  { 0x038E,  0x59, 0x00, 0x00, 0x00 },  /* Ύ to Y */
1512  { 0x038F,  0x4f, 0x00, 0x00, 0x00 },  /* Ώ to O */
1513  { 0x0390,  0x69, 0x00, 0x00, 0x00 },  /* ΐ to i */
1514  { 0x0391,  0x41, 0x00, 0x00, 0x00 },  /* Α to A */
1515  { 0x0392,  0x42, 0x00, 0x00, 0x00 },  /* Β to B */
1516  { 0x0393,  0x47, 0x00, 0x00, 0x00 },  /* Γ to G */
1517  { 0x0394,  0x44, 0x00, 0x00, 0x00 },  /* Δ to D */
1518  { 0x0395,  0x45, 0x00, 0x00, 0x00 },  /* Ε to E */
1519  { 0x0396,  0x5a, 0x00, 0x00, 0x00 },  /* Ζ to Z */
1520  { 0x0397,  0x49, 0x00, 0x00, 0x00 },  /* Η to I */
1521  { 0x0398,  0x54, 0x68, 0x00, 0x00 },  /* Θ to Th */
1522  { 0x0399,  0x49, 0x00, 0x00, 0x00 },  /* Ι to I */
1523  { 0x039A,  0x4b, 0x00, 0x00, 0x00 },  /* Κ to K */
1524  { 0x039B,  0x4c, 0x00, 0x00, 0x00 },  /* Λ to L */
1525  { 0x039C,  0x4d, 0x00, 0x00, 0x00 },  /* Μ to M */
1526  { 0x039D,  0x4e, 0x00, 0x00, 0x00 },  /* Ν to N */
1527  { 0x039E,  0x58, 0x00, 0x00, 0x00 },  /* Ξ to X */
1528  { 0x039F,  0x4f, 0x00, 0x00, 0x00 },  /* Ο to O */
1529  { 0x03A0,  0x50, 0x00, 0x00, 0x00 },  /* Π to P */
1530  { 0x03A1,  0x52, 0x00, 0x00, 0x00 },  /* Ρ to R */
1531  { 0x03A3,  0x53, 0x00, 0x00, 0x00 },  /* Σ to S */
1532  { 0x03A4,  0x54, 0x00, 0x00, 0x00 },  /* Τ to T */
1533  { 0x03A5,  0x59, 0x00, 0x00, 0x00 },  /* Υ to Y */
1534  { 0x03A6,  0x46, 0x00, 0x00, 0x00 },  /* Φ to F */
1535  { 0x03A7,  0x43, 0x68, 0x00, 0x00 },  /* Χ to Ch */
1536  { 0x03A8,  0x50, 0x73, 0x00, 0x00 },  /* Ψ to Ps */
1537  { 0x03A9,  0x4f, 0x00, 0x00, 0x00 },  /* Ω to O */
1538  { 0x03AA,  0x49, 0x00, 0x00, 0x00 },  /* Ϊ to I */
1539  { 0x03AB,  0x59, 0x00, 0x00, 0x00 },  /* Ϋ to Y */
1540  { 0x03AC,  0x61, 0x00, 0x00, 0x00 },  /* ά to a */
1541  { 0x03AD,  0x65, 0x00, 0x00, 0x00 },  /* έ to e */
1542  { 0x03AE,  0x69, 0x00, 0x00, 0x00 },  /* ή to i */
1543  { 0x03AF,  0x69, 0x00, 0x00, 0x00 },  /* ί to i */
1544  { 0x03B1,  0x61, 0x00, 0x00, 0x00 },  /* α to a */
1545  { 0x03B2,  0x62, 0x00, 0x00, 0x00 },  /* β to b */
1546  { 0x03B3,  0x67, 0x00, 0x00, 0x00 },  /* γ to g */
1547  { 0x03B4,  0x64, 0x00, 0x00, 0x00 },  /* δ to d */
1548  { 0x03B5,  0x65, 0x00, 0x00, 0x00 },  /* ε to e */
1549  { 0x03B6,  0x7a, 0x00, 0x00, 0x00 },  /* ζ to z */
1550  { 0x03B7,  0x69, 0x00, 0x00, 0x00 },  /* η to i */
1551  { 0x03B8,  0x74, 0x68, 0x00, 0x00 },  /* θ to th */
1552  { 0x03B9,  0x69, 0x00, 0x00, 0x00 },  /* ι to i */
1553  { 0x03BA,  0x6b, 0x00, 0x00, 0x00 },  /* κ to k */
1554  { 0x03BB,  0x6c, 0x00, 0x00, 0x00 },  /* λ to l */
1555  { 0x03BC,  0x6d, 0x00, 0x00, 0x00 },  /* μ to m */
1556  { 0x03BD,  0x6e, 0x00, 0x00, 0x00 },  /* ν to n */
1557  { 0x03BE,  0x78, 0x00, 0x00, 0x00 },  /* ξ to x */
1558  { 0x03BF,  0x6f, 0x00, 0x00, 0x00 },  /* ο to o */
1559  { 0x03C0,  0x70, 0x00, 0x00, 0x00 },  /* π to p */
1560  { 0x03C1,  0x72, 0x00, 0x00, 0x00 },  /* ρ to r */
1561  { 0x03C3,  0x73, 0x00, 0x00, 0x00 },  /* σ to s */
1562  { 0x03C4,  0x74, 0x00, 0x00, 0x00 },  /* τ to t */
1563  { 0x03C5,  0x79, 0x00, 0x00, 0x00 },  /* υ to y */
1564  { 0x03C6,  0x66, 0x00, 0x00, 0x00 },  /* φ to f */
1565  { 0x03C7,  0x63, 0x68, 0x00, 0x00 },  /* χ to ch */
1566  { 0x03C8,  0x70, 0x73, 0x00, 0x00 },  /* ψ to ps */
1567  { 0x03C9,  0x6f, 0x00, 0x00, 0x00 },  /* ω to o */
1568  { 0x03CA,  0x69, 0x00, 0x00, 0x00 },  /* ϊ to i */
1569  { 0x03CB,  0x79, 0x00, 0x00, 0x00 },  /* ϋ to y */
1570  { 0x03CC,  0x6f, 0x00, 0x00, 0x00 },  /* ό to o */
1571  { 0x03CD,  0x79, 0x00, 0x00, 0x00 },  /* ύ to y */
1572  { 0x03CE,  0x69, 0x00, 0x00, 0x00 },  /* ώ to i */
1573  { 0x0400,  0x45, 0x00, 0x00, 0x00 },  /* Ѐ to E */
1574  { 0x0401,  0x45, 0x00, 0x00, 0x00 },  /* Ё to E */
1575  { 0x0402,  0x44, 0x00, 0x00, 0x00 },  /* Ђ to D */
1576  { 0x0403,  0x47, 0x00, 0x00, 0x00 },  /* Ѓ to G */
1577  { 0x0404,  0x45, 0x00, 0x00, 0x00 },  /* Є to E */
1578  { 0x0405,  0x5a, 0x00, 0x00, 0x00 },  /* Ѕ to Z */
1579  { 0x0406,  0x49, 0x00, 0x00, 0x00 },  /* І to I */
1580  { 0x0407,  0x49, 0x00, 0x00, 0x00 },  /* Ї to I */
1581  { 0x0408,  0x4a, 0x00, 0x00, 0x00 },  /* Ј to J */
1582  { 0x0409,  0x49, 0x00, 0x00, 0x00 },  /* Љ to I */
1583  { 0x040A,  0x4e, 0x00, 0x00, 0x00 },  /* Њ to N */
1584  { 0x040B,  0x44, 0x00, 0x00, 0x00 },  /* Ћ to D */
1585  { 0x040C,  0x4b, 0x00, 0x00, 0x00 },  /* Ќ to K */
1586  { 0x040D,  0x49, 0x00, 0x00, 0x00 },  /* Ѝ to I */
1587  { 0x040E,  0x55, 0x00, 0x00, 0x00 },  /* Ў to U */
1588  { 0x040F,  0x44, 0x00, 0x00, 0x00 },  /* Џ to D */
1589  { 0x0410,  0x41, 0x00, 0x00, 0x00 },  /* А to A */
1590  { 0x0411,  0x42, 0x00, 0x00, 0x00 },  /* Б to B */
1591  { 0x0412,  0x56, 0x00, 0x00, 0x00 },  /* В to V */
1592  { 0x0413,  0x47, 0x00, 0x00, 0x00 },  /* Г to G */
1593  { 0x0414,  0x44, 0x00, 0x00, 0x00 },  /* Д to D */
1594  { 0x0415,  0x45, 0x00, 0x00, 0x00 },  /* Е to E */
1595  { 0x0416,  0x5a, 0x68, 0x00, 0x00 },  /* Ж to Zh */
1596  { 0x0417,  0x5a, 0x00, 0x00, 0x00 },  /* З to Z */
1597  { 0x0418,  0x49, 0x00, 0x00, 0x00 },  /* И to I */
1598  { 0x0419,  0x49, 0x00, 0x00, 0x00 },  /* Й to I */
1599  { 0x041A,  0x4b, 0x00, 0x00, 0x00 },  /* К to K */
1600  { 0x041B,  0x4c, 0x00, 0x00, 0x00 },  /* Л to L */
1601  { 0x041C,  0x4d, 0x00, 0x00, 0x00 },  /* М to M */
1602  { 0x041D,  0x4e, 0x00, 0x00, 0x00 },  /* Н to N */
1603  { 0x041E,  0x4f, 0x00, 0x00, 0x00 },  /* О to O */
1604  { 0x041F,  0x50, 0x00, 0x00, 0x00 },  /* П to P */
1605  { 0x0420,  0x52, 0x00, 0x00, 0x00 },  /* Р to R */
1606  { 0x0421,  0x53, 0x00, 0x00, 0x00 },  /* С to S */
1607  { 0x0422,  0x54, 0x00, 0x00, 0x00 },  /* Т to T */
1608  { 0x0423,  0x55, 0x00, 0x00, 0x00 },  /* У to U */
1609  { 0x0424,  0x46, 0x00, 0x00, 0x00 },  /* Ф to F */
1610  { 0x0425,  0x4b, 0x68, 0x00, 0x00 },  /* Х to Kh */
1611  { 0x0426,  0x54, 0x63, 0x00, 0x00 },  /* Ц to Tc */
1612  { 0x0427,  0x43, 0x68, 0x00, 0x00 },  /* Ч to Ch */
1613  { 0x0428,  0x53, 0x68, 0x00, 0x00 },  /* Ш to Sh */
1614  { 0x0429,  0x53, 0x68, 0x63, 0x68 },  /* Щ to Shch */
1615  { 0x042A,  0x61, 0x00, 0x00, 0x00 },  /*  to A */
1616  { 0x042B,  0x59, 0x00, 0x00, 0x00 },  /* Ы to Y */
1617  { 0x042C,  0x59, 0x00, 0x00, 0x00 },  /*  to Y */
1618  { 0x042D,  0x45, 0x00, 0x00, 0x00 },  /* Э to E */
1619  { 0x042E,  0x49, 0x75, 0x00, 0x00 },  /* Ю to Iu */
1620  { 0x042F,  0x49, 0x61, 0x00, 0x00 },  /* Я to Ia */
1621  { 0x0430,  0x61, 0x00, 0x00, 0x00 },  /* а to a */
1622  { 0x0431,  0x62, 0x00, 0x00, 0x00 },  /* б to b */
1623  { 0x0432,  0x76, 0x00, 0x00, 0x00 },  /* в to v */
1624  { 0x0433,  0x67, 0x00, 0x00, 0x00 },  /* г to g */
1625  { 0x0434,  0x64, 0x00, 0x00, 0x00 },  /* д to d */
1626  { 0x0435,  0x65, 0x00, 0x00, 0x00 },  /* е to e */
1627  { 0x0436,  0x7a, 0x68, 0x00, 0x00 },  /* ж to zh */
1628  { 0x0437,  0x7a, 0x00, 0x00, 0x00 },  /* з to z */
1629  { 0x0438,  0x69, 0x00, 0x00, 0x00 },  /* и to i */
1630  { 0x0439,  0x69, 0x00, 0x00, 0x00 },  /* й to i */
1631  { 0x043A,  0x6b, 0x00, 0x00, 0x00 },  /* к to k */
1632  { 0x043B,  0x6c, 0x00, 0x00, 0x00 },  /* л to l */
1633  { 0x043C,  0x6d, 0x00, 0x00, 0x00 },  /* м to m */
1634  { 0x043D,  0x6e, 0x00, 0x00, 0x00 },  /* н to n */
1635  { 0x043E,  0x6f, 0x00, 0x00, 0x00 },  /* о to o */
1636  { 0x043F,  0x70, 0x00, 0x00, 0x00 },  /* п to p */
1637  { 0x0440,  0x72, 0x00, 0x00, 0x00 },  /* р to r */
1638  { 0x0441,  0x73, 0x00, 0x00, 0x00 },  /* с to s */
1639  { 0x0442,  0x74, 0x00, 0x00, 0x00 },  /* т to t */
1640  { 0x0443,  0x75, 0x00, 0x00, 0x00 },  /* у to u */
1641  { 0x0444,  0x66, 0x00, 0x00, 0x00 },  /* ф to f */
1642  { 0x0445,  0x6b, 0x68, 0x00, 0x00 },  /* х to kh */
1643  { 0x0446,  0x74, 0x63, 0x00, 0x00 },  /* ц to tc */
1644  { 0x0447,  0x63, 0x68, 0x00, 0x00 },  /* ч to ch */
1645  { 0x0448,  0x73, 0x68, 0x00, 0x00 },  /* ш to sh */
1646  { 0x0449,  0x73, 0x68, 0x63, 0x68 },  /* щ to shch */
1647  { 0x044A,  0x61, 0x00, 0x00, 0x00 },  /*  to a */
1648  { 0x044B,  0x79, 0x00, 0x00, 0x00 },  /* ы to y */
1649  { 0x044C,  0x79, 0x00, 0x00, 0x00 },  /*  to y */
1650  { 0x044D,  0x65, 0x00, 0x00, 0x00 },  /* э to e */
1651  { 0x044E,  0x69, 0x75, 0x00, 0x00 },  /* ю to iu */
1652  { 0x044F,  0x69, 0x61, 0x00, 0x00 },  /* я to ia */
1653  { 0x0450,  0x65, 0x00, 0x00, 0x00 },  /* ѐ to e */
1654  { 0x0451,  0x65, 0x00, 0x00, 0x00 },  /* ё to e */
1655  { 0x0452,  0x64, 0x00, 0x00, 0x00 },  /* ђ to d */
1656  { 0x0453,  0x67, 0x00, 0x00, 0x00 },  /* ѓ to g */
1657  { 0x0454,  0x65, 0x00, 0x00, 0x00 },  /* є to e */
1658  { 0x0455,  0x7a, 0x00, 0x00, 0x00 },  /* ѕ to z */
1659  { 0x0456,  0x69, 0x00, 0x00, 0x00 },  /* і to i */
1660  { 0x0457,  0x69, 0x00, 0x00, 0x00 },  /* ї to i */
1661  { 0x0458,  0x6a, 0x00, 0x00, 0x00 },  /* ј to j */
1662  { 0x0459,  0x69, 0x00, 0x00, 0x00 },  /* љ to i */
1663  { 0x045A,  0x6e, 0x00, 0x00, 0x00 },  /* њ to n */
1664  { 0x045B,  0x64, 0x00, 0x00, 0x00 },  /* ћ to d */
1665  { 0x045C,  0x6b, 0x00, 0x00, 0x00 },  /* ќ to k */
1666  { 0x045D,  0x69, 0x00, 0x00, 0x00 },  /* ѝ to i */
1667  { 0x045E,  0x75, 0x00, 0x00, 0x00 },  /* ў to u */
1668  { 0x045F,  0x64, 0x00, 0x00, 0x00 },  /* џ to d */
1669  { 0x1E02,  0x42, 0x00, 0x00, 0x00 },  /* Ḃ to B */
1670  { 0x1E03,  0x62, 0x00, 0x00, 0x00 },  /* ḃ to b */
1671  { 0x1E0A,  0x44, 0x00, 0x00, 0x00 },  /* Ḋ to D */
1672  { 0x1E0B,  0x64, 0x00, 0x00, 0x00 },  /* ḋ to d */
1673  { 0x1E1E,  0x46, 0x00, 0x00, 0x00 },  /* Ḟ to F */
1674  { 0x1E1F,  0x66, 0x00, 0x00, 0x00 },  /* ḟ to f */
1675  { 0x1E40,  0x4D, 0x00, 0x00, 0x00 },  /* Ṁ to M */
1676  { 0x1E41,  0x6D, 0x00, 0x00, 0x00 },  /* ṁ to m */
1677  { 0x1E56,  0x50, 0x00, 0x00, 0x00 },  /* Ṗ to P */
1678  { 0x1E57,  0x70, 0x00, 0x00, 0x00 },  /* ṗ to p */
1679  { 0x1E60,  0x53, 0x00, 0x00, 0x00 },  /* Ṡ to S */
1680  { 0x1E61,  0x73, 0x00, 0x00, 0x00 },  /* ṡ to s */
1681  { 0x1E6A,  0x54, 0x00, 0x00, 0x00 },  /* Ṫ to T */
1682  { 0x1E6B,  0x74, 0x00, 0x00, 0x00 },  /* ṫ to t */
1683  { 0x1E80,  0x57, 0x00, 0x00, 0x00 },  /* Ẁ to W */
1684  { 0x1E81,  0x77, 0x00, 0x00, 0x00 },  /* ẁ to w */
1685  { 0x1E82,  0x57, 0x00, 0x00, 0x00 },  /* Ẃ to W */
1686  { 0x1E83,  0x77, 0x00, 0x00, 0x00 },  /* ẃ to w */
1687  { 0x1E84,  0x57, 0x00, 0x00, 0x00 },  /* Ẅ to W */
1688  { 0x1E85,  0x77, 0x00, 0x00, 0x00 },  /* ẅ to w */
1689  { 0x1EF2,  0x59, 0x00, 0x00, 0x00 },  /* Ỳ to Y */
1690  { 0x1EF3,  0x79, 0x00, 0x00, 0x00 },  /* ỳ to y */
1691  { 0xFB00,  0x66, 0x66, 0x00, 0x00 },  /* ff to ff */
1692  { 0xFB01,  0x66, 0x69, 0x00, 0x00 },  /* fi to fi */
1693  { 0xFB02,  0x66, 0x6C, 0x00, 0x00 },  /* fl to fl */
1694  { 0xFB05,  0x73, 0x74, 0x00, 0x00 },  /* ſt to st */
1695  { 0xFB06,  0x73, 0x74, 0x00, 0x00 },  /* st to st */
1696};
1697
1698static const Transliteration *spellfixFindTranslit(int c, int *pxTop){
1699  *pxTop = (sizeof(translit)/sizeof(translit[0])) - 1;
1700  return translit;
1701}
1702
1703/*
1704** Convert the input string from UTF-8 into pure ASCII by converting
1705** all non-ASCII characters to some combination of characters in the
1706** ASCII subset.
1707**
1708** The returned string might contain more characters than the input.
1709**
1710** Space to hold the returned string comes from sqlite3_malloc() and
1711** should be freed by the caller.
1712*/
1713static unsigned char *transliterate(const unsigned char *zIn, int nIn){
1714#ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
1715  unsigned char *zOut = sqlite3_malloc64( nIn*5 + 1 );
1716#else
1717  unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
1718#endif
1719  int c, sz, nOut;
1720  if( zOut==0 ) return 0;
1721  nOut = 0;
1722  while( nIn>0 ){
1723    c = utf8Read(zIn, nIn, &sz);
1724    zIn += sz;
1725    nIn -= sz;
1726    if( c<=127 ){
1727      zOut[nOut++] = (unsigned char)c;
1728    }else{
1729      int xTop, xBtm, x;
1730      const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1731      xBtm = 0;
1732      while( xTop>=xBtm ){
1733        x = (xTop + xBtm)/2;
1734        if( tbl[x].cFrom==c ){
1735          zOut[nOut++] = tbl[x].cTo0;
1736          if( tbl[x].cTo1 ){
1737            zOut[nOut++] = tbl[x].cTo1;
1738            if( tbl[x].cTo2 ){
1739              zOut[nOut++] = tbl[x].cTo2;
1740              if( tbl[x].cTo3 ){
1741                zOut[nOut++] = tbl[x].cTo3;
1742#ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
1743                if( tbl[x].cTo4 ){
1744                  zOut[nOut++] = tbl[x].cTo4;
1745                }
1746#endif /* SQLITE_SPELLFIX_5BYTE_MAPPINGS */
1747              }
1748            }
1749          }
1750          c = 0;
1751          break;
1752        }else if( tbl[x].cFrom>c ){
1753          xTop = x-1;
1754        }else{
1755          xBtm = x+1;
1756        }
1757      }
1758      if( c ) zOut[nOut++] = '?';
1759    }
1760  }
1761  zOut[nOut] = 0;
1762  return zOut;
1763}
1764
1765/*
1766** Return the number of characters in the shortest prefix of the input
1767** string that transliterates to an ASCII string nTrans bytes or longer.
1768** Or, if the transliteration of the input string is less than nTrans
1769** bytes in size, return the number of characters in the input string.
1770*/
1771static int translen_to_charlen(const char *zIn, int nIn, int nTrans){
1772  int i, c, sz, nOut;
1773  int nChar;
1774
1775  i = nOut = 0;
1776  for(nChar=0; i<nIn && nOut<nTrans; nChar++){
1777    c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1778    i += sz;
1779
1780    nOut++;
1781    if( c>=128 ){
1782      int xTop, xBtm, x;
1783      const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1784      xBtm = 0;
1785      while( xTop>=xBtm ){
1786        x = (xTop + xBtm)/2;
1787        if( tbl[x].cFrom==c ){
1788          if( tbl[x].cTo1 ){
1789            nOut++;
1790            if( tbl[x].cTo2 ){
1791              nOut++;
1792              if( tbl[x].cTo3 ){
1793                nOut++;
1794              }
1795            }
1796          }
1797          break;
1798        }else if( tbl[x].cFrom>c ){
1799          xTop = x-1;
1800        }else{
1801          xBtm = x+1;
1802        }
1803      }
1804    }
1805  }
1806
1807  return nChar;
1808}
1809
1810
1811/*
1812**    spellfix1_translit(X)
1813**
1814** Convert a string that contains non-ASCII Roman characters into 
1815** pure ASCII.
1816*/
1817static void transliterateSqlFunc(
1818  sqlite3_context *context,
1819  int argc,
1820  sqlite3_value **argv
1821){
1822  const unsigned char *zIn = sqlite3_value_text(argv[0]);
1823  int nIn = sqlite3_value_bytes(argv[0]);
1824  unsigned char *zOut = transliterate(zIn, nIn);
1825  if( zOut==0 ){
1826    sqlite3_result_error_nomem(context);
1827  }else{
1828    sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
1829  }
1830}
1831
1832/*
1833**    spellfix1_scriptcode(X)
1834**
1835** Try to determine the dominant script used by the word X and return
1836** its ISO 15924 numeric code.
1837**
1838** The current implementation only understands the following scripts:
1839**
1840**    215  (Latin)
1841**    220  (Cyrillic)
1842**    200  (Greek)
1843**
1844** This routine will return 998 if the input X contains characters from
1845** two or more of the above scripts or 999 if X contains no characters
1846** from any of the above scripts.
1847*/
1848static void scriptCodeSqlFunc(
1849  sqlite3_context *context,
1850  int argc,
1851  sqlite3_value **argv
1852){
1853  const unsigned char *zIn = sqlite3_value_text(argv[0]);
1854  int nIn = sqlite3_value_bytes(argv[0]);
1855  int c, sz;
1856  int scriptMask = 0;
1857  int res;
1858  int seenDigit = 0;
1859# define SCRIPT_LATIN       0x0001
1860# define SCRIPT_CYRILLIC    0x0002
1861# define SCRIPT_GREEK       0x0004
1862# define SCRIPT_HEBREW      0x0008
1863# define SCRIPT_ARABIC      0x0010
1864
1865  while( nIn>0 ){
1866    c = utf8Read(zIn, nIn, &sz);
1867    zIn += sz;
1868    nIn -= sz;
1869    if( c<0x02af ){
1870      if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ){
1871        scriptMask |= SCRIPT_LATIN;
1872      }else if( c>='0' && c<='9' ){
1873        seenDigit = 1;
1874      }
1875    }else if( c>=0x0400 && c<=0x04ff ){
1876      scriptMask |= SCRIPT_CYRILLIC;
1877    }else if( c>=0x0386 && c<=0x03ce ){
1878      scriptMask |= SCRIPT_GREEK;
1879    }else if( c>=0x0590 && c<=0x05ff ){
1880      scriptMask |= SCRIPT_HEBREW;
1881    }else if( c>=0x0600 && c<=0x06ff ){
1882      scriptMask |= SCRIPT_ARABIC;
1883    }
1884  }
1885  if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN;
1886  switch( scriptMask ){
1887    case 0:                res = 999; break;
1888    case SCRIPT_LATIN:     res = 215; break;
1889    case SCRIPT_CYRILLIC:  res = 220; break;
1890    case SCRIPT_GREEK:     res = 200; break;
1891    case SCRIPT_HEBREW:    res = 125; break;
1892    case SCRIPT_ARABIC:    res = 160; break;
1893    default:               res = 998; break;
1894  }
1895  sqlite3_result_int(context, res);
1896}
1897
1898/* End transliterate
1899******************************************************************************
1900******************************************************************************
1901** Begin spellfix1 virtual table.
1902*/
1903
/* Maximum length of a phonehash used for querying the shadow table.
** This is the size in bytes of the zHash[] buffers in spellfix1_row and
** MatchQuery.  spellfix1RunQuery() truncates hashes to
** SPELLFIX_MX_HASH-2 characters so that one extra character plus the
** NUL terminator still fit. */
#define SPELLFIX_MX_HASH  32

/* Maximum number of hash strings to examine per query.  This sizes
** MatchQuery.azPrior[].  Code that only matters when more than one run
** is allowed is guarded by "#if SPELLFIX_MX_RUN>1". */
#define SPELLFIX_MX_RUN   1

/* Shorthand names for the virtual table and cursor objects */
typedef struct spellfix1_vtab spellfix1_vtab;
typedef struct spellfix1_cursor spellfix1_cursor;
1912
/* Fuzzy-search virtual table object.
**
** Instances are allocated by spellfix1Init() and released by
** spellfix1Uninit().
*/
struct spellfix1_vtab {
  sqlite3_vtab base;         /* Base class - must be first */
  sqlite3 *db;               /* Database connection */
  char *zDbName;             /* Name of database holding this table.  Stored
                             ** in the same allocation as this object, so
                             ** it is never freed separately. */
  char *zTableName;          /* Name of the virtual table.  Obtained from
                             ** sqlite3_mprintf() */
  char *zCostTable;          /* Table holding edit-distance cost numbers.
                             ** Set from the edit_cost_table= argument, or
                             ** NULL if that argument was not given. */
  EditDist3Config *pConfig3; /* Parsed edit distance costs.  Loaded from
                             ** zCostTable on first use by the MATCH
                             ** filter; NULL until then. */
};
1922
/* Fuzzy-search cursor object.
**
** The a[] array holds the result rows.  It has room for nAlloc entries,
** of which the first nRow are in use.  Each a[].zWord is a separately
** allocated string that is released by spellfix1ResetCursor().
*/
struct spellfix1_cursor {
  sqlite3_vtab_cursor base;    /* Base class - must be first */
  spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */
  char *zPattern;              /* rhs of MATCH clause */
  int idxNum;                  /* idxNum value passed to xFilter() */
  int nRow;                    /* Number of rows of content */
  int nAlloc;                  /* Number of allocated rows */
  int iRow;                    /* Current row of content */
  int iLang;                   /* Value of the langid= constraint */
  int iTop;                    /* Value of the top= constraint */
  int iScope;                  /* Value of the scope= constraint */
  int nSearch;                 /* Number of vocabulary items checked */
  sqlite3_stmt *pFullScan;     /* Shadow query for a full table scan */
  struct spellfix1_row {       /* For each row of content */
    sqlite3_int64 iRowid;         /* Rowid for this row */
    char *zWord;                  /* Text for this row */
    int iRank;                    /* Rank for this row */
    int iDistance;                /* Distance from pattern for this row */
    int iScore;                   /* Score for sorting */
    int iMatchlen;                /* Value of matchlen column (or -1) */
    char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
  } *a;
};
1947
1948/*
1949** Construct one or more SQL statements from the format string given
1950** and then evaluate those statements. The success code is written
1951** into *pRc.
1952**
1953** If *pRc is initially non-zero then this routine is a no-op.
1954*/
1955static void spellfix1DbExec(
1956  int *pRc,              /* Success code */
1957  sqlite3 *db,           /* Database in which to run SQL */
1958  const char *zFormat,   /* Format string for SQL */
1959  ...                    /* Arguments to the format string */
1960){
1961  va_list ap;
1962  char *zSql;
1963  if( *pRc ) return;
1964  va_start(ap, zFormat);
1965  zSql = sqlite3_vmprintf(zFormat, ap);
1966  va_end(ap);
1967  if( zSql==0 ){
1968    *pRc = SQLITE_NOMEM;
1969  }else{
1970    *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
1971    sqlite3_free(zSql);
1972  }
1973}
1974
1975/*
1976** xDisconnect/xDestroy method for the fuzzy-search module.
1977*/
1978static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){
1979  spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
1980  int rc = SQLITE_OK;
1981  if( isDestroy ){
1982    sqlite3 *db = p->db;
1983    spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"",
1984                  p->zDbName, p->zTableName);
1985  }
1986  if( rc==SQLITE_OK ){
1987    sqlite3_free(p->zTableName);
1988    editDist3ConfigDelete(p->pConfig3);
1989    sqlite3_free(p->zCostTable);
1990    sqlite3_free(p);
1991  }
1992  return rc;
1993}
1994static int spellfix1Disconnect(sqlite3_vtab *pVTab){
1995  return spellfix1Uninit(0, pVTab);
1996}
1997static int spellfix1Destroy(sqlite3_vtab *pVTab){
1998  return spellfix1Uninit(1, pVTab);
1999}
2000
/*
** Return a copy of zIn with leading whitespace removed and, if the
** remaining text begins with a single-quote or double-quote, with the
** quoting removed.  Within quotes, a doubled quote character stands for
** one literal quote character, and everything after the closing quote
** is discarded.
**
** Trailing whitespace is not stripped because the parser never leaves
** spaces at the end of the argument.  The loop test is wrapped in
** ALWAYS() on the expectation that a closing quote is always found.
**
** The copy is obtained from sqlite3_mprintf() and must be freed by the
** caller.  NULL is returned if the allocation fails.
*/
static char *spellfix1Dequote(const char *zIn){
  char *zCopy;
  char cQuote;
  int iRd;       /* Index of the next character to read */
  int iWr;       /* Index of the next character to write */

  for(; isspace((unsigned char)zIn[0]); zIn++){}
  zCopy = sqlite3_mprintf("%s", zIn);
  if( zCopy==0 ) return 0;
  cQuote = zCopy[0];
  if( cQuote!='\'' && cQuote!='"' ) return zCopy;

  iWr = 0;
  for(iRd=1; ALWAYS(zCopy[iRd]); iRd++){
    zCopy[iWr++] = zCopy[iRd];
    if( zCopy[iRd]!=cQuote ) continue;
    if( zCopy[iRd+1]!=cQuote ){
      /* Closing quote: overwrite the copy of it with the terminator */
      zCopy[iWr-1] = 0;
      break;
    }
    iRd++;   /* Doubled quote: skip the second one */
  }
  return zCopy;
}
2033
2034
2035/*
2036** xConnect/xCreate method for the spellfix1 module. Arguments are:
2037**
2038**   argv[0]   -> module name  ("spellfix1")
2039**   argv[1]   -> database name
2040**   argv[2]   -> table name
2041**   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
2042*/
2043static int spellfix1Init(
2044  int isCreate,
2045  sqlite3 *db,
2046  void *pAux,
2047  int argc, const char *const*argv,
2048  sqlite3_vtab **ppVTab,
2049  char **pzErr
2050){
2051  spellfix1_vtab *pNew = 0;
2052  /* const char *zModule = argv[0]; // not used */
2053  const char *zDbName = argv[1];
2054  const char *zTableName = argv[2];
2055  int nDbName;
2056  int rc = SQLITE_OK;
2057  int i;
2058
2059  nDbName = (int)strlen(zDbName);
2060  pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
2061  if( pNew==0 ){
2062    rc = SQLITE_NOMEM;
2063  }else{
2064    memset(pNew, 0, sizeof(*pNew));
2065    pNew->zDbName = (char*)&pNew[1];
2066    memcpy(pNew->zDbName, zDbName, nDbName+1);
2067    pNew->zTableName = sqlite3_mprintf("%s", zTableName);
2068    pNew->db = db;
2069    if( pNew->zTableName==0 ){
2070      rc = SQLITE_NOMEM;
2071    }else{
2072      sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
2073      rc = sqlite3_declare_vtab(db, 
2074           "CREATE TABLE x(word,rank,distance,langid, "
2075           "score, matchlen, phonehash HIDDEN, "
2076           "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
2077           "soundslike HIDDEN, command HIDDEN)"
2078      );
2079#define SPELLFIX_COL_WORD            0
2080#define SPELLFIX_COL_RANK            1
2081#define SPELLFIX_COL_DISTANCE        2
2082#define SPELLFIX_COL_LANGID          3
2083#define SPELLFIX_COL_SCORE           4
2084#define SPELLFIX_COL_MATCHLEN        5
2085#define SPELLFIX_COL_PHONEHASH       6
2086#define SPELLFIX_COL_TOP             7
2087#define SPELLFIX_COL_SCOPE           8
2088#define SPELLFIX_COL_SRCHCNT         9
2089#define SPELLFIX_COL_SOUNDSLIKE     10
2090#define SPELLFIX_COL_COMMAND        11
2091    }
2092    if( rc==SQLITE_OK && isCreate ){
2093      spellfix1DbExec(&rc, db,
2094         "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
2095         "  id INTEGER PRIMARY KEY,\n"
2096         "  rank INT,\n"
2097         "  langid INT,\n"
2098         "  word TEXT,\n"
2099         "  k1 TEXT,\n"
2100         "  k2 TEXT\n"
2101         ");\n",
2102         zDbName, zTableName
2103      );
2104      spellfix1DbExec(&rc, db,
2105         "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" "
2106            "ON \"%w_vocab\"(langid,k2);",
2107         zDbName, zTableName, zTableName
2108      );
2109    }
2110    for(i=3; rc==SQLITE_OK && i<argc; i++){
2111      if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){
2112        pNew->zCostTable = spellfix1Dequote(&argv[i][16]);
2113        if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM;
2114        continue;
2115      }
2116      *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]);
2117      rc = SQLITE_ERROR; 
2118    }
2119  }
2120
2121  if( rc && pNew ){
2122    *ppVTab = 0;
2123    spellfix1Uninit(0, &pNew->base);
2124  }else{
2125    *ppVTab = (sqlite3_vtab *)pNew;
2126  }
2127  return rc;
2128}
2129
2130/*
2131** The xConnect and xCreate methods
2132*/
2133static int spellfix1Connect(
2134  sqlite3 *db,
2135  void *pAux,
2136  int argc, const char *const*argv,
2137  sqlite3_vtab **ppVTab,
2138  char **pzErr
2139){
2140  return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr);
2141}
2142static int spellfix1Create(
2143  sqlite3 *db,
2144  void *pAux,
2145  int argc, const char *const*argv,
2146  sqlite3_vtab **ppVTab,
2147  char **pzErr
2148){
2149  return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr);
2150}
2151
2152/*
2153** Clear all of the content from a cursor.
2154*/
2155static void spellfix1ResetCursor(spellfix1_cursor *pCur){
2156  int i;
2157  for(i=0; i<pCur->nRow; i++){
2158    sqlite3_free(pCur->a[i].zWord);
2159  }
2160  pCur->nRow = 0;
2161  pCur->iRow = 0;
2162  pCur->nSearch = 0;
2163  if( pCur->pFullScan ){
2164    sqlite3_finalize(pCur->pFullScan);
2165    pCur->pFullScan = 0;
2166  }
2167}
2168
2169/*
2170** Resize the cursor to hold up to N rows of content
2171*/
2172static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){
2173  struct spellfix1_row *aNew;
2174  assert( N>=pCur->nRow );
2175  aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
2176  if( aNew==0 && N>0 ){
2177    spellfix1ResetCursor(pCur);
2178    sqlite3_free(pCur->a);
2179    pCur->nAlloc = 0;
2180    pCur->a = 0;
2181  }else{
2182    pCur->nAlloc = N;
2183    pCur->a = aNew;
2184  }
2185}
2186
2187
2188/*
2189** Close a fuzzy-search cursor.
2190*/
2191static int spellfix1Close(sqlite3_vtab_cursor *cur){
2192  spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2193  spellfix1ResetCursor(pCur);
2194  spellfix1ResizeCursor(pCur, 0);
2195  sqlite3_free(pCur->zPattern);
2196  sqlite3_free(pCur);
2197  return SQLITE_OK;
2198}
2199
/*
** Bits of the query plan number.  spellfix1BestIndex() combines these
** into sqlite3_index_info.idxNum to record which constraints it has
** accepted.
*/
#define SPELLFIX_IDXNUM_MATCH  0x01         /* word MATCH $str */
#define SPELLFIX_IDXNUM_LANGID 0x02         /* langid == $langid */
#define SPELLFIX_IDXNUM_TOP    0x04         /* top = $top */
#define SPELLFIX_IDXNUM_SCOPE  0x08         /* scope = $scope */
#define SPELLFIX_IDXNUM_DISTLT 0x10         /* distance < $distance */
#define SPELLFIX_IDXNUM_DISTLE 0x20         /* distance <= $distance */
#define SPELLFIX_IDXNUM_ROWID  0x40         /* rowid = $rowid */
#define SPELLFIX_IDXNUM_DIST   (0x10|0x20)  /* DISTLT and DISTLE */
2208
2209/*
2210**
2211** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined
2212** above.
2213**
2214** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid
2215** if specified and in that order.
2216*/
2217static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
2218  int iPlan = 0;
2219  int iLangTerm = -1;
2220  int iTopTerm = -1;
2221  int iScopeTerm = -1;
2222  int iDistTerm = -1;
2223  int iRowidTerm = -1;
2224  int i;
2225  const struct sqlite3_index_constraint *pConstraint;
2226  pConstraint = pIdxInfo->aConstraint;
2227  for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
2228    if( pConstraint->usable==0 ) continue;
2229
2230    /* Terms of the form:  word MATCH $str */
2231    if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0 
2232     && pConstraint->iColumn==SPELLFIX_COL_WORD
2233     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
2234    ){
2235      iPlan |= SPELLFIX_IDXNUM_MATCH;
2236      pIdxInfo->aConstraintUsage[i].argvIndex = 1;
2237      pIdxInfo->aConstraintUsage[i].omit = 1;
2238    }
2239
2240    /* Terms of the form:  langid = $langid  */
2241    if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0
2242     && pConstraint->iColumn==SPELLFIX_COL_LANGID
2243     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2244    ){
2245      iPlan |= SPELLFIX_IDXNUM_LANGID;
2246      iLangTerm = i;
2247    }
2248
2249    /* Terms of the form:  top = $top */
2250    if( (iPlan & SPELLFIX_IDXNUM_TOP)==0
2251     && pConstraint->iColumn==SPELLFIX_COL_TOP
2252     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2253    ){
2254      iPlan |= SPELLFIX_IDXNUM_TOP;
2255      iTopTerm = i;
2256    }
2257
2258    /* Terms of the form:  scope = $scope */
2259    if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0
2260     && pConstraint->iColumn==SPELLFIX_COL_SCOPE
2261     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2262    ){
2263      iPlan |= SPELLFIX_IDXNUM_SCOPE;
2264      iScopeTerm = i;
2265    }
2266
2267    /* Terms of the form:  distance < $dist or distance <= $dist */
2268    if( (iPlan & SPELLFIX_IDXNUM_DIST)==0
2269     && pConstraint->iColumn==SPELLFIX_COL_DISTANCE
2270     && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
2271          || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
2272    ){
2273      if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){
2274        iPlan |= SPELLFIX_IDXNUM_DISTLT;
2275      }else{
2276        iPlan |= SPELLFIX_IDXNUM_DISTLE;
2277      }
2278      iDistTerm = i;
2279    }
2280
2281    /* Terms of the form:  distance < $dist or distance <= $dist */
2282    if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0
2283     && pConstraint->iColumn<0
2284     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2285    ){
2286      iPlan |= SPELLFIX_IDXNUM_ROWID;
2287      iRowidTerm = i;
2288    }
2289  }
2290  if( iPlan&SPELLFIX_IDXNUM_MATCH ){
2291    int idx = 2;
2292    pIdxInfo->idxNum = iPlan;
2293    if( pIdxInfo->nOrderBy==1
2294     && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE
2295     && pIdxInfo->aOrderBy[0].desc==0
2296    ){
2297      pIdxInfo->orderByConsumed = 1;  /* Default order by iScore */
2298    }
2299    if( iPlan&SPELLFIX_IDXNUM_LANGID ){
2300      pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++;
2301      pIdxInfo->aConstraintUsage[iLangTerm].omit = 1;
2302    }
2303    if( iPlan&SPELLFIX_IDXNUM_TOP ){
2304      pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++;
2305      pIdxInfo->aConstraintUsage[iTopTerm].omit = 1;
2306    }
2307    if( iPlan&SPELLFIX_IDXNUM_SCOPE ){
2308      pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++;
2309      pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1;
2310    }
2311    if( iPlan&SPELLFIX_IDXNUM_DIST ){
2312      pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++;
2313      pIdxInfo->aConstraintUsage[iDistTerm].omit = 1;
2314    }
2315    pIdxInfo->estimatedCost = 1e5;
2316  }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){
2317    pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID;
2318    pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
2319    pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
2320    pIdxInfo->estimatedCost = 5;
2321  }else{
2322    pIdxInfo->idxNum = 0;
2323    pIdxInfo->estimatedCost = 1e50;
2324  }
2325  return SQLITE_OK;
2326}
2327
2328/*
2329** Open a new fuzzy-search cursor.
2330*/
2331static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
2332  spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2333  spellfix1_cursor *pCur;
2334  pCur = sqlite3_malloc64( sizeof(*pCur) );
2335  if( pCur==0 ) return SQLITE_NOMEM;
2336  memset(pCur, 0, sizeof(*pCur));
2337  pCur->pVTab = p;
2338  *ppCursor = &pCur->base;
2339  return SQLITE_OK;
2340}
2341
/*
** Combine an edit distance with a word's rank to produce a sort score
** that favors common words.  The result is
**
**      iDistance + 32 - B
**
** where B is the number of significant bits in iRank (zero when iRank
** is zero or negative).  A larger rank therefore gives a smaller score.
*/
static int spellfix1Score(int iDistance, int iRank){
  int nBit = 0;
  while( iRank>0 ){
    iRank >>= 1;
    nBit++;
  }
  return iDistance + 32 - nBit;
}
2351
2352/*
2353** Compare two spellfix1_row objects for sorting purposes in qsort() such
2354** that they sort in order of increasing distance.
2355*/
2356static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){
2357  const struct spellfix1_row *a = (const struct spellfix1_row*)A;
2358  const struct spellfix1_row *b = (const struct spellfix1_row*)B;
2359  return a->iScore - b->iScore;
2360}
2361
/*
** A structure used to pass information from spellfix1FilterForMatch()
** into spellfix1RunQuery().
**
** spellfix1RunQuery() reads pCur, pStmt, zPattern, pMatchStr3, pLang,
** iScope and iMaxDist, and updates rc, nRun and azPrior[].
*/
typedef struct MatchQuery {
  spellfix1_cursor *pCur;          /* The cursor being queried */
  sqlite3_stmt *pStmt;             /* shadow table query statement */
  char zHash[SPELLFIX_MX_HASH];    /* The current phonehash for zPattern */
  const char *zPattern;            /* Transliterated input string */
  int nPattern;                    /* Length of zPattern */
  EditDist3FromString *pMatchStr3; /* Original unicode string */
  EditDist3Config *pConfig3;       /* Edit-distance cost coefficients */
  const EditDist3Lang *pLang;      /* The selected language coefficients */
  int iLang;                       /* The language id */
  int iScope;                      /* Default scope */
  int iMaxDist;                    /* Maximum allowed edit distance, or -1 */
  int rc;                          /* Error code */
  int nRun;                  /* Number of prior runs for the same zPattern */
  char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH];  /* Prior hashes */
} MatchQuery;
2382
/*
** Run a query looking for the best matches against p->zPattern using
** the phonetic hash of zQuery (nQuery bytes) as the character class
** seed hash.
**
** The first iScope characters of that hash become zHash1.  zHash1, and
** zHash1 with a 'Z' appended, are bound to parameters 1 and 2 of
** p->pStmt (presumably the bounds of a range scan - the SQL text is
** prepared by the caller).  For each row returned, column 0 is used as
** the rowid, column 1 as the word, column 2 as the rank and column 3 as
** the k1 value.  Each candidate is scored and stored in pCur->a[]; once
** the array is full a new candidate replaces the current worst entry
** only if its score is strictly lower.
**
** Errors are reported through p->rc.  The routine is a no-op if p->rc
** is already set or if pCur->a is NULL.
*/
static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){
  const char *zK1;
  const char *zWord;
  int iDist;
  int iRank;
  int iScore;
  int iWorst = 0;
  int idx;
  int idxWorst = -1;
  int i;
  int iScope = p->iScope;
  spellfix1_cursor *pCur = p->pCur;
  sqlite3_stmt *pStmt = p->pStmt;
  char zHash1[SPELLFIX_MX_HASH];
  char zHash2[SPELLFIX_MX_HASH];
  char *zClass;
  int nClass;
  int rc;

  if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure */
  zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
  if( zClass==0 ){
    p->rc = SQLITE_NOMEM;
    return;
  }
  /* Truncate the hash so that zHash2 has room for the extra 'Z' and
  ** the NUL terminator. */
  nClass = (int)strlen(zClass);
  if( nClass>SPELLFIX_MX_HASH-2 ){
    nClass = SPELLFIX_MX_HASH-2;
    zClass[nClass] = 0;
  }
  /* The scope cannot exceed the hash length.  For hashes longer than
  ** two characters the scope is limited to one less than the length. */
  if( nClass<=iScope ){
    if( nClass>2 ){
      iScope = nClass-1;
    }else{
      iScope = nClass;
    }
  }
  /* zHash1 is the hash prefix; zHash2 is the same prefix followed by 'Z' */
  memcpy(zHash1, zClass, iScope);
  sqlite3_free(zClass);
  zHash1[iScope] = 0;
  memcpy(zHash2, zHash1, iScope);
  zHash2[iScope] = 'Z';
  zHash2[iScope+1] = 0;
#if SPELLFIX_MX_RUN>1
  /* Skip this run if the same hash prefix has already been queried */
  for(i=0; i<p->nRun; i++){
    if( strcmp(p->azPrior[i], zHash1)==0 ) return;
  }
#endif
  assert( p->nRun<SPELLFIX_MX_RUN );
  memcpy(p->azPrior[p->nRun++], zHash1, iScope+1);
  if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
   || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
  ){
    p->rc = SQLITE_NOMEM;
    return;
  }
#if SPELLFIX_MX_RUN>1
  /* Find the worst score among rows left over from earlier runs */
  for(i=0; i<pCur->nRow; i++){
    if( pCur->a[i].iScore>iWorst ){
      iWorst = pCur->a[i].iScore;
      idxWorst = i;
    }
  }
#endif
  while( sqlite3_step(pStmt)==SQLITE_ROW ){
    int iMatchlen = -1;
    iRank = sqlite3_column_int(pStmt, 2);
    if( p->pMatchStr3 ){
      /* An edit cost table is in use: compare against the word itself */
      int nWord = sqlite3_column_bytes(pStmt, 1);
      zWord = (const char*)sqlite3_column_text(pStmt, 1);
      iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
    }else{
      /* Otherwise compare the pattern against the k1 column */
      zK1 = (const char*)sqlite3_column_text(pStmt, 3);
      if( zK1==0 ) continue;
      iDist = editdist1(p->zPattern, zK1, 0);
    }
    /* A negative distance is treated as an out-of-memory condition */
    if( iDist<0 ){
      p->rc = SQLITE_NOMEM;
      break;
    }
    pCur->nSearch++;

    /* If there is a "distance < $dist" or "distance <= $dist" constraint,
    ** check if this row meets it. If not, jump back up to the top of the
    ** loop to process the next row. Otherwise, if the row does match the
    ** distance constraint, check if the pCur->a[] array is already full.
    ** If it is and no explicit "top = ?" constraint was present in the
    ** query, grow the array to ensure there is room for the new entry. */
    assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) );
    if( p->iMaxDist>=0 ){
      if( iDist>p->iMaxDist ) continue;
      if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){
        spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
        if( pCur->a==0 ) break;
      }
    }

    /* Choose the slot for this row: the next free slot if there is one,
    ** otherwise the slot of the current worst row provided the new row
    ** scores strictly better.  Rows that do not qualify are dropped. */
    iScore = spellfix1Score(iDist,iRank);
    if( pCur->nRow<pCur->nAlloc ){
      idx = pCur->nRow;
    }else if( iScore<iWorst ){
      idx = idxWorst;
      sqlite3_free(pCur->a[idx].zWord);
    }else{
      continue;
    }

    pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
    if( pCur->a[idx].zWord==0 ){
      p->rc = SQLITE_NOMEM;
      break;
    }
    pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
    pCur->a[idx].iRank = iRank;
    pCur->a[idx].iDistance = iDist;
    pCur->a[idx].iScore = iScore;
    pCur->a[idx].iMatchlen = iMatchlen;
    memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
    if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
    /* Once the array is full, recompute which entry is the worst so
    ** that later rows know what they have to beat. */
    if( pCur->nRow==pCur->nAlloc ){
      iWorst = pCur->a[0].iScore;
      idxWorst = 0;
      for(i=1; i<pCur->nRow; i++){
        iScore = pCur->a[i].iScore;
        if( iWorst<iScore ){
          iWorst = iScore;
          idxWorst = i;
        }
      }
    }
  }
  /* Resetting the statement also reports any error raised while stepping */
  rc = sqlite3_reset(pStmt);
  if( rc ) p->rc = rc;
}
2521
2522/*
2523** This version of the xFilter method work if the MATCH term is present
2524** and we are doing a scan.
2525*/
2526static int spellfix1FilterForMatch(
2527  spellfix1_cursor *pCur,
2528  int argc,
2529  sqlite3_value **argv
2530){
2531  int idxNum = pCur->idxNum;
2532  const unsigned char *zMatchThis;   /* RHS of the MATCH operator */
2533  EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */
2534  char *zPattern;                    /* Transliteration of zMatchThis */
2535  int nPattern;                      /* Length of zPattern */
2536  int iLimit = 20;                   /* Max number of rows of output */
2537  int iScope = 3;                    /* Use this many characters of zClass */
2538  int iLang = 0;                     /* Language code */
2539  char *zSql;                        /* SQL of shadow table query */
2540  sqlite3_stmt *pStmt = 0;           /* Shadow table query */
2541  int rc;                            /* Result code */
2542  int idx = 1;                       /* Next available filter parameter */
2543  spellfix1_vtab *p = pCur->pVTab;   /* The virtual table that owns pCur */
2544  MatchQuery x;                      /* For passing info to RunQuery() */
2545
2546  /* Load the cost table if we have not already done so */
2547  if( p->zCostTable!=0 && p->pConfig3==0 ){
2548    p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
2549    if( p->pConfig3==0 ) return SQLITE_NOMEM;
2550    memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
2551    rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
2552    if( rc ) return rc;
2553  }
2554  memset(&x, 0, sizeof(x));
2555  x.iScope = 3;  /* Default scope if none specified by "WHERE scope=N" */
2556  x.iMaxDist = -1;   /* Maximum allowed edit distance */
2557
2558  if( idxNum&2 ){
2559    iLang = sqlite3_value_int(argv[idx++]);
2560  }
2561  if( idxNum&4 ){
2562    iLimit = sqlite3_value_int(argv[idx++]);
2563    if( iLimit<1 ) iLimit = 1;
2564  }
2565  if( idxNum&8 ){
2566    x.iScope = sqlite3_value_int(argv[idx++]);
2567    if( x.iScope<1 ) x.iScope = 1;
2568    if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2;
2569  }
2570  if( idxNum&(16|32) ){
2571    x.iMaxDist = sqlite3_value_int(argv[idx++]);
2572    if( idxNum&16 ) x.iMaxDist--;
2573    if( x.iMaxDist<0 ) x.iMaxDist = 0;
2574  }
2575  spellfix1ResetCursor(pCur);
2576  spellfix1ResizeCursor(pCur, iLimit);
2577  zMatchThis = sqlite3_value_text(argv[0]);
2578  if( zMatchThis==0 ) return SQLITE_OK;
2579  if( p->pConfig3 ){
2580    x.pLang = editDist3FindLang(p->pConfig3, iLang);
2581    pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
2582    if( pMatchStr3==0 ){
2583      x.rc = SQLITE_NOMEM;
2584      goto filter_exit;
2585    }
2586  }else{
2587    x.pLang = 0;
2588  }
2589  zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
2590  sqlite3_free(pCur->zPattern);
2591  pCur->zPattern = zPattern;
2592  if( zPattern==0 ){
2593    x.rc = SQLITE_NOMEM;
2594    goto filter_exit;
2595  }
2596  nPattern = (int)strlen(zPattern);
2597  if( zPattern[nPattern-1]=='*' ) nPattern--;
2598  zSql = sqlite3_mprintf(
2599     "SELECT id, word, rank, coalesce(k1,word)"
2600     "  FROM \"%w\".\"%w_vocab\""
2601     " WHERE langid=%d AND k2>=?1 AND k2<?2",
2602     p->zDbName, p->zTableName, iLang
2603  );
2604  if( zSql==0 ){
2605    x.rc = SQLITE_NOMEM;
2606    pStmt = 0;
2607    goto filter_exit;
2608  }
2609  rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
2610  sqlite3_free(zSql);
2611  pCur->iLang = iLang;
2612  x.pCur = pCur;
2613  x.pStmt = pStmt;
2614  x.zPattern = zPattern;
2615  x.nPattern = nPattern;
2616  x.pMatchStr3 = pMatchStr3;
2617  x.iLang = iLang;
2618  x.rc = rc;
2619  x.pConfig3 = p->pConfig3;
2620  if( x.rc==SQLITE_OK ){
2621    spellfix1RunQuery(&x, zPattern, nPattern);
2622  }
2623
2624  if( pCur->a ){
2625    qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
2626    pCur->iTop = iLimit;
2627    pCur->iScope = iScope;
2628  }else{
2629    x.rc = SQLITE_NOMEM;
2630  }
2631
2632filter_exit:
2633  sqlite3_finalize(pStmt);
2634  editDist3FromStringDelete(pMatchStr3);
2635  return x.rc;
2636}
2637
2638/*
2639** This version of xFilter handles a full-table scan case
2640*/
2641static int spellfix1FilterForFullScan(
2642  spellfix1_cursor *pCur,
2643  int argc,
2644  sqlite3_value **argv
2645){
2646  int rc = SQLITE_OK;
2647  int idxNum = pCur->idxNum;
2648  char *zSql;
2649  spellfix1_vtab *pVTab = pCur->pVTab;
2650  spellfix1ResetCursor(pCur);
2651  assert( idxNum==0 || idxNum==64 );
2652  zSql = sqlite3_mprintf(
2653     "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s",
2654     pVTab->zDbName, pVTab->zTableName,
2655     ((idxNum & 64) ? " WHERE rowid=?" : "")
2656  );
2657  if( zSql==0 ) return SQLITE_NOMEM;
2658  rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0);
2659  sqlite3_free(zSql);
2660  if( rc==SQLITE_OK && (idxNum & 64) ){
2661    assert( argc==1 );
2662    rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]);
2663  }
2664  pCur->nRow = pCur->iRow = 0;
2665  if( rc==SQLITE_OK ){
2666    rc = sqlite3_step(pCur->pFullScan);
2667    if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; }
2668    if( rc==SQLITE_DONE ){ rc = SQLITE_OK; }
2669  }else{
2670    pCur->iRow = 0;
2671  }
2672  return rc;
2673}
2674
2675
2676/*
2677** Called to "rewind" a cursor back to the beginning so that
2678** it starts its output over again.  Always called at least once
2679** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call.
2680*/
2681static int spellfix1Filter(
2682  sqlite3_vtab_cursor *cur, 
2683  int idxNum, const char *idxStr,
2684  int argc, sqlite3_value **argv
2685){
2686  spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2687  int rc;
2688  pCur->idxNum = idxNum;
2689  if( idxNum & 1 ){
2690    rc = spellfix1FilterForMatch(pCur, argc, argv);
2691  }else{
2692    rc = spellfix1FilterForFullScan(pCur, argc, argv);
2693  }
2694  return rc;
2695}
2696
2697
2698/*
2699** Advance a cursor to its next row of output
2700*/
2701static int spellfix1Next(sqlite3_vtab_cursor *cur){
2702  spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2703  int rc = SQLITE_OK;
2704  if( pCur->iRow < pCur->nRow ){
2705    if( pCur->pFullScan ){
2706      rc = sqlite3_step(pCur->pFullScan);
2707      if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow;
2708      if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK;
2709    }else{
2710      pCur->iRow++;
2711    }
2712  }
2713  return rc;
2714}
2715
2716/*
2717** Return TRUE if we are at the end-of-file
2718*/
2719static int spellfix1Eof(sqlite3_vtab_cursor *cur){
2720  spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2721  return pCur->iRow>=pCur->nRow;
2722}
2723
2724/*
2725** Return columns from the current row.
2726*/
2727static int spellfix1Column(
2728  sqlite3_vtab_cursor *cur,
2729  sqlite3_context *ctx,
2730  int i
2731){
2732  spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2733  if( pCur->pFullScan ){
2734    if( i<=SPELLFIX_COL_LANGID ){
2735      sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
2736    }else{
2737      sqlite3_result_null(ctx);
2738    }
2739    return SQLITE_OK;
2740  }
2741  switch( i ){
2742    case SPELLFIX_COL_WORD: {
2743      sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
2744      break;
2745    }
2746    case SPELLFIX_COL_RANK: {
2747      sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
2748      break;
2749    }
2750    case SPELLFIX_COL_DISTANCE: {
2751      sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
2752      break;
2753    }
2754    case SPELLFIX_COL_LANGID: {
2755      sqlite3_result_int(ctx, pCur->iLang);
2756      break;
2757    }
2758    case SPELLFIX_COL_SCORE: {
2759      sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
2760      break;
2761    }
2762    case SPELLFIX_COL_MATCHLEN: {
2763      int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
2764      if( iMatchlen<0 ){
2765        int nPattern = (int)strlen(pCur->zPattern);
2766        char *zWord = pCur->a[pCur->iRow].zWord;
2767        int nWord = (int)strlen(zWord);
2768
2769        if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
2770          char *zTranslit;
2771          int res;
2772          zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
2773          if( !zTranslit ) return SQLITE_NOMEM;
2774          res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
2775          sqlite3_free(zTranslit);
2776          if( res<0 ) return SQLITE_NOMEM;
2777          iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
2778        }else{
2779          iMatchlen = utf8Charlen(zWord, nWord);
2780        }
2781      }
2782
2783      sqlite3_result_int(ctx, iMatchlen);
2784      break;
2785    }
2786    case SPELLFIX_COL_PHONEHASH: {
2787      sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
2788      break;
2789    }
2790    case SPELLFIX_COL_TOP: {
2791      sqlite3_result_int(ctx, pCur->iTop);
2792      break;
2793    }
2794    case SPELLFIX_COL_SCOPE: {
2795      sqlite3_result_int(ctx, pCur->iScope);
2796      break;
2797    }
2798    case SPELLFIX_COL_SRCHCNT: {
2799      sqlite3_result_int(ctx, pCur->nSearch);
2800      break;
2801    }
2802    default: {
2803      sqlite3_result_null(ctx);
2804      break;
2805    }
2806  }
2807  return SQLITE_OK;
2808}
2809
2810/*
2811** The rowid.
2812*/
2813static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
2814  spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2815  if( pCur->pFullScan ){
2816    *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
2817  }else{
2818    *pRowid = pCur->a[pCur->iRow].iRowid;
2819  }
2820  return SQLITE_OK;
2821}
2822
2823/*
2824** This function is called by the xUpdate() method. It returns a string
2825** containing the conflict mode that xUpdate() should use for the current
2826** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE".
2827*/
2828static const char *spellfix1GetConflict(sqlite3 *db){
2829  static const char *azConflict[] = {
2830    /* Note: Instead of "FAIL" - "ABORT". */
2831    "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE"
2832  };
2833  int eConflict = sqlite3_vtab_on_conflict(db);
2834
2835  assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE
2836       || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT
2837       || eConflict==SQLITE_REPLACE
2838  );
2839  assert( SQLITE_ROLLBACK==1 );
2840  assert( SQLITE_IGNORE==2 );
2841  assert( SQLITE_FAIL==3 );
2842  assert( SQLITE_ABORT==4 );
2843  assert( SQLITE_REPLACE==5 );
2844
2845  return azConflict[eConflict-1];
2846}
2847
2848/*
2849** The xUpdate() method.
2850*/
2851static int spellfix1Update(
2852  sqlite3_vtab *pVTab,
2853  int argc,
2854  sqlite3_value **argv,
2855  sqlite_int64 *pRowid
2856){
2857  int rc = SQLITE_OK;
2858  sqlite3_int64 rowid, newRowid;
2859  spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2860  sqlite3 *db = p->db;
2861
2862  if( argc==1 ){
2863    /* A delete operation on the rowid given by argv[0] */
2864    rowid = *pRowid = sqlite3_value_int64(argv[0]);
2865    spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
2866                           " WHERE id=%lld",
2867                  p->zDbName, p->zTableName, rowid);
2868  }else{
2869    const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
2870    int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
2871    int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
2872    int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
2873    const unsigned char *zSoundslike =
2874           sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2875    int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2876    char *zK1, *zK2;
2877    int i;
2878    char c;
2879    const char *zConflict = spellfix1GetConflict(db);
2880
2881    if( zWord==0 ){
2882      /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy');
2883      ** cause zWord to be NULL, so we look at the "command" column to see
2884      ** what special actions to take */
2885      const char *zCmd = 
2886         (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
2887      if( zCmd==0 ){
2888        pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word",
2889                                         p->zTableName);
2890        return SQLITE_CONSTRAINT_NOTNULL;
2891      }
2892      if( strcmp(zCmd,"reset")==0 ){
2893        /* Reset the  edit cost table (if there is one). */
2894        editDist3ConfigDelete(p->pConfig3);
2895        p->pConfig3 = 0;
2896        return SQLITE_OK;
2897      }
2898      if( strncmp(zCmd,"edit_cost_table=",16)==0 ){
2899        editDist3ConfigDelete(p->pConfig3);
2900        p->pConfig3 = 0;
2901        sqlite3_free(p->zCostTable);
2902        p->zCostTable = spellfix1Dequote(zCmd+16);
2903        if( p->zCostTable==0 ) return SQLITE_NOMEM;
2904        if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){
2905          sqlite3_free(p->zCostTable);
2906          p->zCostTable = 0;
2907        }
2908        return SQLITE_OK;
2909      }
2910      pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
2911                                       p->zTableName, zCmd);
2912      return SQLITE_ERROR;
2913    }
2914    if( iRank<1 ) iRank = 1;
2915    if( zSoundslike ){
2916      zK1 = (char*)transliterate(zSoundslike, nSoundslike);
2917    }else{
2918      zK1 = (char*)transliterate(zWord, nWord);
2919    }
2920    if( zK1==0 ) return SQLITE_NOMEM;
2921    for(i=0; (c = zK1[i])!=0; i++){
2922       if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
2923    }
2924    zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
2925    if( zK2==0 ){
2926      sqlite3_free(zK1);
2927      return SQLITE_NOMEM;
2928    }
2929    if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
2930      if( sqlite3_value_type(argv[1])==SQLITE_NULL ){
2931        spellfix1DbExec(&rc, db,
2932               "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
2933               "VALUES(%d,%d,%Q,nullif(%Q,%Q),%Q)",
2934               p->zDbName, p->zTableName,
2935               iRank, iLang, zWord, zK1, zWord, zK2
2936        );
2937      }else{
2938        newRowid = sqlite3_value_int64(argv[1]);
2939        spellfix1DbExec(&rc, db,
2940            "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) "
2941            "VALUES(%lld,%d,%d,%Q,nullif(%Q,%Q),%Q)",
2942            zConflict, p->zDbName, p->zTableName,
2943            newRowid, iRank, iLang, zWord, zK1, zWord, zK2
2944        );
2945      }
2946      *pRowid = sqlite3_last_insert_rowid(db);
2947    }else{
2948      rowid = sqlite3_value_int64(argv[0]);
2949      newRowid = *pRowid = sqlite3_value_int64(argv[1]);
2950      spellfix1DbExec(&rc, db,
2951             "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
2952             " word=%Q, k1=nullif(%Q,%Q), k2=%Q WHERE id=%lld",
2953             zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang,
2954             zWord, zK1, zWord, zK2, rowid
2955      );
2956    }
2957    sqlite3_free(zK1);
2958    sqlite3_free(zK2);
2959  }
2960  return rc;
2961}
2962
2963/*
2964** Rename the spellfix1 table.
2965*/
2966static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){
2967  spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2968  sqlite3 *db = p->db;
2969  int rc = SQLITE_OK;
2970  char *zNewName = sqlite3_mprintf("%s", zNew);
2971  if( zNewName==0 ){
2972    return SQLITE_NOMEM;
2973  }
2974  spellfix1DbExec(&rc, db, 
2975     "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
2976     p->zDbName, p->zTableName, zNewName
2977  );
2978  if( rc==SQLITE_OK ){
2979    sqlite3_free(p->zTableName);
2980    p->zTableName = zNewName;
2981  }else{
2982    sqlite3_free(zNewName);
2983  }
2984  return rc;
2985}
2986
2987
/*
** A virtual table module that provides fuzzy search.
**
** The initializer is positional, so the order of the entries below must
** match the order of the fields of sqlite3_module.  An entry of 0 means
** that this module does not supply the method named in the comment.
*/
static sqlite3_module spellfix1Module = {
  0,                       /* iVersion */
  spellfix1Create,         /* xCreate - handle CREATE VIRTUAL TABLE */
  spellfix1Connect,        /* xConnect - reconnected to an existing table */
  spellfix1BestIndex,      /* xBestIndex - figure out how to do a query */
  spellfix1Disconnect,     /* xDisconnect - close a connection */
  spellfix1Destroy,        /* xDestroy - handle DROP TABLE */
  spellfix1Open,           /* xOpen - open a cursor */
  spellfix1Close,          /* xClose - close a cursor */
  spellfix1Filter,         /* xFilter - configure scan constraints */
  spellfix1Next,           /* xNext - advance a cursor */
  spellfix1Eof,            /* xEof - check for end of scan */
  spellfix1Column,         /* xColumn - read data */
  spellfix1Rowid,          /* xRowid - read data */
  spellfix1Update,         /* xUpdate */
  0,                       /* xBegin */
  0,                       /* xSync */
  0,                       /* xCommit */
  0,                       /* xRollback */
  0,                       /* xFindMethod */
  spellfix1Rename,         /* xRename */
  0,                       /* xSavepoint */
  0,                       /* xRelease */
  0,                       /* xRollbackTo */
  0,                       /* xShadowName */
  0                        /* xIntegrity */
};
3018
3019/*
3020** Register the various functions and the virtual table.
3021*/
3022static int spellfix1Register(sqlite3 *db){
3023  int rc = SQLITE_OK;
3024  int i;
3025  rc = sqlite3_create_function(db, "spellfix1_translit", 1,
3026                               SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3027                                transliterateSqlFunc, 0, 0);
3028  if( rc==SQLITE_OK ){
3029    rc = sqlite3_create_function(db, "spellfix1_editdist", 2,
3030                                 SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3031                                  editdistSqlFunc, 0, 0);
3032  }
3033  if( rc==SQLITE_OK ){
3034    rc = sqlite3_create_function(db, "spellfix1_phonehash", 1,
3035                                 SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3036                                  phoneticHashSqlFunc, 0, 0);
3037  }
3038  if( rc==SQLITE_OK ){
3039    rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1,
3040                                  SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3041                                  scriptCodeSqlFunc, 0, 0);
3042  }
3043  if( rc==SQLITE_OK ){
3044    rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
3045  }
3046  if( rc==SQLITE_OK ){
3047    rc = editDist3Install(db);
3048  }
3049
3050  /* Verify sanity of the translit[] table */
3051  for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){
3052    assert( translit[i].cFrom<translit[i+1].cFrom );
3053  }
3054
3055  return rc;
3056}
3057
3058#endif /* SQLITE_OMIT_VIRTUALTABLE */
3059
3060/*
3061** Extension load function.
3062*/
3063#ifdef _WIN32
3064__declspec(dllexport)
3065#endif
3066int sqlite3_spellfix_init(
3067  sqlite3 *db, 
3068  char **pzErrMsg, 
3069  const sqlite3_api_routines *pApi
3070){
3071  SQLITE_EXTENSION_INIT2(pApi);
3072#ifndef SQLITE_OMIT_VIRTUALTABLE
3073  return spellfix1Register(db);
3074#endif
3075  return SQLITE_OK;
3076}