1package importer
2
3import (
4 "bufio"
5 "bytes"
6 "context"
7 "fmt"
8 "io"
9 "math"
10 "os"
11 "strings"
12
13 "github.com/urfave/cli/v2"
14
15 "git.gabrielgio.me/dict/db"
16)
17
18var ImportCommand = &cli.Command{
19 Name: "import",
20 Usage: "convert dict.cc dictionary into a queryable sqlite format.",
21 Flags: []cli.Flag{
22 &cli.StringFlag{
23 Name: "output",
24 Value: "main.dict",
25 Usage: "Dictionary database location",
26 },
27 &cli.StringFlag{
28 Name: "input",
29 Value: "dict.txt",
30 Usage: "Dict.cc txt dictionary file",
31 },
32 },
33 Action: func(cCtx *cli.Context) error {
34 input := cCtx.String("input")
35 output := cCtx.String("output")
36 return Import(context.Background(), input, output)
37 },
38}
39
40func Import(ctx context.Context, txtInput, sqliteOutput string) error {
41 db, err := db.Open(":memory:")
42 if err != nil {
43 return err
44 }
45 err = db.Migrate(ctx)
46 if err != nil {
47 return err
48 }
49
50 file, err := os.Open(txtInput)
51 if err != nil {
52 return err
53 }
54 defer file.Close()
55
56 count := 0
57 total, err := lineCounter(file)
58 if err != nil {
59 return err
60 }
61
62 _, err = file.Seek(0, 0)
63 if err != nil {
64 return err
65 }
66
67 scanner := bufio.NewScanner(file)
68 for scanner.Scan() {
69 if strings.HasPrefix(scanner.Text(), "#") || scanner.Text() == "" {
70 continue
71 }
72
73 var (
74 p = strings.SplitN(scanner.Text(), "\t", 2)
75 word = p[0]
76 line = strings.ReplaceAll(p[1], "\t", " ")
77 )
78
79 if err := db.InsertLine(ctx, word, line); err != nil {
80 return err
81 }
82 count++
83
84 if (count % 1234) == 0 {
85 fmt.Print("\033[G\033[K") // move the cursor left and clear the line
86 per := math.Ceil((float64(count) / float64(total)) * 100.0)
87 fmt.Printf("%d/%d (%.0f%%)", count, total, per)
88 }
89 }
90
91 fmt.Printf("Consolidating")
92 err = db.Consolidade(ctx)
93 if err != nil {
94 return err
95 }
96
97 err = db.Backup(ctx, sqliteOutput)
98 if err != nil {
99 return err
100 }
101 return nil
102}
103
// lineCounter reads r until EOF and returns the number of line breaks ('\n')
// it contains.
//
// A final line that is not terminated by a line break is not counted. If a
// read fails with an error other than io.EOF, it returns 0 and that error.
func lineCounter(r io.Reader) (int, error) {
	const lineBreak = '\n'

	var (
		count int
		buf   = make([]byte, bufio.MaxScanTokenSize)
		sep   = []byte{lineBreak}
	)

	for {
		n, err := r.Read(buf)
		if err != nil && err != io.EOF {
			return 0, err
		}

		// Only the first n bytes belong to this read. The rest of buf still
		// holds bytes from earlier reads and must not be counted again.
		count += bytes.Count(buf[:n], sep)

		if err == io.EOF {
			return count, nil
		}
	}
}