dict @ master

  1package importer
  2
  3import (
  4	"bufio"
  5	"bytes"
  6	"context"
  7	"fmt"
  8	"io"
  9	"math"
 10	"os"
 11	"strings"
 12
 13	"github.com/urfave/cli/v2"
 14
 15	"git.gabrielgio.me/dict/db"
 16)
 17
 18var ImportCommand = &cli.Command{
 19	Name:  "import",
 20	Usage: "convert dict.cc dictionary into a queryable sqlite format.",
 21	Flags: []cli.Flag{
 22		&cli.StringFlag{
 23			Name:  "output",
 24			Value: "main.dict",
 25			Usage: "Dictionary database location",
 26		},
 27		&cli.StringFlag{
 28			Name:  "input",
 29			Value: "dict.txt",
 30			Usage: "Dict.cc txt dictionary file",
 31		},
 32	},
 33	Action: func(cCtx *cli.Context) error {
 34		input := cCtx.String("input")
 35		output := cCtx.String("output")
 36		return Import(context.Background(), input, output)
 37	},
 38}
 39
 40func Import(ctx context.Context, txtInput, sqliteOutput string) error {
 41	db, err := db.Open(":memory:")
 42	if err != nil {
 43		return err
 44	}
 45	err = db.Migrate(ctx)
 46	if err != nil {
 47		return err
 48	}
 49
 50	file, err := os.Open(txtInput)
 51	if err != nil {
 52		return err
 53	}
 54	defer file.Close()
 55
 56	count := 0
 57	total, err := lineCounter(file)
 58	if err != nil {
 59		return err
 60	}
 61
 62	_, err = file.Seek(0, 0)
 63	if err != nil {
 64		return err
 65	}
 66
 67	scanner := bufio.NewScanner(file)
 68	for scanner.Scan() {
 69		if strings.HasPrefix(scanner.Text(), "#") || scanner.Text() == "" {
 70			continue
 71		}
 72
 73		var (
 74			p    = strings.SplitN(scanner.Text(), "\t", 2)
 75			word = p[0]
 76			line = strings.ReplaceAll(p[1], "\t", " ")
 77		)
 78
 79		if err := db.InsertLine(ctx, word, line); err != nil {
 80			return err
 81		}
 82		count++
 83
 84		if (count % 1234) == 0 {
 85			fmt.Print("\033[G\033[K") // move the cursor left and clear the line
 86			per := math.Ceil((float64(count) / float64(total)) * 100.0)
 87			fmt.Printf("%d/%d (%.0f%%)", count, total, per)
 88		}
 89	}
 90
 91	fmt.Printf("\nConsolidating...")
 92	err = db.Consolidade(ctx)
 93	if err != nil {
 94		return err
 95	}
 96
 97	err = db.Backup(ctx, sqliteOutput)
 98	if err != nil {
 99		return err
100	}
101	return nil
102}
103
// lineCounter returns the number of '\n' bytes read from r.
//
// r is consumed until it reports io.EOF. Any other read error is returned
// together with a zero count. A final line that is not terminated by a newline
// is not counted.
func lineCounter(r io.Reader) (int, error) {
	const lineBreak = '\n'

	var (
		count int
		buf   = make([]byte, bufio.MaxScanTokenSize)
		sep   = []byte{lineBreak}
	)

	for {
		n, err := r.Read(buf)
		// Only the first n bytes belong to this read. The tail of buf may
		// still hold data from an earlier, larger read and must not be
		// scanned, otherwise its newlines would be counted again.
		count += bytes.Count(buf[:n], sep)

		switch {
		case err == io.EOF:
			return count, nil
		case err != nil:
			return 0, err
		}
	}
}