:gijutsu_burogu:

隞餅柴潦詻扼喋圃lasticsearch刨udachi押啜扎喋雿踴

Elasticsearch具敶X蝝閫具便uromoji具臬乓俟udachi整 ∼臬押啜扎具行靘艾整 閰喋臭誑銝閬扼

qiita.com

隞乩芥詻余lasticsearch具娟udachi押啜扎蝞∠艾整

github.com

湔乓艾喋准潦扼恍撣艾押啜扎胯Elasticsearch 7.8 整扼整 柴Elasticsearch 7.13具娟udachi押啜扎敺扼整扼

解決策

README怒詻艾芸押啜扎怒敹閬整 ./gradlew -PelasticsearchVersion=7.13.0 build具具/build/distributions/押啜扎

 gradle -v

------------------------------------------------------------
Gradle 7.1
------------------------------------------------------------

Build time:   2021-06-14 14:47:26 UTC
Revision:     989ccc9952b140ee6ab88870e8a12f1b2998369e

Kotlin:       1.4.31
Groovy:       3.0.7
Ant:          Apache Ant(TM) version 1.10.9 compiled on September 27 2020
JVM:          11.0.3 (Amazon.com Inc. 11.0.3+7-LTS)
OS:           Mac OS X 10.15.6 x86_64

潦詻扼訪摰怒敺怒潦詻扼訝摰艾怒具2柴柴怒扼胯潦詻扼訪具娟udachi押啜扎艾整具具整 1柴柴怒柴准瑯乓畾艾具刻/build銝扎艾怒敹閬整

DynamoDB吮ap湔啜(AWS SDK for Go)

Users 潦

Id Weapons
0 {"sord": "Normal", "hammer" : "Failure"}
1 {"hammer":"Normal"}

隞乩柴芥潦怒Id:0柴艾潦嗚潦啜甇血灸ow敺游胯押柴怒胯具芥箄啗胯扼

銝閮柴急U俐eapons峻ap摮具艾游胯湔啜具扼整 艾潦嗚潦胯整雿甇血具艾芥嗆(Weapons疸ull桀游)敺整 桀游胯具押潦箇艾整整

解決策

DB1芥扼胯桀憿嫘航圾瘙箝具扼芥柴3芥怒西圾瘙箝整

func main() {
    ctx := context.Background()
    config := aws.NewConfig().
        WithRegion("ap-northeast-1").
        WithEndpoint("http://127.0.0.1:8000").
        WithCredentials(credentials.NewStaticCredentials("dummy", "dummy", "dummy"))

    client := client{
        dynamodb.New(session.Must(session.NewSession(config))),
    }
    if err := client.update(ctx); err != nil {
        log.Fatalln(err)
    }
}

// client groups the DynamoDB operations of this sample around a single
// *dynamodb.DynamoDB handle.
type client struct {
    dynamodb *dynamodb.DynamoDB // handle every update request is sent through
}

// update writes the weapon entry by trying up to three conditional writes:
//
//  1. updateWithWeapons, whose condition requires the Weapons attribute to exist;
//  2. if that fails its condition check, updateNoWeapon, whose condition
//     requires Weapons to be absent;
//  3. if that also fails its condition check (Weapons appeared between steps
//     1 and 2), updateWithWeapons once more.
//
// Any error other than a failed condition check is returned as soon as it
// occurs; the result of the last attempt made is what the caller receives.
func (c *client) update(ctx context.Context) error {
    // conditionFailed reports whether err is an awserr.Error carrying the
    // ConditionalCheckFailedException code. A nil err yields false.
    conditionFailed := func(err error) bool {
        awsErr, isAWS := err.(awserr.Error)
        return isAWS && awsErr.Code() == dynamodb.ErrCodeConditionalCheckFailedException
    }

    result := c.updateWithWeapons(ctx)
    if !conditionFailed(result) {
        return result
    }

    result = c.updateNoWeapon(ctx)
    if !conditionFailed(result) {
        return result
    }

    // Both condition checks failed, so retry the first form.
    return c.updateWithWeapons(ctx)
}

// updateWithWeapons sets Weapons.bow to "normal" on the item with Id 0 in the
// Users table. The write is conditional on both Id and Weapons existing, so
// the request is rejected when the item has no Weapons map yet; update treats
// that rejection (ConditionalCheckFailedException) as the signal to fall back.
func (c *client) updateWithWeapons(ctx context.Context) error {
    itemKey := map[string]*dynamodb.AttributeValue{
        "Id": {N: aws.String("0")},
    }
    attrNames := map[string]*string{
        "#WEAPONS": aws.String("Weapons"),
        "#WEAPON":  aws.String("bow"),
    }
    attrValues := map[string]*dynamodb.AttributeValue{
        ":st": {S: aws.String("normal")},
    }

    input := dynamodb.UpdateItemInput{
        TableName:                 aws.String("Users"),
        Key:                       itemKey,
        ExpressionAttributeNames:  attrNames,
        ExpressionAttributeValues: attrValues,
        UpdateExpression:          aws.String("set #WEAPONS.#WEAPON = :st"),
        // Only applies when the Weapons attribute is already present.
        ConditionExpression: aws.String("attribute_exists(Id) and attribute_exists(Weapons)"),
    }

    if _, err := c.dynamodb.UpdateItemWithContext(ctx, &input); err != nil {
        return err
    }
    return nil
}

// updateNoWeapon creates the Weapons attribute as the map {"bow": "normal"}
// on the item with Id 0 in the Users table. The write is conditional on Id
// existing and Weapons NOT existing, so the request is rejected when a Weapons
// attribute is already there.
func (c *client) updateNoWeapon(ctx context.Context) error {
    itemKey := map[string]*dynamodb.AttributeValue{
        "Id": {N: aws.String("0")},
    }
    attrNames := map[string]*string{
        "#WEAPONS": aws.String("Weapons"),
    }
    initialWeapons := map[string]*dynamodb.AttributeValue{
        "bow": {S: aws.String("normal")},
    }
    attrValues := map[string]*dynamodb.AttributeValue{
        ":map": {M: initialWeapons},
    }

    input := dynamodb.UpdateItemInput{
        TableName:                 aws.String("Users"),
        Key:                       itemKey,
        ExpressionAttributeNames:  attrNames,
        ExpressionAttributeValues: attrValues,
        UpdateExpression:          aws.String("set #WEAPONS = :map"),
        // Only applies while the Weapons attribute is still absent.
        ConditionExpression: aws.String("attribute_exists(Id) and attribute_not_exists(Weapons)"),
    }

    if _, err := c.dynamodb.UpdateItemWithContext(ctx, &input); err != nil {
        return err
    }
    return nil
}

賬潦嫘喋潦: https://github.com/kotaroooo0/for_output/blob/master/dynamodb/main.go

Go Conference 2021 Spring怎餃∼

4500PV, 270胯扯胯敹具詻急艾整

鞈雿

∼格貊怒整

格研扼胯鈭箝銝鈭箸酋鞈押雿具閬刻膩嫘艾整 隞舀獢鞈扼胯芥柴扼鈭箝桅典舀釣銝鈭箸酋鞈桐急釣整

望蝝U怨喋瘝詻柴佈o扳蝝U具喋詻喋芯艾

株鈭Recruit Engineers Advent Calendar 202011亦柴株鈭扼

TL;DR

餈隞乩株有嫘望蝝U詻株喋撘瑯扼

  • 銵: 芥潦胯潦璊蝝X賬摰鋆亟lasticsearch桐蝙典璈賬怠批具

  • 准胯: 璊蝝U准胯思舀芣賬扎 具喋詻U怒具璊蝝U具喋詻舀芰乓扼亥桐U憭扼具喋詻U桐鴃扎靘具

璊蝝U具喋詻桐蝯踴乓望蝝W怨底芥怨芯憪整 准啜押喋啗隤Go隤准批郎扼柴扼蝺渡具刖o批鋆整 整璊蝝W敹怒扼芰陛扳脩璊蝝U具喋詻桀鋆閬扎芥柴扼璊蝝W敹桀怒芥典扼

曉券脰敶U批鋆銝准斗蝝W敹芥UIssuePullRequest芥拚典扼 賬潦嫘喋潦臭誑銝抒恣艾整

github.com

格研怒艾典憭扼

摰鋆柴格閬

憭扼隞乩格賬摰鋆整

其格臭誑銝柴怒芥艾整 扎喋胯嫣典冽蝝a典怠艾整

f:id:kotaroooo0:20201210214726p:plain

雿輻其臭誑銝扼 甇研潦箸蝝U"Amazon Prime"券蝬批箝艾1,3芰柴柴踴"Amazon""Prime"U艾2芰柴胯艾芥具整 整"amAzon PRime"柴芾”閮箝詨艾具整

// main is an end-to-end usage sample: it connects to the database, indexes
// four English sentences, runs a phrase query for "amAzon PRime" and prints
// the result.
func main() {
    // Database connection.
    dbConfig := stalefish.NewDBConfig("root", "password", "127.0.0.1", "3306", "stalefish")
    db, _ := stalefish.NewDBClient(dbConfig) // the error is discarded in this sample

    // Storage backed by the database connection.
    storage := stalefish.NewStorageRdbImpl(db)

    // Analyzer: no char filters, the standard tokenizer, and three token
    // filters applied in the order listed.
    charFilters := []stalefish.CharFilter{}
    tokenFilters := []stalefish.TokenFilter{
        stalefish.StemmerFilter{},
        stalefish.LowercaseFilter{},
        stalefish.StopWordFilter{},
    }
    analyzer := stalefish.NewAnalyzer(charFilters, stalefish.StandardTokenizer{}, tokenFilters)

    // Indexer built from the storage, the analyzer and an empty in-memory index map.
    indexer := stalefish.NewIndexer(storage, analyzer, make(stalefish.InvertedIndexMap))

    // Add the documents.
    sentences := []string{
        "You can watch lots of interesting dramas on Amazon Prime.",
        "Forest phenomena in the Amazon are a prime concern.",
        "I watched amazon prime until late at night yesterday.",
        "Breaking Bad is a very jarring drama.",
    }
    for _, text := range sentences {
        indexer.AddDocument(stalefish.NewDocument(text))
    }

    // Run the phrase query.
    query := stalefish.NewPhraseQuery("amAzon PRime", analyzer)
    searcher := query.Searcher(storage)
    hits, _ := searcher.Search() // the error is discarded in this sample
    fmt.Println(hits)
    // result (as reported by the original author): [{1 You can watch lots of interesting dramas on Amazon Prime.} {3 I watched amazon prime until late at night yesterday.}]
}

詳細

Analyzer, Indexer, Searcher, Storage撠銝整

Analyzer

Analyzer胯0隞乩哽har Filter1娛okenizer0隞乩娛oken Filter瑽整 Analyzer怒艾喳脯艾胯具芥株”閮箝詨艾⊿喋柴怒踴芥喋啜艾整

   // Build an Analyzer from one char filter (mapping ":)" and ":(" to word
   // markers), the standard tokenizer, and three token filters. The literal is
   // positional, so the three values must stay in the struct's field order.
   analyzer := stalefish.Analyzer{
        []stalefish.CharFilter{stalefish.MappingCharFilter{map[string]string{":)": "_happy_", ":(": "_sad_"}}}, 
        stalefish.StandardTokenizer{},
        []stalefish.TokenFilter{stalefish.LowercaseFilter{}, stalefish.StopWordFilter{}, stalefish.StemmerFilter{}},
    }
    // Analyze a sample sentence and print the resulting tokens.
    fmt.Println(analyzer.Analyze("I have a lot of TASKs. I am very sad :("))
    // output (as reported by the original author): ["lot", "task", "am", "very", "sad", "sad"]

隞乩株”柴怠椰喋詨衣脯扼整

Analyze MappingCharFilter敺 StandardTokenizer敺 LowercaseFilter敺 StopWordFilter敺 StemmerFilter敺
I have a lot of TASKs. I am very sad :( I have a lot of TASKs. I am very sad sad I, have, a, lot, of, TASKs, I, am, very, sad, sad I, have, a, lot, of, tasks,I, am, very, sad, sad lot, tasks, am, very, sad, sad lot, task, am, very, sad, sad

IndexerSearcher詻胯具芥Analyzer恍艾頠Y蔭扎喋胯衣艾整

Indexer

頠Y蔭扎喋胯怒扎

璊蝝U具喋詻扼舫芣蝝U摰整急研桃格活柴頠Y蔭扎喋胯鈭思整 Indexer胯整∼U芯頠Y蔭扎喋胯靽∼U芯頠Y蔭扎喋胯柴萸扎箝憭扼芥艾嫘研潦詻頠Y蔭扎喋胯具潦詻艾嫘研潦詻思摮整

頠Y蔭扎喋胯怒胯押桀隤怠紋艾押柴准乓∼喋蝝乓艾具晞扼芥押柴准乓∼喋柴押怒桀隤箇整艾桐蝵格晞靽摮艾整 隤桐蝵格晞胯"Amazon Prime"芥拚摨憭批芸游怒研潦箝扳蝝U游怠蝴蝡∼整

隞乩頠Y蔭扎喋胯柴潦踵扼頠Y蔭芥嫘扼舀碰D典箇曆蝵柴扼胯芥艾嫘喋Z蝞柴怠隤箇暹啜靽艾整

// InvertedIndexMap is the inverted index: it maps a TokenID to the
// InvertedIndexValue (inverted list) recorded for that token.
type InvertedIndexMap map[TokenID]InvertedIndexValue

// InvertedIndexValue is one inverted-list entry, i.e. the value stored per
// token in InvertedIndexMap. The `db` tags name the keys each field is mapped
// to by whatever persistence code reads them (not visible in this snippet).
type InvertedIndexValue struct {
    Token          Token       `db:"token"`
    PostingList    PostingList `db:"posting_list"`    // postings recorded for this token
    DocsCount      int         `db:"docs_count"`      // presumably the number of documents containing the token — confirm against the indexer
    PositionsCount int         `db:"positions_count"` // presumably the token's occurrence count across all documents — confirm against the indexer
}

// PostingList is the list of Posting entries kept for one token. It is
// represented as a slice of Posting values.
type PostingList []Posting

// Posting is a single element of a PostingList: a document together with
// position data for one token.
type Posting struct {
    DocumentID     DocumentID // ID of the document
    Positions      []int      // presumably the token's positions inside the document — confirm against the indexer
    PositionsCount int        // presumably the number of entries in Positions — confirm against the indexer
}

頠Y蔭扎喋胯雿怒扎

隞雿Indexer怒頠Y蔭扎喋胯雿衣格臭誑銝扼

  1. Analyzer扳蝡脯喋箝
  2. 喋具怒嫘喋啜芥嫘雿艾∼U芯頠Y蔭扎喋胯怨蕭
  3. 颯怒准乓∼喋ID拍具柴扯誥蝵柴芥嫘桀具艾柴嫘喋啜芥嫘准乓∼喋ID格扼賬潦
  4. ∼U芯頠Y蔭扎喋胯柴萸扎箝頞嫘研潦訾頠Y蔭扎喋胯怒潦詻

折閬芥柴胯∼U芯頠Y蔭扎喋胯頞芥啜嫘研潦訾急偶蝬芥具具扼 憭扼具嫘研潦詻U胯颯孵啜憓整∼U芯蝙券舀撠研潦芥怒芥整 Elasticsearch扼准乓∼喋餈賢艾急偶蝬扼胯芥餈賢艾啜芥冽蝝W紋鞊∼怠怒整芥押啜整

Searcher

Seacher舀蝝U胯具芥具瘞貊頠Y蔭扎喋胯准乓∼喋璊蝝U整 隞喋桀箇暸摨柴研潦箸蝝U摰鋆整 潦批隞颯芸鋆怒芥艾扎寡胯扼

研潦箸蝝U桀其格臭誑銝扼

  1. 璊蝝U胯具芥喳脯
  2. 喋柴嫘喋啜芥嫘嫘研潦詻敺碰D具桀箇曆蝵柴柴芥嫘箝
  3. 具艾喋批銝格碰D怒整扎喋桀箇曆蝵柴乓艾唳蝝Y怨蕭

乓艾押胯喋桃詨紋箇曆蝵柴閮蝞詨紋箇曆蝵柴怒芥芥研潦箝怒芥艾典文整 Amazon Prime"具研潦箝靘怨狀整 "Amazon"舀碘桐蝵[3,7]怠箇整"Prime"舀碘桐蝵[8,18,32]怠箇整頠Y蔭扎喋胯韏唳颯血具整(7芰柴8芰柴怠箇整艾柴折) "Amazon"臬0芰柴喋扼"Prime"1芰柴喋扼柴扼N芰柴扼具蝔桐蝵格晞撘衣詨紋雿蝵柴閮蝞整 "Amazon"胯柴整整[3,7]具芥"Prime"1撘[7,17,31]怒芥整 "Amazon""Prime"詨紋雿蝵7怠箇整艾柴扼研潦箝怒芥艾典文整

Storage

MySQL嫘研潦詻具血拍具頠Y蔭扎喋胯准乓∼喋喋瘞貊艾整 MySQL舀押芷豢U胯芥冽艾柴扼具摰鋆踴俟torage扎喋踴潦扼潦嫘摰蝢押摰鋆U艾整

准乓∼喋喋埋D胯MySQL思遙艾頠Y蔭扎喋胯JSONMySQL思摮艾整

U喋怒研喋1211乓怨鈭蝔踴具芥格乓整扼怒扼具閮鈭怒刻艾整准乓∼喋扎喋准瑯喋啜艾研潦箸蝝U衣餈具准芥胯具整批鋆扼西胯扼 Done is better than perfect桃移蟡批鋆艾整∼扼啜畾敹萸芸鋆怒芥艾典憭扼隤啜桀蝴怎艾啣扼

臭誑銝格賬摰鋆艾整

  • 嫘喋啜芥嫘柴芥喋胯芥嫘摰鋆
  • 頠Y蔭扎喋胯桀抒葬
  • MatchQuery桀鋆
  • TF/IDF扼柴嫘喋U芥喋
  • 璊蝝Y柴賬潦
  • 隞餅柴准乓∼喋潦怒閮剖
  • MySQL隞怒芥研扎
  • 銝西衣芥押押潦喋嫘乓潦喋
  • 敶X蝝閫Ngram芥固okenizer格∪撐

logmi.jp

artem.krylysov.com

github.com

Go扼胯怒拍具吁et摰鋆

准啜押喋啗隤Go隤准扼艾胯怒拍具吁et摰鋆具整 瑯喋怒斗扎芥寞柴抒晶隞整

颯思誑銝株鈭佈o扼娟et桀鋆怒扎西圻整胯怒晞寞怒航圻艾整扼

kotaroooo0-dev.hatenablog.com

准啜押喋啗隤Go扼真int娟et摰鋆艾整 瑽雿具艾胯[]uint64Set銵具整 index0桀游0~63Set怠怒整艾index1桀游64~127Set怠怒整艾銵具整 1扼啜格啣扎特et怠怒整艾具銵具整 隞乩株”胯{0,1,2,3,64,65,68,69}銵具整(銵其葉殷膝嚚伐膝具0桀游)

index uint64(銵刻)
0 00000嚚伐膝嚚001111
1 00000嚚伐膝嚚110011

颯株蝝鞎扼芥撠芣湔啜扼颯憭啜株蝝∼梢芥拚雿銵游扼胯胯怒怒Set臬寞扼

実装

// IntSet is a set of non-negative integers stored as a bit vector.
//
// Bit b of words[w] is set exactly when the value 64*w+b is a member, so
// words[0] covers 0..63, words[1] covers 64..127, and so on. The zero value
// is an empty set that is ready to use.
type IntSet struct {
    words []uint64
}

// Has reports whether x is a member of the set. Negative values are never
// members.
func (s *IntSet) Has(x int) bool {
    if x < 0 {
        return false
    }
    word, bit := x/64, uint(x%64)
    return word < len(s.words) && s.words[word]&(1<<bit) != 0
}

// Add inserts x into the set, growing the bit vector as needed.
//
// It panics if x is negative: the representation cannot hold negative values,
// and silently dropping them would hide a caller bug.
func (s *IntSet) Add(x int) {
    if x < 0 {
        panic("IntSet.Add: negative value")
    }
    word, bit := x/64, uint(x%64)
    for word >= len(s.words) {
        s.words = append(s.words, 0)
    }
    s.words[word] |= 1 << bit
}

// Remove deletes x from the set. Removing a value that is not a member
// (including any negative value or one beyond the current vector) is a no-op.
func (s *IntSet) Remove(x int) {
    if x < 0 {
        return
    }
    word, bit := x/64, uint(x%64)
    // word == len(s.words) is already out of range, hence >= and not >.
    if word >= len(s.words) {
        return
    }
    s.words[word] &^= 1 << bit
}

// UnionWith adds every member of t to s. t is not modified.
func (s *IntSet) UnionWith(t *IntSet) {
    for i, tword := range t.words {
        if i < len(s.words) {
            s.words[i] |= tword
        } else {
            s.words = append(s.words, tword)
        }
    }
}

// IntersectWith keeps in s only the values that are also members of t.
// t is not modified.
func (s *IntSet) IntersectWith(t *IntSet) {
    // Words of s beyond the end of t cannot contain common members. Only ever
    // shrink s: growing the slice would expose stale capacity or panic.
    if len(t.words) < len(s.words) {
        s.words = s.words[:len(t.words)]
    }
    // Write through the index; a range value variable is only a copy.
    for i := range s.words {
        s.words[i] &= t.words[i]
    }
}

甇格湔唬誑憭桀游

鞎格啜怒游string桀游扼甇格湔啜具扼胯怠耦撘娟et銵函整扼柴扼胯芥冽整 鞎格啜桀游胯鞎格啜怒芥芥怨雲啗圾瘙箝扼扼 string桀游胯雿桅X啜扳啣扎怠行迤格湔啜具航賬扼 uint64批具艾格摮銵函整具銝航賬芥柴航芣扼 柴怠怒整艾芥怒整艾典斗准艾整賡賣箇整 2撟游怨鈭怒蝣箇潦踵扼怒潦怒踴其撮艾芥冽整

kotaroooo0-dev.hatenablog.com

Go扼押斗具摰鋆血梢仿剛航

押斗具Graphviz批航柴雿整

github.com

$ brew install graphviz
$ go get github.com/kotaroooo0/mute

$ vi data.txt
$ mute -s data.txt | dot -T png -o sample.png

# data.txt
keynote
keycase
king
kingdom
macbook

keynotekeycasekey整找氬艾柴整

Trie具航銵鈭扼Slack桀梢仿剛憭找芥押艾踴艾 扎具扎展d扎怒芥芥典隤霅扼整 Slack柴喋怒/who典亙具∼喋潦敺扼data.txt怒喋潦艾 mute -s data.txt | dot -T png -o sample.png具啜餃箏整

押斗具具

押斗具舫摨隞具桐蝔柴扼 trie桀蝘啜"retrieval"隤皞扼 敶X蝝閫思蝙颲詻芥押怎具艾整

梢仿剛璊蝝U恍押潦踵扼閮蝞蝛粹閮蝞晞怠芥艾整 蝛粹閮蝞恍U艾舀芥潦踵獢艾整

実装

github.com

銝颯U怒氬芥箝株蝝柴臭誑銝2嫘扼

押斗具雿典

踹乓隤瑕柴怒潦踹乓銵整 扼急仿剛銝氬艾游胯思瘛晞潦賊脯扼扼銝氬芥芥銝瘛晞畾菟急啜潦雿整

// insert adds the word w to the trie rooted at n, one rune per level.
// Existing child nodes are reused; missing ones are created with the rune as
// their label. The node reached by the last rune is flagged with End = true.
// An empty w changes nothing. The returned error is always nil.
func (n *Node) insert(w string) error {
    chars := []rune(w)
    last := len(chars) - 1

    cur := n
    for idx := 0; idx <= last; idx++ {
        ch := chars[idx]

        child, found := cur.Children[ch]
        if !found {
            // No branch for this rune yet: create it and hang it below cur.
            child = newNode(string(ch), make(map[rune]*Node), false)
            cur.Children[ch] = child
        }
        cur = child

        // The final rune marks the end of a complete word.
        if idx == last {
            cur.End = true
        }
    }
    return nil
}

押斗具Y揣dotfile典

DFS扳Y揣銵芥dotfile艾整 visitAll怠衣鈭伶FS摰整艾整

∪X啜批鋆具怒generateDotfileX啜批具血蝯具扼整 ∪X啜銵怒胯var visitAll func(n *Node)柴怠恐閮具敹扼芥visitAll芸蝢押喋喋扎具押潦怒芥整

靘啜怒∪X啜批鋆怒啜准潦怒visitAllX啜摰閮具具g := gographviz.NewGraph()啜准潦怠恐閮敹閬扼艾整

// generateDotfile renders the trie rooted at trie as a directed Graphviz
// graph named "G" and returns its DOT source.
//
// The trie is walked depth-first. For every parent/child pair both nodes are
// registered (label and shape come from getLabel/getShape, with a fixed font
// size) and a directed edge parent -> child is added. A trie whose root has no
// children therefore produces a graph without nodes.
//
// The output parameter is not used by this function; it is kept so existing
// callers keep compiling.
//
// Any error reported by the graph builder is returned together with an empty
// string instead of being silently discarded.
func generateDotfile(trie *Node, output string) (string, error) {
    const fontSize = "35"

    g := gographviz.NewGraph()
    if err := g.SetName("G"); err != nil {
        return "", err
    }
    if err := g.SetDir(true); err != nil {
        return "", err
    }

    // attrsOf builds the Graphviz attributes of one trie node.
    attrsOf := func(n *Node) map[string]string {
        return map[string]string{"label": n.getLabel(), "shape": n.getShape(), "fontsize": fontSize}
    }

    // visitAll must be declared before it is assigned so that the closure can
    // call itself recursively.
    var visitAll func(n *Node) error
    visitAll = func(n *Node) error {
        parentID := strconv.Itoa(n.ID)
        for _, child := range n.Children {
            childID := strconv.Itoa(child.ID)
            if err := g.AddNode("G", parentID, attrsOf(n)); err != nil {
                return err
            }
            if err := g.AddNode("G", childID, attrsOf(child)); err != nil {
                return err
            }
            if err := g.AddEdge(parentID, childID, true, nil); err != nil {
                return err
            }
            if err := visitAll(child); err != nil {
                return err
            }
        }
        return nil
    }
    if err := visitAll(trie); err != nil {
        return "", err
    }

    return g.String(), nil
}

隞胯瑯喋怒芣具柴潦踵具艾押斗具摰鋆整 押斗具摰鋆柴潦踵U怒氬芥箝怒臬桃車憿整 恍LOUDS押柴扼

takeda25.hatenablog.jp