Skip to contents

Tokenizer operations

Usage

tokenizer_set(conn, index, body, ...)

Arguments

conn

an Elasticsearch connection object, see connect()

index

(character) A character vector of index names

body

Request body holding the index settings (analyzer and tokenizer definitions), either a list or a JSON string.

...

Curl options passed on to crul::HttpClient

References

https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-tokenizers.html

Author

Scott Chamberlain myrmecocystus@gmail.com

Examples

if (FALSE) { # \dontrun{
# Example: define a custom n-gram tokenizer on an index, then analyze text
# with the analyzer that uses it.

# connection setup; `x` is passed as the first argument to every call below
(x <- connect())

# set tokenizer

## NGram tokenizer
# Index settings: one analyzer ("my_ngram_analyzer") backed by one custom
# tokenizer ("my_ngram_tokenizer") producing 2- to 3-character grams from
# letters and digits.
# NOTE(review): newer Elasticsearch releases may only accept the lowercase
# type name "ngram" -- confirm against the server version in use.
body <- '{
        "settings" : {
             "analysis" : {
                 "analyzer" : {
                     "my_ngram_analyzer" : {
                         "tokenizer" : "my_ngram_tokenizer"
                     }
                 },
                 "tokenizer" : {
                     "my_ngram_tokenizer" : {
                         "type" : "nGram",
                         "min_gram" : "2",
                         "max_gram" : "3",
                         "token_chars": [ "letter", "digit" ]
                     }
                 }
             }
      }
}'

# start from a clean slate: drop the index if it is already present
if (index_exists(x, "test1")) {
  index_delete(x, "test1")
}

# apply the settings to index "test1"
tokenizer_set(x, index = "test1", body = body)

# run the custom analyzer against some sample text
index_analyze(x, text = "hello world", index = "test1",
  analyzer = "my_ngram_analyzer")
} # }