ํ์ฌ ์๋ผ์คํฑ์์น๋ฅผ ์ด์ฉํด ์์งํ ๋ฐ์ดํฐ(MBTI ํ์ ๋ณ ํ ์คํธ ๋ฐ์ดํฐ)๋ฅผ ์กฐํํ๋ ํ๋ก์ ํธ๋ฅผ ์งํํ๊ณ ์์ต๋๋ค. ์ฌ๊ธฐ์ MBTI ํ์ ๊ณผ ์ค๋งํธํฐ(์์ดํฐ ๋๋ ๊ฐค๋ญ์)์ ์๊ด์ฑ์ ๋ถ์ํ๊ธฐ ์ํด ES์ ์ฟผ๋ฆฌ๋ฅผ ํ๋ํ๊ณ ์์ต๋๋ค.
인덱스 구성
์ฝํ ์ธ ๋ด๋ถ์์ ๋ช ์ฌ๋ง ์ถ์ถํ์ฌ ๋ถ์ํ๊ธฐ ์ํด nori_noun์ด๋ผ๋ ๋ถ์๊ธฐ๋ฅผ ๋ณ๋๋ก ์์ฑํ์ฌ ํ๋๋ก ์ค์ ํ์ต๋๋ค.
{
  "mbti": {
    "aliases": {},
    "mappings": {
      "properties": {
        "comment_cnt": {
          "type": "integer"
        },
        "contents": {
          "type": "text",
          "fields": {
            "full": {
              "type": "keyword"
            },
            "nori_mixed": {
              "type": "text",
              "analyzer": "nori_mixed",
              "search_analyzer": "standard"
            },
            "nori_noun": {
              "type": "text",
              "analyzer": "nori_pos_noun",
              "search_analyzer": "standard"
            }
          }
        },
        "doc_url": {
          "type": "text"
        },
        "keyword": {
          "type": "keyword"
        },
        "like_cnt": {
          "type": "integer"
        },
        "platform": {
          "type": "keyword"
        },
        "published_at": {
          "type": "date"
        },
        "title": {
          "type": "text"
        },
        "writer": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        }
      }
    },
    "settings": {
      "index": {
        "routing": {
          "allocation": {
            "include": {
              "_tier_preference": "data_content"
            }
          }
        },
        "number_of_shards": "1",
        "provided_name": "mbti",
        "creation_date": "1649761141991",
        "analysis": {
          "filter": {
            "pos_filter": {
              "type": "nori_part_of_speech",
              "stoptags": [
                "VV",
                "VA",
                "VX",
                "VCP",
                "VCN",
                "MM",
                "MAG",
                "MAJ",
                "IC",
                "J",
                "E",
                "XPN",
                "XSA",
                "XSN",
                "XSV",
                "SP",
                "SSC",
                "SSO",
                "SC",
                "SE",
                "UNA"
              ]
            }
          },
          "analyzer": {
            "nori_mixed": {
              "filter": "shingle",
              "tokenizer": "nori_t_mixed"
            },
            "nori_pos_noun": {
              "filter": "pos_filter",
              "type": "custom",
              "tokenizer": "nori_t_mixed"
            }
          },
          "tokenizer": {
            "nori_t_mixed": {
              "type": "nori_tokenizer",
              "decompound_mode": "mixed"
            }
          }
        },
        "number_of_replicas": "1",
        "uuid": "e8w9oHlLSyqF5oDzviz0KA",
        "version": {
          "created": "7170299"
        }
      }
    }
  }
}
각 MBTI 타입별 스마트폰 선호도
GET mbti/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "contents.nori_noun": {
              "query": "ENFP"
            }
          }
        },
        {
          "match_phrase": {
            "contents": {
              "query": "아이폰",
              "boost": 2
            }
          }
        }
      ]
    }
  }
}
Match ์ฟผ๋ฆฌ์ ์ฝํ ์ธ ๋ด๋ถ์ MBTI ํ์ ์ด ์๋์ง์ ๋ํด ์ฐ์ ์กฐํํ๋ค. ๊ทธ๋ฆฌ๊ณ match_phrase๋ก ์์ดํฐ์ boost๋ฅผ ํ์ฌ ์์ดํฐ์ด ํฌํจ๋ ์๋ฌธ์ด ์์ ๊ฒ์๋๋๋ก ์ฟผ๋ฆฌ๋ฅผ ๋ง๋ค์์ต๋๋ค.
์ฌ์ค match_phrase ๋ ๊ตฌ๋ฌธ, ์ฆ โ์์ดํฐ ๊ฟํโ์ฒ๋ผ ๊ตฌ๋ฌธ์ ๋งค์นญ ์ ๋๋ฅผ boost ํด์ฃผ๊ธฐ ๋๋ฌธ์ ์ ์ฌ๋ก์์๋ ์ ์ ํ ์ฌ์ฉ ์ฌ๋ก๋ ์๋๋๋ค. ์๋๋ โENFP ์์ดํฐโ์ผ๋ก ๊ฒ์ํ์ฌ ๊ฐ ๋จ์ด ๊ฐ์ Distance๋ฅผ ๊ฒ์ Score ์๊ณ ๋ฆฌ์ฆ์ผ๋ก ์ฌ์ฉํ๋ ค๊ณ ํ์ง๋ง ์คํจํ์ฌ ์ฐ์ match_phrase๋ฅผ ์ฌ์ฉํ์์ต๋๋ค.
해결방법은? (아래부터는 삽질의 역사입니다...)
#1 Should ๋ฅผ ์ฌ์ฉํ ์กฐํ
Should์ ๊ฒฝ์ฐ ํํ ๋งํ๋ or ์กฐ๊ฑด์ผ๋ก ์กฐ๊ฑด์ ํฌํจ๋ ๋จ์ด๊ฐ ์์๋ก ์ฌ๋ผ์ค๋๋ก(์ฆ score ์ ์๊ฐ ๋ ๋์์ง ์ ์๋๋ก) ์ฟผ๋ฆฌ ๋ณ๊ฒฝ
GET mbti/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "contents.nori_noun": {
              "query": "ENFP"
            }
          }
        }
      ],
      "should": [
        {
          "match": {
            "contents": {
              "query": "아이폰",
              "boost": 2
            }
          }
        },
        {
          "match": {
            "contents": {
              "query": "ENFP",
              "boost": 1
            }
          }
        }
      ]
    }
  }
}