Elasticsearch 学习笔记

Elasticsearch 可以用于快速地存储、搜索和分析海量数据。这里会将其简称为 ES。本文中的大部分 DSL 都是在 Kibana Dev Tools 中执行的。

👉文章列表
Elasticsearch 7 学习笔记 ⏬ 快速上手 Index、Type、Document 安装与启动 Kibana 的使用 下载历史版本 文档唯一性 默认端口与端口设置 创建和删除索引 自定义 mapping 和 settings 设置索引副本数量和分片数量 查看所有索引 数据类型 字符串类型 keyword 、text 数组 添加和更新文档 通过 _bulk 批量添加文档 使用 from 、size 进行分页查询 查询中使用 sort 进行排序 查询结果只展示部分字段 查询结果中展示 _version 字段 使用 ignore_above 限制字符串长度 动态映射 精确搜索/全文搜索示例
客户端 ⏬ Python 客户端 Java TransportClient API 客户端 Java REST Client API 客户端
其他 ⏬ ES 6 快速上手 ES 5 快速上手

Elasticsearch 7 : 精确搜索/全文搜索示例


目录:


实战1

创建索引:

PUT student
{
  "mappings" : {
    "properties" : {
      "name" : {
        "type" : "keyword"
      },
      "age" : {
        "type" : "integer"
      },
      "height": {
        "type": "integer"
      }
    }
  }
}

使用 _bulk 创建多个文档:

POST _bulk
{ "index" : { "_index" : "student", "_id" : "1" } }
{ "name" : "张三", "age": 12 }
{ "index" : { "_index" : "student", "_id" : "2" } }
{ "name" : "李四", "age": 10,  "height": 112 }
{ "index" : { "_index" : "student", "_id" : "3" } }
{ "name" : "王五", "age": 11, "height": 108 }
{ "index" : { "_index" : "student", "_id" : "4" } }
{ "name" : "陈六", "age": 11, "height": 111 }

查询所有数据

GET student/_search

或者:

GET student/_search
{
  "query": {
    "match_all": {}
  }
}

或者:

POST student/_search
{
  "query": {
    "match_all": {}
  }
}

查询张三的信息:

POST student/_search
{
  "query": {
    "match": {
      "name": "张三"
    }
  }
}

或者:

POST student/_search
{
  "query": {
    "term": {
      "name": "张三"
    }
  }
}

或者:

POST student/_search
{
  "query": {
    "bool": {
      "must": [
        { "term" : { "name": "张三" } }
      ]
    }
  }
}

查询 11 岁的张三的信息:

因为没有符合条件的数据(张三是 12 岁,不是 11 岁),所以下面的查询结果为空:

POST student/_search
{
  "query": {
    "bool": {
      "must": [
        { "term" : { "name": "张三" } },
        { "term" : { "age": 11 } }
      ]
    }
  }
}

查询 11岁、12岁的所有学生信息

POST student/_search
{
  "query": {
    "bool": {
      "must": [
        { "terms" : { "age": [11, 12] } }
      ]
    }
  }
}

注意,是terms,不是 term

查询小于11岁的学生信息

POST student/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "age": {
              "lt": "11"
            }
          }
        }
      ]
    }
  }
}

查询11岁到13岁的学生信息

POST student/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "age": {
              "gte": "11",
              "lte": "13"
            }
          }
        }
      ]
    }
  }
}

查询有身高 height 记录的学生

POST student/_search
{
    "query": {
        "exists": {
            "field": "height"
        }
    }
}

查询没有身高 height 记录的学生

POST student/_search
{
  "query": {
    "bool": {
      "must_not": [
        {
          "exists": { "field": "height"}
        }
      ]
    }
  }
}

在早期版本的 ES 中有一个 missing 查询,效果相同。不过它已在 ES 2.2.0 中被废弃,并在 5.0 中被移除,应改用上面的 must_not + exists 写法。

是否存在11岁的学生

POST student/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "term": {"age": 11}
        }
      ]
    }
  },
  "from": 0,
  "size": 1
}

若结果中有记录,则认为存在。

11 岁的学生总人数

方法1:

# 请求
POST student/_count
{
  "query": {
    "bool": {
      "must": [
        {
          "term": {"age": 11}
        }
      ]
    }
  }
}

# 响应
{
  "count" : 2,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  }
}

方法2:使用聚合查询

# 请求
POST student/_search
{
  "aggs":{
    "age_count": {
      "terms": {"field": "age"}
    }
  },
  "size": 0
}

# 响应
{
  "took" : 71,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 4,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "age_count" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : 11,
          "doc_count" : 2
        },
        {
          "key" : 10,
          "doc_count" : 1
        },
        {
          "key" : 12,
          "doc_count" : 1
        }
      ]
    }
  }
}

可以看到,11 岁的有2个,10岁的1个,12岁的1个。

方法3:查询后进行聚合:

# 请求
POST student/_search
{
  "query": {
    "bool": {
      "must": [
        {"term": {"age": 11} }
      ]
    }
  },
  "aggs":{
    "age_count": {
      "terms": {"field": "age"}
    }
  },
  "size": 0
}

# 响应
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "age_count" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : 11,
          "doc_count" : 2
        }
      ]
    }
  }
}

方法4:

# 请求
POST student/_search
{
  "size": 0,
  "aggregations": {
    "group_by_age": {
      "aggregations": {
        "count_age": {
          "value_count": {
            "field": "_index"
          }
        }
      },
      "terms": {
        "field": "age"
      }
    }
  }
}

# 响应
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 4,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_age" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : 11,
          "doc_count" : 2,
          "count_age" : {
            "value" : 2
          }
        },
        {
          "key" : 10,
          "doc_count" : 1,
          "count_age" : {
            "value" : 1
          }
        },
        {
          "key" : 12,
          "doc_count" : 1,
          "count_age" : {
            "value" : 1
          }
        }
      ]
    }
  }
}

学生的平均岁数

方式1:

# 请求
POST student/_search
{
  "aggs":{
    "age_stat": {
      "stats": {"field": "age"}
    }
  },
  "size": 0
}

# 响应
{
  "took" : 45,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 4,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "age_stat" : {
      "count" : 4,
      "min" : 10.0,
      "max" : 12.0,
      "avg" : 11.0,
      "sum" : 44.0
    }
  }
}

stats 指令,会计算出指定字段的 count、min、max、avg、sum。

方式2:

# 请求
POST student/_search
{
  "aggs":{
    "age_stat": {
      "avg": {"field": "age"}
    }
  },
  "size": 0
}

# 响应
{
  "took" : 5,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 4,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "age_stat" : {
      "value" : 11.0
    }
  }
}

最高的学生是谁?

最高的学生可能有多个。

方式1:找到最高的身高值,然后根据身高搜索学生信息。

方式2:按照身高从大到小排序,取第 1 条记录得到最高的身高值,然后根据该身高值搜索所有学生信息:

POST student/_search
{
  "query": {
    "match_all": {}
  },
  "sort" : [
    {"height": {"order": "desc"}}
  ],
  "from": 0,
  "size": 1
}

每个年龄的平均身高是多少?

# 请求
POST student/_search
{
  "size": 0,
  "aggregations": {
    "group_by_age": {
      "aggregations": {
        "avg_height": {
          "avg": {
            "field": "height"
          }
        }
      },
      "terms": {
        "field": "age"
      }
    }
  }
}

# 响应
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 4,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_age" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : 11,
          "doc_count" : 2,
          "avg_height" : {
            "value" : 109.5
          }
        },
        {
          "key" : 10,
          "doc_count" : 1,
          "avg_height" : {
            "value" : 112.0
          }
        },
        {
          "key" : 12,
          "doc_count" : 1,
          "avg_height" : {
            "value" : null
          }
        }
      ]
    }
  }
}

获取每个年龄的平均身高,并按照年龄从小到大排序

方式1:

# 请求
POST student/_search
{
  "size": 0,
  "aggregations": { 
    "group_by_age": {
      "aggregations": {
        "avg_height": {
          "avg": {
            "field": "height"
          }
        }
      },
      "terms": {
        "field": "age",
        "order": {
          "_term": "asc"
        }
      }
    }
  }
}

# 响应 (响应中指出 _term 已经废弃,应使用 _key)
#! Deprecation: Deprecated aggregation order key [_term] used, replaced by [_key]
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 4,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_age" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : 10,
          "doc_count" : 1,
          "avg_height" : {
            "value" : 112.0
          }
        },
        {
          "key" : 11,
          "doc_count" : 2,
          "avg_height" : {
            "value" : 109.5
          }
        },
        {
          "key" : 12,
          "doc_count" : 1,
          "avg_height" : {
            "value" : null
          }
        }
      ]
    }
  }
}

方式2:

POST student/_search
{
  "size": 0,
  "aggregations": { 
    "group_by_age": {
      "aggregations": {
        "avg_height": {
          "avg": {
            "field": "height"
          }
        },
        "bucket_sort_by_avg_height": {
          "bucket_sort": {
            "sort": [
              {"_key": {"order": "asc"}}
            ]
          }
        }
      },
      "terms": {
        "field": "age"
      }
    }
  }
}

获取每个年龄的平均身高,并按照平均身高从大到小排序

# 请求

POST student/_search
{
  "size": 0,
  "aggregations": { 
    "group_by_age": {
      "aggregations": {
        "avg_height": {
          "avg": {
            "field": "height"
          }
        },
        "bucket_sort_by_avg_height": {
          "bucket_sort": {
            "sort": [
              {"avg_height": {"order": "desc"}}
            ]
          }
        }
      },
      "terms": {
        "field": "age"
      }
    }
  }
}

# 响应 (12 岁的分组平均身高为 null,bucket_sort 默认的 gap_policy 为 skip,因此该分组不会出现在结果中)
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 4,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_age" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : 10,
          "doc_count" : 1,
          "avg_height" : {
            "value" : 112.0
          }
        },
        {
          "key" : 11,
          "doc_count" : 2,
          "avg_height" : {
            "value" : 109.5
          }
        }
      ]
    }
  }
}

( 本文完 )

文章目录