ElasticSearch 高级操作
ElasticSearch 集群管理
| POST /person1/_doc/5 { "name":"张三5号", "age":18, "address":"北京海淀区" }
| #批量操作 #1.删除5号 #新增8号 #更新2号 name为2号 POST _bulk {"delete":{"_index":"person1","_id":"5"}} {"create":{"_index":"person1","_id":"8"}} {"name":"八号","age":18,"address":"北京"} {"update":{"_index":"person1","_id":"2"}} {"doc":{"name":"2号"}}
| { "took" : 51, "errors" : true, "items" : [ { "delete" : { "_index" : "person1", "_type" : "_doc", "_id" : "5", "_version" : 2, "result" : "deleted", "_shards" : { "total" : 2, "successful" : 1, "failed" : 0 }, "_seq_no" : 6, "_primary_term" : 2, "status" : 200 } }, { "create" : { "_index" : "person1", "_type" : "_doc", "_id" : "8", "_version" : 1, "result" : "created", "_shards" : { "total" : 2, "successful" : 1, "failed" : 0 }, "_seq_no" : 7, "_primary_term" : 2, "status" : 201 } }, { "update" : { "_index" : "person1", "_type" : "_doc", "_id" : "2", "_version" : 2, "result" : "updated", "_shards" : { "total" : 2, "successful" : 1, "failed" : 0 }, "_seq_no" : 10, "_primary_term" : 2, "status" : 200 } } ] }
@Test public void test2() throws IOException {
BulkRequest bulkRequest =new BulkRequest();
DeleteRequest deleteRequest=new DeleteRequest("person1","5"); bulkRequest.add(deleteRequest);
Map<String, Object> map=new HashMap<>(); map.put("name","六号"); IndexRequest indexRequest=new IndexRequest("person1").id("6").source(map); bulkRequest.add(indexRequest); Map<String, Object> mapUpdate=new HashMap<>(); mapUpdate.put("name","三号"); UpdateRequest updateRequest=new UpdateRequest("person1","3").doc(mapUpdate);
BulkResponse response = client.bulk(bulkRequest, RequestOptions.DEFAULT); System.out.println(response.status());
| PUT goods { "mappings": { "properties": { "title": { "type": "text", "analyzer": "ik_smart" }, "price": { "type": "double" }, "createTime": { "type": "date" }, "categoryName": { "type": "keyword" }, "brandName": { "type": "keyword" }, "spec": { "type": "object" }, "saleNum": { "type": "integer" }, "stock": { "type": "integer" } } } }
| <dependency> <groupId>org.mybatis.spring.boot</groupId> <artifactId>mybatis-spring-boot-starter</artifactId> <version>2.1.0</version> </dependency>
<dependency> <groupId>mysql</groupId> <artifactId>mysql-connector-java</artifactId> </dependency>
<dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.4</version> </dependency>
| spring: datasource: url: jdbc:mysql:///es?serverTimezone=UTC username: root password: root driver-class-name: com.mysql.cj.jdbc.Driver
mybatis: mapper-locations: classpath:mapper/*Mapper.xml type-aliases-package: com.itheima.elasticsearchdemo2.domain
| package com.itheima.elasticsearchdemo2.domain;
import com.alibaba.fastjson.annotation.JSONField;
import java.util.Date; import java.util.Map;
/**
 * Product entity loaded from the {@code goods} table and serialized to JSON
 * for indexing into the Elasticsearch "goods" index.
 */
public class Goods {

    private int id;
    private String title;
    private double price;
    private int stock;
    private int saleNum;
    private Date createTime;
    private String categoryName;
    private String brandName;
    /** Specification as a key/value structure. */
    private Map spec;

    /** Specification as a raw string; excluded from fastjson serialization. */
    @JSONField(serialize = false)
    private String specStr;

    public int getId() {
        return this.id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getTitle() {
        return this.title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public double getPrice() {
        return this.price;
    }

    public void setPrice(double price) {
        this.price = price;
    }

    public int getStock() {
        return this.stock;
    }

    public void setStock(int stock) {
        this.stock = stock;
    }

    public int getSaleNum() {
        return this.saleNum;
    }

    public void setSaleNum(int saleNum) {
        this.saleNum = saleNum;
    }

    public Date getCreateTime() {
        return this.createTime;
    }

    public void setCreateTime(Date createTime) {
        this.createTime = createTime;
    }

    public String getCategoryName() {
        return this.categoryName;
    }

    public void setCategoryName(String categoryName) {
        this.categoryName = categoryName;
    }

    public String getBrandName() {
        return this.brandName;
    }

    public void setBrandName(String brandName) {
        this.brandName = brandName;
    }

    public Map getSpec() {
        return this.spec;
    }

    public void setSpec(Map spec) {
        this.spec = spec;
    }

    public String getSpecStr() {
        return this.specStr;
    }

    public void setSpecStr(String specStr) {
        this.specStr = specStr;
    }

    /** Renders every field in the form {@code Goods{id=..., title='...', ...}}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Goods{");
        text.append("id=").append(id);
        text.append(", title='").append(title).append('\'');
        text.append(", price=").append(price);
        text.append(", stock=").append(stock);
        text.append(", saleNum=").append(saleNum);
        text.append(", createTime=").append(createTime);
        text.append(", categoryName='").append(categoryName).append('\'');
        text.append(", brandName='").append(brandName).append('\'');
        text.append(", spec=").append(spec);
        text.append(", specStr='").append(specStr).append('\'');
        text.append('}');
        return text.toString();
    }
}
| package com.itheima.elasticsearchdemo2.mapper;
import com.itheima.elasticsearchdemo2.domain.Goods; import org.apache.ibatis.annotations.Mapper; import org.springframework.stereotype.Repository;
import java.util.List;
/** MyBatis mapper interface for reading Goods records. */
@Repository @Mapper public interface GoodsMapper {
/** Returns all Goods records as a list. */
public List<Goods> findAll();
| <?xml version="1.0" encoding="UTF-8" ?> <!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.itheima.elasticsearchdemo2.mapper.GoodsMapper">
<select id="findAll" resultType="goods"> select `id` , `title` , `price` , `stock` , `saleNum` , `createTime` , `categoryName`, `brandName` , `spec` as specStr
from goods
@Test public void test3() throws IOException { List<Goods> goodsList = goodsMapper.findAll();
BulkRequest bulkRequest=new BulkRequest();
for (Goods goods : goodsList) {
String specStr = goods.getSpecStr();
Map map = JSON.parseObject(specStr, Map.class);
String data = JSON.toJSONString(goods);
IndexRequest indexRequest=new IndexRequest("goods").source(data,XContentType.JSON); bulkRequest.add(indexRequest);
BulkResponse response = client.bulk(bulkRequest, RequestOptions.DEFAULT); System.out.println(response.status());
| #spec配置的数据类型是JSON对象,所以当存放字符串的时候报错 "spec": { "type": "object" },
| # 默认情况下,es一次展示10条数据,通过from和size来控制分页 # 查询结果详解
GET goods/_search { "query": { "match_all": {} }, "from": 0, "size": 100 }
GET goods
@Test public void matchAll() throws IOException {
SearchRequest searchRequest=new SearchRequest("goods");
SearchSourceBuilder sourceBuilder=new SearchSourceBuilder();
QueryBuilder queryBuilder= QueryBuilders.matchAllQuery(); sourceBuilder.query(queryBuilder);
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHits hits = searchResponse.getHits();
Long total= hits.getTotalHits().value; System.out.println("总数:"+total); SearchHit[] hits1 = hits.getHits(); List<Goods> goodsList = new ArrayList<>(); for (SearchHit searchHit : hits1) { String sourceAsString = searchHit.getSourceAsString(); Goods goods = JSON.parseObject(sourceAsString, Goods.class); goodsList.add(goods); }
for (Goods goods : goodsList) { System.out.println(goods); }
| text:会分词,不支持聚合
| GET goods/_search { "query": { "term": { "title": { "value": "华为" } } } }
例如:查询title 为“华为”的,title type 为text
查询categoryName 字段时,categoryName字段为keyword ,keyword:不会分词,将全部内容作为一个词条,
| GET goods/_search { "query": { "term": { "categoryName": { "value": "华为手机" } } } }
| # match查询 GET goods/_search { "query": { "match": { "title": "华为手机" } }, "size": 500 }
match 的默认搜索(or 并集)
例如:华为手机,会分词为 “华为”,“手机” 只要出现其中一个词条都会搜索到
match的 and(交集) 搜索
例如:华为手机,会分词为 “华为”,“手机”,但要求“华为”和“手机”同时出现在该字段的内容中
- term query会去倒排索引中寻找确切的term,它并不知道分词器的存在。这种查询适合keyword 、numeric、date
- match query知道分词器的存在。并且理解是如何被分词的
wildcard查询:属于词条级查询,不会对查询条件进行分词,而是直接用通配符模式去匹配倒排索引中的词条。可以使用通配符 ?(任意单个字符) 和 * (0个或多个字符)
| "*华*" 包含华字的 "华*" 华字后边0个或多个字符 "华?" 华字后边单个字符 "*华"或"?华" 会引发全表(全索引)扫描 注意效率问题
| # wildcard 查询。查询条件不分词,按通配符模式匹配词条(模糊查询) GET goods/_search { "query": { "wildcard": { "title": { "value": "华*" } } } }
| \w:匹配包括下划线的任何单词字符,等价于 [A-Za-z0-9_] 开头的反斜杠是转义符
(.)*为任意字符 正则查询取决于正则表达式的效率
| GET goods/_search { "query": { "regexp": { "title": "\\w+(.)*" } } }
| # 前缀查询 对keyword类型支持比较好 GET goods/_search { "query": { "prefix": { "brandName": { "value": "三" } } } }
| WildcardQueryBuilder query = QueryBuilders.wildcardQuery("title", "华*");
RegexpQueryBuilder query = QueryBuilders.regexpQuery("title", "\\w+(.)*"); PrefixQueryBuilder query = QueryBuilders.prefixQuery("brandName", "三");
| # 范围查询
GET goods/_search { "query": { "range": { "price": { "gte": 2000, "lte": 3000 } } }, "sort": [ { "price": { "order": "desc" } } ] }
| RangeQueryBuilder query = QueryBuilders.rangeQuery("price");
queryString 多条件查询
query_string:识别query中的连接符(or 、and)
| # queryString
GET goods/_search { "query": { "query_string": { "fields": ["title","categoryName","brandName"], "query": "华为 AND 手机" } } }
simple_query_string:不识别query中的连接符(or 、and),查询时会将 “华为”、“and”、“手机”分别进行查询
| GET goods/_search { "query": { "simple_query_string": { "fields": ["title","categoryName","brandName"], "query": "华为 AND 手机" } } }
| GET goods/_search { "query": { "query_string": { "fields": ["title","brandName","categoryName"], "query": "华为手机 " , "default_operator": "AND" } } }
| QueryStringQueryBuilder query = QueryBuilders.queryStringQuery("华为手机").field("title").field("categoryName") .field("brandName").defaultOperator(Operator.AND);
| GET goods/_search { "query": { "simple_query_string": { "fields": ["title","brandName","categoryName"], "query": "华为手机 " , "default_operator": "OR" } } }
注意:query中的or and 是查询时 匹配条件是否同时出现——or 出现一个即可,and 两个条件同时出现
default_operator的or and 是对结果进行 并集(or)、交集(and)
| # boolquery #must和filter配合使用时,max_score(得分)是显示的 #must 默认数组形式 GET goods/_search { "query": { "bool": { "must": [ { "term": { "brandName": { "value": "华为" } } } ], "filter":[ { "term": { "title": "手机" } }, { "range":{ "price": { "gte": 2000, "lte": 3000 } } } ] } } } #filter 单独使用 filter可以是单个条件,也可多个条件(数组形式) GET goods/_search { "query": { "bool": { "filter": [ { "term": { "brandName": { "value": "华为" } } } ] } } }
- 查询品牌名称为:华为
- 查询标题包含:手机
- 查询价格在:2000-3000
must 、filter为连接方式
| BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("brandName", "华为"); boolQuery.must(termQueryBuilder); MatchQueryBuilder matchQuery = QueryBuilders.matchQuery("title", "手机"); boolQuery.filter(matchQuery);
RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("price"); rangeQuery.gte(2000); rangeQuery.lte(3000); boolQuery.filter(rangeQuery);
•桶聚合:相当于MySQL的 group by 操作。不要对text类型的数据进行分组,会失败。
| # 聚合查询
# 指标聚合 聚合函数
GET goods/_search { "query": { "match": { "title": "手机" } }, "aggs": { "max_price": { "max": { "field": "price" } } } }
# 桶聚合 分组
GET goods/_search { "query": { "match": { "title": "手机" } }, "aggs": { "goods_brands": { "terms": { "field": "brandName", "size": 100 } } } }
- 查询title包含手机的数据
- 查询品牌列表
@Test public void testAggQuery() throws IOException {
SearchRequest searchRequest=new SearchRequest("goods");
SearchSourceBuilder sourceBuilder=new SearchSourceBuilder();
MatchQueryBuilder queryBuilder = QueryBuilders.matchQuery("title", "手机");
sourceBuilder.query(queryBuilder); AggregationBuilder aggregation=AggregationBuilders.terms("goods_brands").field("brandName").size(100); sourceBuilder.aggregation(aggregation);
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHits hits = searchResponse.getHits();
Long total= hits.getTotalHits().value; System.out.println("总数:"+total);
Aggregations aggregations = searchResponse.getAggregations(); Map<String, Aggregation> aggregationMap = aggregations.asMap();
Terms goods_brands =(Terms) aggregationMap.get("goods_brands");
List<? extends Terms.Bucket> buckets = goods_brands.getBuckets();
Map<String,Object>map=new HashMap<>(); for (Terms.Bucket bucket : buckets) {
System.out.println(bucket.getKey()); map.put(bucket.getKeyAsString(),bucket.getDocCount()); }
默认前后缀:<em> 和 </em>
| GET goods/_search { "query": { "match": { "title": "电视" } }, "highlight": { "fields": { "title": { "pre_tags": "<font color='red'>", "post_tags": "</font>" } } } }
1. 设置高亮
2. 将高亮了的字段数据,替换原有数据
@Test public void testHighLightQuery() throws IOException {
SearchRequest searchRequest = new SearchRequest("goods");
SearchSourceBuilder sourceBulider = new SearchSourceBuilder();
MatchQueryBuilder query = QueryBuilders.matchQuery("title", "手机");
HighlightBuilder highlighter = new HighlightBuilder(); highlighter.field("title"); highlighter.preTags("<font color='red'>"); highlighter.postTags("</font>");
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHits searchHits = searchResponse.getHits(); long value = searchHits.getTotalHits().value; System.out.println("总记录数:"+value);
List<Goods> goodsList = new ArrayList<>(); SearchHit[] hits = searchHits.getHits(); for (SearchHit hit : hits) { String sourceAsString = hit.getSourceAsString();
Goods goods = JSON.parseObject(sourceAsString, Goods.class);
Map<String, HighlightField> highlightFields = hit.getHighlightFields(); HighlightField HighlightField = highlightFields.get("title"); Text[] fragments = HighlightField.fragments(); goods.setTitle(fragments[0].toString()); goodsList.add(goods); }
for (Goods goods : goodsList) { System.out.println(goods); }
| #查询别名 默认别名无法查看,默认别名同索引名 GET goods/_alias/ #结果 { "goods" : { "aliases" : { } } }
| # -------重建索引-----------
# 新建student_index_v1。索引名称必须全部小写
PUT student_index_v1 { "mappings": { "properties": { "birthday":{ "type": "date" } } } } #查看 student_index_v1 结构 GET student_index_v1 #添加数据 PUT student_index_v1/_doc/1 { "birthday":"1999-11-11" } #查看数据 GET student_index_v1/_search
#添加数据 PUT student_index_v1/_doc/1 { "birthday":"1999年11月11日" }
2.重建索引:将student_index_v1 数据拷贝到 student_index_v2
| # 业务变更了,需要改变birthday字段的类型为text
# 1. 创建新的索引 student_index_v2 # 2. 将student_index_v1 数据拷贝到 student_index_v2
# 创建新的索引 student_index_v2 PUT student_index_v2 { "mappings": { "properties": { "birthday":{ "type": "text" } } } } # 将student_index_v1 数据拷贝到 student_index_v2 # _reindex 拷贝数据 POST _reindex { "source": { "index": "student_index_v1" }, "dest": { "index": "student_index_v2" } }
GET student_index_v2/_search
PUT student_index_v2/_doc/2 { "birthday":"1999年11月11日" }
注意:DELETE student_index_v1 这一操作将删除student_index_v1索引库,并不是删除别名
| # 思考: 现在java代码中操作es,还是使用的 student_index_v1 老的索引名称。 # 1. 改代码(不推荐) # 2. 索引别名(推荐)
# 步骤: # 0. 先删除student_index_v1 # 1. 给student_index_v2起个别名 student_index_v1
# 先删除student_index_v1 #DELETE student_index_v1 这一操作将删除student_index_v1索引库 #索引库默认的别名与索引库同名,无法删除
# 给student_index_v1起个别名 student_index_v11 POST student_index_v2/_alias/student_index_v11 #测试删除命令 POST /_aliases { "actions": [ {"remove": {"index": "student_index_v1", "alias": "student_index_v11"}} ] } DELETE student_index_v1 # 给student_index_v2起个别名 student_index_v1 POST student_index_v2/_alias/student_index_v1
#查询别名 GET goods/_alias/
GET student_index_v1/_search GET student_index_v2/_search
3.16 查询脚本命令集合
4-ElasticSearch 集群
es 集群:
•ElasticSearch 天然支持分布式
•ElasticSearch 的设计隐藏了分布式本身的复杂性
•集群(cluster):一组拥有共同的 cluster name 的 节点。
•节点(node) :集群中的一个 Elasticsearch 实例
•索引(index) :es存储数据的地方。相当于关系数据库中的database概念
•主分片(Primary shard):相对于副本分片的定义。
•副本分片(Replica shard)每个主分片可以有一个或者多个副本,数据和主分片一样。
| vim kibana-7.4.0-linux-x86_64-cluster/config/kibana.yml
| i18n.locale: "zh-CN"
server.port: 5602 server.host: "" server.name: "kibana-itcast-cluster" elasticsearch.hosts: ["http://localhost:9201","http://localhost:9202","http://localhost:9203"] elasticsearch.requestTimeout: 99999
4.5-JavaAPI 访问集群
| PUT cluster_test { "mappings": { "properties": { "name":{ "type": "text" } } } }
GET cluster_test GET cluster_test/_search
POST /cluster_test/_doc/1 { "name":"张三" }
| @Resource(name="clusterClient") RestHighLevelClient clusterClient;
// Reads document "1" from the "cluster_test" index through clusterClient and prints the response source as a string.
@Test public void testCluster() throws IOException {
// NOTE(review): despite its name, indexRequest is a GetRequest (a read, not an index operation).
GetRequest indexRequest=new GetRequest("cluster_test","1");
GetResponse response = clusterClient.get(indexRequest, RequestOptions.DEFAULT); System.out.println(response.getSourceAsString());
| private String host1;
private int port1;
private String host2;
private int port2;
private String host3;
private int port3;
/**
 * Registers the bean named "clusterClient": a RestHighLevelClient built
 * from three HTTP hosts (host1:port1, host2:port2, host3:port3).
 */
@Bean("clusterClient")
public RestHighLevelClient clusterClient() {
    HttpHost[] nodes = {
        new HttpHost(host1, port1, "http"),
        new HttpHost(host2, port2, "http"),
        new HttpHost(host3, port3, "http")
    };
    return new RestHighLevelClient(RestClient.builder(nodes));
}
| elasticsearch: host: port: 9200 host1: port1: 9201 host2: port2: 9202 host3: port3: 9203
| #分片配置 #"number_of_shards": 3, 主分片数量 #"number_of_replicas": 1 主分片备份数量,每一个主分片有一个备份 # 3个主分片+3个副分片=6个分片 PUT cluster_test1 { "settings": { "number_of_shards": 3, "number_of_replicas": 1 }, "mappings": { "properties": { "name":{ "type": "text" } } } }
2.itcast-3 挂掉
4.itcast-3 恢复正常后,节点分片将自平衡回去(并不一定是原来的分片)
2.分片数量推荐 = 节点数量 * 1~3倍
1.每个分片20GB 则可以分为40个分片
2.分片数量推荐 = 节点数量 * 1~3倍 → 按2倍计算:40/2=20,即20个节点
•Elasticsearch 是怎么知道一个文档应该存放到哪个分片中呢?
•查询时,根据文档id查询文档, Elasticsearch 又该去哪个分片中查询数据呢?
•路由算法 :shard_index = hash(id) % number_of_primary_shards
查询id为5的文档:假如hash(5)=17 ,根据算法17%3=2
ElasticSearch 集群正常状态:
• 一个正常es集群中只有一个主节点(Master),主节点负责管理整个集群。如创建或删除索引,跟踪哪些节点是群集的一部分,并决定哪些分片分配给相关的节点。
1.网络原因:discovery.zen.ping_timeout 超时时间配置大一点。默认是3S
•node.master: true
•node.data: false
•node.master: false
•node.data: true
3.JVM内存回收:修改 config/jvm.options 文件的 -Xms 和 -Xmx 为服务器的内存一半。
30-ElasticSearch 集群-集群扩容
5 案例查询
5.1 查询数据准备
-- Sample product table: the source data for the bulk import into the "goods" index.
-- Changes from the original DDL:
--   * price is decimal(22, 2); decimal(22) has scale 0 and would round away the fractional part.
--   * id, stock and saleNum are int rather than double (they are whole numbers).
--   * id is the primary key.
create table `goods` (
    `id` int not null,
    `title` varchar(300),
    `price` decimal(22, 2),
    `stock` int,
    `saleNum` int,
    `createTime` datetime,
    `categoryName` varchar(600),
    `brandName` varchar(300),
    -- spec holds a JSON object serialized as text
    `spec` varchar(600),
    primary key (`id`)
);

insert into `goods`
    (`id`, `title`, `price`, `stock`, `saleNum`, `createTime`, `categoryName`, `brandName`, `spec`)
values
    ('536563', 'new2 - 阿尔卡特 (OT-927) 炭黑 联通3G手机 双卡双待', '299000.00', NULL, '99999', '2015-03-08 21:33:18', '手机', '阿尔卡特', '{\"机身内存\":\"16G\",\"网络\":\"联通3G\"}');

insert into `goods`
    (`id`, `title`, `price`, `stock`, `saleNum`, `createTime`, `categoryName`, `brandName`, `spec`)
values
    ('562379', 'new8- 三星 W999 黑色 电信3G手机 双卡双待双通', '11.00', NULL, '99999', '2015-03-08 21:27:54', '手机', '三星', '{\"机身内存\":\"16G\",\"网络\":\"联通3G\"}');
| PUT goods { "mappings": { "properties": { "title": { "type": "text", "analyzer": "ik_smart" }, "price": { "type": "double" }, "createTime": { "type": "date" }, "categoryName": { "type": "keyword" }, "brandName": { "type": "keyword" }, "spec": { "type": "object" }, "saleNum": { "type": "integer" }, "stock": { "type": "integer" } } } }
- title:商品标题
- price:商品价格
- createTime:创建时间
- categoryName:分类名称。如:家电,手机
- brandName:品牌名称。如:华为,小米
- spec: 商品规格。如: spec:{"屏幕尺寸":"5寸","内存大小":"128G"}
- saleNum:销量
- stock:库存量
| POST goods/_doc/1 { "title":"小米手机", "price":1000, "createTime":"2019-12-01", "categoryName":"手机", "brandName":"小米", "saleNum":3000, "stock":10000, "spec":{ "网络制式":"移动4G", "屏幕尺寸":"4.5" } }