餐饮 网站模板网站开发需要那些人才
文章目录
- 01.Clickhouse到HBase(Phoenix)数据导入 DataX
 - 介绍
 - 下载
 - 执行同步的组件
 - 配置数据同步
 - 查看官方读写配置样例
 - 创建Hbase和Phoenix表
 - 创建ClickHouse表
 - 写入ClickHouse测试数据
 - 编写ClickHouse2Hbase配置文件
 - 执行同步命令
 
- 拓展
 - ClickHouse同步到MySQL配置文件
 
01.Clickhouse到HBase(Phoenix)数据导入 DataX
介绍
DataX 是一个异构数据源离线同步工具,致力于实现包括关系型数据库(MySQL、Oracle等)、HDFS、Hive、ODPS、HBase、FTP等各种异构数据源之间稳定高效的数据同步功能。
下载
# 官方版本
wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
# 编译好ClickHouse和Hbase2的版本
链接:https://pan.baidu.com/s/1IYU93oGOnvcx34HJaPDudQ 
提取码:bool
 
解压
tar -zxvf datax.tar.gz
 
解压后目录结构如下
[root@dev-bigdata-24-146 datax]# ls
bin  conf  job  lib  log  log_perf  plugin  script  tmp
 
执行同步的组件
reader下是支持读的组件
writer下是支持写的组件
[root@dev-bigdata-24-146 datax]# tree -L 2 plugin/
plugin/
├── reader
│   ├── cassandrareader
│   ├── clickhousereader
│   ├── dbffilereader
│   ├── drdsreader
│   ├── elasticsearchreader
│   ├── ftpreader
│   ├── gaussdbreader
│   ├── gbasereader
│   ├── gdbreader
│   ├── hbase094xreader
│   ├── hbase11xkerberosreader
│   ├── hbase11xreader
│   ├── hbase11xsqlreader
│   ├── hbase20xsqlreader
│   ├── hdfsreader
│   ├── httpreader
│   ├── influxdbreader
│   ├── jsonfilereader
│   ├── kingbaseesreader
│   ├── kudureader
│   ├── mongodbreader
│   ├── mysql8reader
│   ├── mysqlreader
│   ├── odpsreader
│   ├── opentsdbreader
│   ├── oraclereader
│   ├── oscarreader
│   ├── ossreader
│   ├── otsreader
│   ├── otsstreamreader
│   ├── postgresqlreader
│   ├── rdbmsreader
│   ├── redisreader
│   ├── sqlserverreader
│   ├── streamreader
│   ├── tdenginereader
│   └── txtfilereader
└── writer
    ├── adbpgwriter
    ├── adswriter
    ├── cassandrawriter
    ├── clickhousewriter
    ├── dbffilewriter
    ├── dorisdbwriter
    ├── doriswriter
    ├── drdswriter
    ├── elasticsearchwriter
    ├── ftpwriter
    ├── gaussdbwriter
    ├── gbasewriter
    ├── gdbwriter
    ├── greenplumwriter
    ├── hbase094xwriter
    ├── hbase11xkerberoswriter
    ├── hbase11xsqlwriter
    ├── hbase11xwriter
    ├── hbase20xsqlwriter
    ├── hdfswriter
    ├── influxdbwriter
    ├── kingbaseeswriter
    ├── kuduwriter
    ├── mongodbwriter
    ├── mysql8writer
    ├── mysqlwriter
    ├── ocswriter
    ├── odpswriter
    ├── oraclewriter
    ├── oscarwriter
    ├── osswriter
    ├── otswriter
    ├── postgresqlwriter
    ├── rdbmswriter
    ├── rediswriter
    ├── sqlserverwriter
    ├── streamwriter
    ├── tdenginewriter
    ├── tsdbwriter
    └── txtfilewriter
 
配置数据同步
本文使用的是CDH 6.3.2自带的HBase(版本2.1.0),需要额外安装Phoenix
Phoenix安装方法参考:https://datamining.blog.csdn.net/article/details/105572156
查看官方读写配置样例
命令:python bin/datax.py -r clickhousereader -w hbase20xsqlwriter
[root@jast datax]# python bin/datax.py -r clickhousereader -w hbase20xsqlwriter

DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.

Please refer to the clickhousereader document:
https://github.com/alibaba/DataX/blob/master/clickhousereader/doc/clickhousereader.md

Please refer to the hbase20xsqlwriter document:
https://github.com/alibaba/DataX/blob/master/hbase20xsqlwriter/doc/hbase20xsqlwriter.md

Please save the following configuration as a json file and use
python {DATAX_HOME}/bin/datax.py {JSON_FILE_NAME}.json
to run the job.{"job": {"content": [{"reader": {"name": "clickhousereader", "parameter": {"column": [], "connection": [{"jdbcUrl": [], "table": []}], "password": "", "username": "", "where": ""}}, "writer": {"name": "hbase20xsqlwriter", "parameter": {"batchSize": "100", "column": [], "nullMode": "skip", "queryServerAddress": "", "schema": "", "serialization": "PROTOBUF", "table": ""}}}], "setting": {"speed": {"channel": ""}}}
} 
创建Hbase和Phoenix表
DataX中Hbase2是通过Phoenix插入,所以也要同步创建Phoenix表
Hbase表创建
create 'test_datax',{NAME=>'cf'}
 
Phoenix表创建
create table "test_datax"("rowkey" varchar primary key, "cf"."data_id" varchar , "cf"."user_id" varchar, "cf"."name" varchar, "cf"."phone" varchar, "cf"."pt" varchar) column_encoded_bytes=0;
 
创建ClickHouse表
create table test_datax_ck
(
    data_id      String,
    user_id      Nullable(String),
    name         Nullable(String),
    phone        Nullable(String),
    pt           String
)
engine = MergeTree
PARTITION BY pt
ORDER BY (pt)
SETTINGS index_granularity = 8192;
 
写入ClickHouse测试数据
insert into test_datax_ck("data_id","user_id","name","phone","pt") values ('1','1','张三1','13577665544','20210101');
insert into test_datax_ck("data_id","user_id","name","phone","pt") values ('2','2','张三2','13577665546','20210101');
insert into test_datax_ck("data_id","user_id","name","phone","pt") values ('3','3','张三3','13577665545','20210101');
insert into test_datax_ck("data_id","user_id","name","phone","pt") values ('4','4','张三4','13577665543','20210102');
 
编写ClickHouse2Hbase配置文件
创建配置文件vim ck2hbase.json内容如下
{"job":{"content":[{"reader":{"name":"clickhousereader","parameter":{"column":["data_id","data_id","user_id","name","phone","pt"],"connection":[{"jdbcUrl":["jdbc:clickhouse://172.16.24.150:8123/ms_db?socket_timeout=7200000"],"table":["test_datax_ck"]}],"password":"avris","username":"default","where":""}},"writer":{"name":"hbase20xsqlwriter","parameter":{"batchSize":"100","column":["rowkey","data_id","user_id","name","phone","pt"],"nullMode":"skip","queryServerAddress":"http://172.1.24.146:8765","serialization":"PROTOBUF","table":"test_datax"}}}],"setting":{"speed":{"channel":1}}}
}
 
执行同步命令
python bin/datax.py ck2hbase.json

2022-04-29 15:31:51.894 [job-0] INFO  JobContainer - 
任务启动时刻                    : 2022-04-29 15:31:48
任务结束时刻                    : 2022-04-29 15:31:51
任务总计耗时                    :                  2s
任务平均流量                    :              176B/s
记录写入速度                    :             12rec/s
读出记录总数                    :                  12
读写失败总数                    :                   0
 
https://blog.csdn.net/BOOLTechnology/article/details/120534070
拓展
ClickHouse同步到MySQL配置文件
{"job": {"content": [{"reader": {"name": "clickhousereader", "parameter": {"column": ["one_id","aid","name","phone"], "connection": [{"jdbcUrl": ["jdbc:clickhouse://172.1.24.171:8123/default?socket_timeout=7200000"], "table": ["ads_user_test"]}], "password": "", "username": "", "where": ""}}, "writer": {"name": "mysqlwriter", "parameter": {"print": true,"column": ["one_id","aid","name","phone"], "connection": [{"jdbcUrl": ["jdbc:mysql://172.1.24.143:3306/test"], "table": ["test20220429"]}], "password": "root", "preSql": [], "session": [], "username": "root", "writeMode": ""}}}], "setting": {"speed": {"channel": ""}}}
}