2.8 LSQL Installation and Configuration
Use the rz command or a third-party transfer tool to upload the lsql .tar.gz package to /opt/software.
Extract it: tar -zxvf lsql-2_0_0_0.tar.gz
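If rz is not available, any standard transfer tool works. The scp command below is only an illustration, using a hypothetical source path and target host:

# copy the package from the machine that holds it to the server (example host and path)
scp lsql-2_0_0_0.tar.gz root@10.10.12.28:/opt/software/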
Edit /opt/software/lsql/config/site/lsql-env.sh:
#!/usr/bin/env bash
export CL_HADOOP_CONF_DIR=/opt/software/hadoop/etc/hadoop
export CL_HADOOP_HOME=/opt/software/hadoop
export CL_JAVA_HOME=/opt/software/openjdk
export CL_HADOOP_CLASS_PATH=`${CL_HADOOP_HOME}/bin/hadoop classpath`
# storage path for lsql on HDFS
export CL_HDFS_PATH=/data
# by default the lsql driver and executors are each given 28G of memory
export CL_DRIVER_MEMORY=28000m
# the executor count is the total across all machines; executors per machine = YARN memory / 32G
# here: 256G*3/4/32 = 6, and 256G*3/4/32-1 = 5 (one slot reserved for the NN, SN and Driver)
# so the total executor count = 6 (executors) * 4 (nodes) + 5 (executors) * 3 (nodes) = 39
export CL_EXECUTOR_COUNT=39
export CL_EXECUTOR_MEMORY=32000m
# the following two parameters can be left at their defaults
export CL_EXECUTOR_CORES=7
export CL_EXECUTOR_PARTITIONS=2
export CL_HDFS_USER=root
export CL_SERVER_ROLE=primary
# this setting is required when running on JDK 15
export CL_ZGC_SUPPORT=true
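After editing the file, a quick sanity check helps catch path and sizing mistakes early. This is an optional sketch that assumes the paths configured above; the arithmetic simply restates the executor-count comment in the file:

# optional sanity check of lsql-env.sh (paths as configured above)
source /opt/software/lsql/config/site/lsql-env.sh
${CL_HADOOP_HOME}/bin/hadoop classpath > /dev/null && echo "hadoop classpath OK"
${CL_JAVA_HOME}/bin/java -version          # should report the JDK lsql will run on
# executor-count arithmetic from the comments above: 6 executors on 4 nodes + 5 on the 3 nodes hosting NN/SN/Driver
echo $(( 6 * 4 + 5 * 3 ))                  # prints 39, matching CL_EXECUTOR_COUNT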
Edit the /opt/software/lsql/config/site/lsql-site.properties file to configure lsql's Kafka-related parameters:
# Kafka configuration example
# cl.stream.reader.list=kafka1,dispatch
# kafka1 is the reference configuration for data ingestion, dispatch is the reference configuration for per-table/per-shard ingestion, and sqlmonitor is the reference configuration for monitoring
cl.stream.reader.list=kafka1,dispatch
# users can supply their own parser class here
cl.stream.consumer.class.kafka1=cn.lucene.plugins.service.stream.api.impl.CLKafkaConsumer
cl.stream.parser.class.kafka1=cn.lucene.plugins.service.stream.api.impl.CLJsonParser
kafka.topic.kafka1=kafka_test
bootstrap.servers.kafka1=10.10.12.28:9092,10.10.12.29:9092,10.10.12.30:9092
kafka.group.kafka1=kafka_tets1
kafka.conf.params.kafka1=message.max.bytes:20000000;fetch.max.bytes:20485760;max.partition.fetch.bytes:20485760;fetch.message.max.bytes:20000000;replica.fetch.max.bytes:21000000

#kafka.topic.dispatch=dispatch_topic
#bootstrap.servers.dispatch=10.10.12.28:9092,10.10.12.29:9092,10.10.12.30:9092
#kafka.group.dispatch=dispatch_group
#kafka.conf.params.dispatch=message.max.bytes:20000000;fetch.max.bytes:20485760;max.partition.fetch.bytes:20485760;fetch.message.max.bytes:20000000;replica.fetch.max.bytes:21000000
## for dispatch, the kafka and json settings must not be changed at present, and the number of Kafka partitions must be >= the number of lsql processes * the number of shards per process (very important)
#cl.stream.consumer.class.dispatch=cn.lucene.plugins.service.stream.api.impl.CLKafkaConsumer
#cl.stream.parser.class.dispatch=cn.lucene.plugins.service.stream.api.impl.CLJsonParser

### used for SQL monitoring; sqlmonitor does not need to be listed in cl.stream.reader.list ###
kafka.topic.sqlmonitor=sqlmonitor_topic
bootstrap.servers.sqlmonitor=10.10.12.28:9092,10.10.12.29:9092,10.10.12.30:9092
kafka.group.sqlmonitor=sqlmonitor_group
kafka.conf.params.sqlmonitor=message.max.bytes:20000000;fetch.max.bytes:20485760;max.partition.fetch.bytes:20485760;fetch.message.max.bytes:20000000;replica.fetch.max.bytes:21000000
cl.stream.consumer.class.sqlmonitor=cn.lucene.plugins.service.stream.api.impl.CLKafkaConsumer
cl.stream.parser.class.sqlmonitor=cn.lucene.plugins.service.stream.api.impl.CLJsonParser
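Before starting lsql it is worth confirming that the brokers listed above are reachable and that the ingestion topic exists. The commands below are only a sketch: they assume Kafka 2.2 or later installed under /opt/software/kafka (as the log-cleanup section later implies), and the partition and replication counts are illustrative, not prescribed values:

# list the topics visible through one of the brokers configured above
/opt/software/kafka/bin/kafka-topics.sh --bootstrap-server 10.10.12.28:9092 --list
# create the ingestion topic if it does not exist yet (partition/replication counts are examples only;
# for the dispatch reader the partition count must be >= lsql processes * shards per process)
/opt/software/kafka/bin/kafka-topics.sh --bootstrap-server 10.10.12.28:9092 \
  --create --topic kafka_test --partitions 12 --replication-factor 2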
Configure lsql's Hadoop-based snapshots by adding a cron job on the lsql node (same as in section 2.6.1):
# crontab -e
# take one snapshot per day and keep the snapshots of the last 3 days; hadoop and the JDK must be configured in ~/.bashrc for this script to run
10 1 * * * sh /opt/software/lsql/sbin/snapshot.sh /data 3 >/dev/null 2>&1
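To confirm that the snapshot job is doing its work, the standard HDFS snapshot commands can be used. This assumes /data has been made snapshottable (snapshot.sh is expected to handle snapshot creation and rotation):

# check that /data is snapshottable and that daily snapshots are accumulating
hdfs lsSnapshottableDir            # /data should be listed
hdfs dfs -ls /data/.snapshot       # one entry per retained daily snapshot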
Configure log cleanup on every machine to keep the system disk from filling up. First redirect each component's log directory to /logs via a symlink (a consolidated sketch follows the three blocks below).
Hadoop
mkdir -p /logs/hadoop
rm -rf /opt/software/hadoop/logs
ln -s /logs/hadoop /opt/software/hadoop/logs
Kafka
mkdir -p /logs/kafka
rm -rf /opt/software/kafka/logs
ln -s /logs/kafka /opt/software/kafka/logs
Lsql
mkdir -p /logs/lsql
rm -rf /opt/software/lsql/logs
ln -s /logs/lsql /opt/software/lsql/logs
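The three blocks above differ only in the component name; on a fresh machine they can be run as one loop, assuming the same /opt/software and /logs layout:

# redirect hadoop, kafka and lsql logs to /logs in one pass (same paths as above)
for comp in hadoop kafka lsql; do
  mkdir -p /logs/${comp}
  rm -rf /opt/software/${comp}/logs
  ln -s /logs/${comp} /opt/software/${comp}/logs
done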
In the /root directory, create the cleanup script:
# vim auto-del-7-days-ago-log.sh
#!/bin/bash
find /opt/software/hadoop/logs/ -mtime +7 -name "*.log" -exec rm -rf {} \;
find /opt/software/hadoop/logs/ -mtime +7 -name "*.log.*" -exec rm -rf {} \;
find /opt/software/hadoop/logs/ -mtime +7 -name "*.out" -exec rm -rf {} \;
find /opt/software/hadoop/logs/ -mtime +7 -name "*.out.*" -exec rm -rf {} \;
find /opt/software/kafka/logs/ -mtime +7 -name "*.log" -exec rm -rf {} \;
find /opt/software/kafka/logs/ -mtime +7 -name "*.log.*" -exec rm -rf {} \;
Add it as a cron job:
# crontab -e
10 0 * * * sh /root/auto-del-7-days-ago-log.sh >/dev/null 2>&1
To start lsql, change to /opt/software/lsql/sbin and run ./start.sh.
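A simple post-start check, sketched under the assumption that lsql is submitted as a YARN application and that its logs now live under /logs/lsql after the symlink step above:

# verify that the lsql application is up and watch its logs for startup errors
yarn application -list -appStates RUNNING    # the lsql application should be listed here
tail -n 50 /logs/lsql/*.log                  # assumes lsql writes *.log files in this directory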