Installing a Greenplum Cluster

Preparation

Disable SELinux and the firewall

Edit /etc/selinux/config and set SELINUX=disabled, then check the status:

sestatus -b
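A minimal sketch of both steps (assuming root; setenforce only affects the running system, the config edit persists across reboots):

setenforce 0                                                  # permissive mode for the current boot
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config  # fully disabled after reboot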

* check and disable iptables (CentOS 6)
* check and disable firewalld (CentOS 7)

# /sbin/chkconfig --list iptables
# /sbin/chkconfig iptables off
# systemctl stop firewalld.service
# systemctl disable firewalld.service

Set OS parameters

/etc/hosts

Must list the host names of every node in the cluster, and the network interfaces must be configured accordingly.
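A hypothetical layout (the addresses are placeholders; the host names match those used throughout this page):

192.168.1.10    mdw     # master
192.168.1.11    smdw    # standby master
192.168.1.12    sdw1    # segment host
192.168.1.13    sdw2    # segment host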

/etc/sysctl.conf

# for gp
kernel.shmmax = 500000000
kernel.shmmni = 4096
kernel.shmall = 4000000000
kernel.sem = 250 512000 100 2048
kernel.sysrq = 1
kernel.core_uses_pid = 1
kernel.msgmnb = 65536
kernel.msgmax = 65536
kernel.msgmni = 2048
net.ipv4.tcp_syncookies = 1
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_max_syn_backlog = 4096
net.ipv4.conf.all.arp_filter = 1
net.ipv4.ip_local_port_range = 1025 65535
net.core.netdev_max_backlog = 10000
net.core.rmem_max = 2097152
net.core.wmem_max = 2097152
vm.overcommit_memory = 2
vm.overcommit_ratio = 95
net.ipv4.ip_forward = 0

e.g.
echo -e '# for gp \nkernel.shmmax = 500000000 \nkernel.shmmni = 4096 \nkernel.shmall = 4000000000 \nkernel.sem = 250 512000 100 2048 \nkernel.sysrq = 1 \nkernel.core_uses_pid = 1 \nkernel.msgmnb = 65536 \nkernel.msgmax = 65536 \nkernel.msgmni = 2048 \nnet.ipv4.tcp_syncookies = 1 \nnet.ipv4.conf.default.accept_source_route = 0 \nnet.ipv4.tcp_tw_recycle = 1 \nnet.ipv4.tcp_max_syn_backlog = 4096 \nnet.ipv4.conf.all.arp_filter = 1 \nnet.ipv4.ip_local_port_range = 1025 65535 \nnet.core.netdev_max_backlog = 10000 \nnet.core.rmem_max = 2097152 \nnet.core.wmem_max = 2097152 \nvm.overcommit_memory = 2 \nvm.overcommit_ratio = 95\nnet.ipv4.ip_forward = 0' >> /etc/sysctl.conf
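Apply the new values to the running kernel (a standard follow-up):

sysctl -p

For comparison, the stock semaphore defaults look like this: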

[root@sdw2 queue]# sysctl -a |grep sem
kernel.sem = 250        32000   32      128
kernel.sem_next_id = -1

/etc/security/limits.conf

/etc/security/limits.d/90-nproc.conf

* soft nofile 65536
* hard nofile 65536
* soft nproc 131072
* hard nproc 131072

e.g. on mdw (the nproc lines go into 90-nproc.conf, which overrides limits.conf on CentOS 6):
echo -e '# for gp\n* soft nofile 65536\n* hard nofile 65536\n* soft nproc 131072\n* hard nproc 131072' >> /etc/security/limits.conf
echo -e '# for gp\n* soft nproc 131072\n* hard nproc 131072' >> /etc/security/limits.d/90-nproc.conf
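Verify after logging in again (limits apply at login, so the current shell still shows the old values):

ulimit -n   # open files; expect 65536
ulimit -u   # max user processes; expect 131072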

Create the XFS file system and mount the disks

Example with btrfs on an NVMe device (cleaned-up shell history; note that Greenplum's recommended file system is XFS, and the partition, not the whole device, should be formatted and mounted):

parted --script /dev/nvme2n1 "mklabel gpt"
parted --script /dev/nvme2n1 "mkpart primary btrfs 1 100%"
mkfs.btrfs -f /dev/nvme2n1p1
mkdir /data
mount /dev/nvme2n1p1 /data


Example with ext4 (-c scans for bad blocks during mkfs, which is slow on large disks; again format and mount the partition, not the whole device):

parted /dev/sdb mklabel gpt
parted -a optimal /dev/sdb "mkpart primary ext4 1 100%"
mkfs.ext4 -c /dev/sdb1
mount -t ext4 /dev/sdb1 /data/

parted /dev/nvme3n1 mklabel gpt
parted -a optimal /dev/nvme3n1 "mkpart primary ext4 1 100%"
mkfs.ext4 -c /dev/nvme3n1p1
mount -t ext4 /dev/nvme3n1p1 /envs

* /etc/fstab
#...
/dev/vdb1 /data xfs nodev,noatime,nobarrier,inode64,allocsize=16m 0 0

echo -e '\n/dev/vdb1            /storage/disk1                xfs        nodev,noatime,nobarrier,inode64 0 0 \n/dev/vdc1            /storage/disk2                xfs        nodev,noatime,nobarrier,inode64 0 0 \n/dev/vdd1            /storage/disk3                xfs        nodev,noatime,nobarrier,inode64 0 0 \n/dev/vde1            /storage/disk4                xfs        nodev,noatime,nobarrier,inode64 0 0 \n/dev/vdf1            /storage/disk5                xfs        nodev,noatime,nobarrier,inode64 0 0 \n/dev/vdg1            /storage/disk6                xfs        nodev,noatime,nobarrier,inode64 0 0 \n \n/dev/vdh1            /fastdisk/ssd1                xfs        nodev,noatime,nobarrier,inode64 0 0 \n/dev/vdi1            /fastdisk/ssd2                xfs        nodev,noatime,nobarrier,inode64 0 0' >> /etc/fstab

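Create the mount points and mount everything, to validate the fstab entries just added (paths follow the entries above):

mkdir -p /storage/disk{1..6} /fastdisk/ssd{1,2}
mount -a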

For a partition larger than 2 TB, here's the process:

Check whether you have mkfs.xfs installed by running it without arguments. If it's not present, install the userland XFS tools:

# yum install xfsprogs

If that failed, it's probably because you're on an older OS that doesn't have this in its default package repository. You really should upgrade, but if that is impossible, you can get this from CentOSPlus or EPEL. You may also need to install the kmod_xfs package.

Create the partition:

For a 22 TiB volume on /dev/vdb, the commands for parted are:

# parted /dev/vdb mklabel gpt

# parted -a optimal -- /dev/vdb mkpart primary xfs 1 -1

That causes it to take over the entire volume with a single partition. Actually, it ignores the first 1 MiB of the volume, to achieve the 4 KiB alignment required to get the full performance from Advanced Format HDDs and SSDs.

You could skip this step and format the entire volume with XFS. That is, you would use /dev/vdb in the example below instead of /dev/vdb1. This avoids the problem of sector alignment. In the case of a RAID array, there are no downsides worth speaking about, but I'd caution against doing this on a single removable disk, since some OSes (Windows and macOS, for instance) will offer to format a partitionless hard drive for you every time you plug it in. Putting the filesystem on a partition solves this.

Format the partition:

# mkfs.xfs -L gp-data /dev/vdb1

Add the /etc/fstab entry:

LABEL=somelabel /some/mount/point xfs defaults 0 0

Mount up!

# mount /some/mount/point

parted /dev/vdb mklabel gpt && parted -a optimal -- /dev/vdb mkpart primary xfs 1 -1 && mkfs.xfs -L gp-data /dev/vdb1
parted /dev/vdf mklabel gpt && parted -a optimal -- /dev/vdf mkpart primary xfs 1 -1 && mkfs.xfs -L gp-data /dev/vdf1
parted /dev/vdg mklabel gpt && parted -a optimal -- /dev/vdg mkpart primary xfs 1 -1 && mkfs.xfs -L gp-data /dev/vdg1
parted /dev/sda mklabel gpt && parted -a optimal -- /dev/sda mkpart primary xfs 1 -1 && mkfs.xfs -L gp-data /dev/sda1
mount -t xfs -o nodev,noatime,nobarrier,inode64,allocsize=16m  /dev/sda1 /data
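Note: the four mkfs.xfs commands above all reuse the label gp-data. Mounting by device path (as here and in the fstab entries) works regardless, but if you prefer LABEL= entries in fstab, give each filesystem a unique label, e.g. with xfs_admin (label names are hypothetical):

xfs_admin -L data1 /dev/vdb1
xfs_admin -L data2 /dev/vdf1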

/dev/vdc1            /backup/disk1                xfs        nodev,noatime,nobarrier,inode64,allocsize=16m 0 0

Each disk device file should have a read-ahead (blockdev) value of 16384:


/sbin/blockdev --setra 16384 /dev/sda
/sbin/blockdev --setra 16384 /dev/sdb (in /etc/rc.local)

echo -e '/sbin/blockdev --setra 16384 /dev/sda\n/sbin/blockdev --setra 16384 /dev/sdb' >> /etc/rc.local

echo -e '\n/sbin/blockdev --setra 16384 /dev/vda \n/sbin/blockdev --setra 16384 /dev/vdb \n/sbin/blockdev --setra 16384 /dev/vdc \n/sbin/blockdev --setra 16384 /dev/vdd \n/sbin/blockdev --setra 16384 /dev/vde' >> /etc/rc.local 
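On CentOS 7, rc.local runs at boot only if it is executable (rc-local.service checks the permission bit):

chmod +x /etc/rc.d/rc.local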

The Linux disk I/O scheduler supports different policies, such as CFQ, AS, and deadline.

  • The deadline scheduler option is recommended. (Note: brace expansion is not valid in a redirect target, so set each device in a loop.)
cat /sys/block/vd{a,b,c,d}/queue/scheduler
for d in vda vdb vdc vdd; do echo deadline > /sys/block/$d/queue/scheduler; done
  • To make this persistent, add elevator=deadline to the kernel line in /boot/grub/grub.conf.

  • Disable Transparent Huge Pages (THP): add transparent_hugepage=never to the kernel line in /boot/grub/grub.conf.

cat /sys/kernel/mm/*transparent_hugepage/enabled
Back up grub.conf first (the sed reads the backup and rewrites the original), then add both parameters in one pass:

cp /boot/grub/grub.conf /boot/grub/grub.conf.bak
sed -e 's|kernel \/boot\/vmlinuz-2.6.32-696.6.3.el6.x86_64 ro root=\/dev\/vda1 console=ttyS0 console=tty0 printk.time=1 panic=5 rd_NO_LUKS  KEYBOARDTYPE=pc KEYTABLE=us LANG=C rd_NO_MD SYSFONT=latarcyrheb-sun16 rd_NO_LVM crashkernel=auto   rd_NO_DM|kernel \/boot\/vmlinuz-2.6.32-696.6.3.el6.x86_64 ro root=\/dev\/vda1 elevator=deadline console=ttyS0 console=tty0 printk.time=1 panic=5 rd_NO_LUKS  KEYBOARDTYPE=pc KEYTABLE=us LANG=C rd_NO_MD SYSFONT=latarcyrheb-sun16 rd_NO_LVM crashkernel=auto   rd_NO_DM transparent_hugepage=never|' /boot/grub/grub.conf.bak > /boot/grub/grub.conf

[root@localhost ~]# cat /boot/grub/grub.conf
default=0
timeout=5
splashimage=(hd0,0)/grub/splash.xpm.gz
hiddenmenu
title CentOS 6 (2.6.32-573.el6.x86_64)
        root (hd0,0)
        kernel /boot/vmlinuz-2.6.32-696.6.3.el6.x86_64 ro root=/dev/vda1 console=ttyS0 console=tty0 printk.time=1 panic=5 rd_NO_LUKS  KEYBOARDTYPE=pc KEYTABLE=us LANG=C rd_NO_MD SYSFONT=latarcyrheb-sun16 rd_NO_LVM crashkernel=auto   rd_NO_DM
        initrd /initramfs-2.6.32-573.el6.x86_64.img

After editing, the kernel line additionally carries these two parameters:

 elevator=deadline
 transparent_hugepage=never
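After a reboot, verify that both parameters took effect:

cat /proc/cmdline                                  # should contain elevator=deadline and transparent_hugepage=never
cat /sys/kernel/mm/*transparent_hugepage/enabled   # should show [never]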

Installation

Binary release: refer to the Pivotal packages.

Building from source

Configure SSH and the gpadmin user

* exchange SSH keys (RSA)
 gpssh-exkeys -f host_file
 
* create group and users by hand
 gpssh -f host_file
 groupadd -g 550 gpadmin
 groupadd -g 560 gpmon
 useradd -u 550 -g gpadmin -m -s /bin/bash gpadmin
 useradd -u 560 -g gpmon -m -s /bin/bash gpmon
 echo 'P*****-2***Q4' | passwd gpadmin --stdin
 echo 'P*****-2***10' | passwd gpmon --stdin
 chown -R gpadmin:gpadmin /data
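For reference, a hypothetical host_file as used by gpssh-exkeys and gpssh above (one host name per line, matching /etc/hosts):

mdw
smdw
sdw1
sdw2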

Edit the .bashrc configuration

On the master and the master standby, set MASTER_DATA_DIRECTORY. Edit ~/.bashrc and add the following:

source /usr/local/greenplum-db/greenplum_path.sh

export MASTER_DATA_DIRECTORY=/data/master/gpseg-1 # (the directory gpstart uses by default)

echo -e 'source /usr/local/greenplum-db/greenplum_path.sh\nexport MASTER_DATA_DIRECTORY=/data/master/gpseg-1 # (the directory gpstart uses by default)' >> ~/.bashrc

【Segment hosts】:

Edit ~/.bashrc and add the following:

source /usr/local/greenplum-db/greenplum_path.sh

Set up clock synchronization

gpssh -f host_file
date
ntpdate ntp.ubuntu.com

Disable unnecessary services

* gpssh -f host_file
Note: command history unsupported on this machine ...
=> chkconfig avahi-daemon off
chkconfig avahi-dnsconfd off
chkconfig conman off
chkconfig bluetooth off
chkconfig cpuspeed off
chkconfig setroubleshoot off
chkconfig hidd off
chkconfig hplip off
chkconfig isdn off
chkconfig kudzu off
chkconfig yum-updatesd off
...

Install the software on non-master nodes

gpseginstall reads the host file /home/gpadmin/addingsegs, which contains the master, the standby master, and all segments.

gpseginstall creates the system user gpadmin on every machine and sets the default password to changeme, so some of the steps in "Configure SSH and the gpadmin user" above are optional.

[root@mdw greenplum-db]# gpseginstall -f hostfile_exkeys -u gpadmin -p ***
20170830:15:25:12:003173 gpseginstall:mdw:root-[INFO]:-Installation Info:
link_name greenplum-db

Check system parameters

[gpadmin@mdw greenplum-db]$ gpcheck -f hostfile -m mdw -s smdw
# check disk and stream performance
gpcheckperf -f hostfile -r ds -d /data > checkperf.out
# check network performance
gpcheckperf -f hostfile -r N -d /tmp > checknetwork.out

Install the database

mkdir -p ~/gpconfigs
cp /usr/local/greenplum-db/docs/cli_help/gpconfigs/gpinitsystem_config ~/gpconfigs/

gpinitsystem -c gpconfigs/gpinitsystem_config -h onlysegs -s smdw    # -s sets the standby master; add -S for spread mirroring
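A minimal sketch of the key parameters inside gpinitsystem_config (values are illustrative and follow the /data layout used above; the shipped template documents the full set):

ARRAY_NAME="Greenplum Data Platform"
SEG_PREFIX=gpseg
PORT_BASE=40000
declare -a DATA_DIRECTORY=(/data/primary /data/primary)   # two primary segments per host
MASTER_HOSTNAME=mdw
MASTER_DIRECTORY=/data/master
MASTER_PORT=5432
TRUSTED_SHELL=ssh
ENCODING=UNICODE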

【Tablespace & Filespace】

  • Tablespaces allow each node (each machine) to use multiple directories or file systems; by default all data lives under the base directory. We can pick the physical storage layout that best fits each need.

Benefits of using tablespaces:

1. Match the storage type to how frequently the data is used

2. Control the I/O performance of specific database objects

For example: keep frequently used tables on SSDs for better performance, and move historical data to SATA disks.

  • A filespace (the collection of all storage file-system locations) can be thought of as a set of directories backing one or more tablespaces. In Greenplum a tablespace must be created on a filespace, so the filespace has to exist before you create the tablespace, as sketched below.
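A sketch of the filespace-then-tablespace workflow (the filespace name, tablespace name, and table are hypothetical; gpfilespace and CREATE TABLESPACE ... FILESPACE apply to Greenplum 5 and earlier, where filespaces still exist):

# interactively generate a filespace configuration, then create the filespace
gpfilespace -o gpfilespace_config
gpfilespace -c gpfilespace_config

# create a tablespace on the filespace and place a hot table on it
psql -d payment -c "CREATE TABLESPACE fastspace FILESPACE fastdisk;"
psql -d payment -c "CREATE TABLE hot_events (id int) TABLESPACE fastspace DISTRIBUTED BY (id);"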

Additional features

Extensions

dblink

psql -d payment -f /usr/local/greenplum-db/share/postgresql/contrib/dblink.sql

gp_toolkit

psql -f /usr/local/greenplum-db/share/postgresql/cdb_init.d/gp_toolkit.sql

Configuration

Recursive queries

export PGPORT=15432
gpconfig -c gp_recursive_cte_prototype -v on --skipvalidation
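gpconfig only writes the value into each postgresql.conf; reload the cluster so it takes effect, then confirm (standard follow-up, not in the original notes):

gpstop -u
gpconfig -s gp_recursive_cte_prototype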