In this blog I discuss the prerequisite setup for a Hadoop 2 installation; I have made several improvements to this pre-req setup compared to my Hadoop 1 blog.
Our Controller Node is namenode or nn or namenode.cluster.com
All the below steps are on namenode
My Cluster consists of 7 nodes -
Below is my /etc/hosts which I created on namenode.
All other nodes are just installed with same password on all the nodes. The roles of the nodes are pretty much self intuitive as per the names
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
#::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.10.51 namenode.cluster.com namenode nn
192.168.10.52 rmanager.cluster.com rmanager rm
192.168.10.53 snamenode.cluster.com snamenode snn
192.168.10.54 d1node.cluster.com d1node d1n
192.168.10.55 d2node.cluster.com d2node d2n
192.168.10.58 d3node.cluster.com d3node d3n
192.168.10.57 d4node.cluster.com d4node d4n
Version on which I am installing
[Linux 7 Red Hat Enterprise Linux Server release 7.3 (Maipo) 3.10.0-514.el7.x86_64]
Step 1
[As root]
Install pdsh and expect
wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/p/pdsh-2.31-1.el7.x86_64.rpm
wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/p/pdsh-rcmd-rsh-2.31-1.el7.x86_64.rpm
yum install pdsh-2.31-1.el7.x86_64.rpm pdsh-rcmd-rsh-2.31-1.el7.x86_64.rpm -y
yum install expect -y
Step 2 - Create ssh key
[As root]
[root@namenode ~]# ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
88:1c:af:e6:23:72:de:b2:76:8d:c6:86:d5:5a:96:1c root@namenode.cluster.com
The key's randomart image is:
+--[ RSA 2048]----+
| |
| |
| . |
| . +E. |
| ooooS |
| ..* |
| +o* |
|. *+O . |
| =oBo. |
+-----------------+
Step 2.1
[As root - Automation File Creation]
Create Below File (This is to setup passwordless ssh and file-copy)
(Source - http://www.techpaste.com/2013/04/shell-script-automate-ssh-key-transfer-hosts-linux/)
File Name - /tmp/keysetup.exp
#!/usr/bin/env expect
# Push the local user's public key to <remote_user>@<host>, answering the
# host-key (yes/no) prompt and the password prompt automatically.
# Usage: keysetup.exp <host> <remote_user> <remote_password> <local_user>
set host      [lindex $argv 0]
set username  [lindex $argv 1]
set password  [lindex $argv 2]
set lusername [lindex $argv 3]
set timeout 10
# Drop any stale key for the host, and wait for ssh-keygen to finish
# before spawning ssh-copy-id (previously it was left unreaped).
spawn /usr/bin/ssh-keygen -R $host
expect eof
spawn /usr/bin/ssh-copy-id -i /$lusername/.ssh/id_rsa.pub $username@$host
match_max 100000
# One expect loop handles the prompts in any order. A separate, mandatory
# expect for "(yes/no)" would block for the full timeout when the host key
# is already known, then send "yes" at the wrong (password) prompt.
expect {
    "*(yes/no*"      { send -- "yes\r"; exp_continue }
    "*?assword:*"    { send -- "$password\r"; exp_continue }
    "Now try*"       { exit 0 }
    "Number of key*" { exit 0 }
    eof              { exit 1 }
    timeout          { exit 1 }
}
exit 0
Provide Execute Permissions
chmod u+x /tmp/keysetup.exp
Create all_hosts file with a hostname present in newline for each host
[root@namenode tmp]# cat /tmp/all_hosts
nn
snn
rm
d1n
d2n
d3n
d4n
chmod 777 /tmp/all_hosts
Step 3
[As root - Passwordless SSH Setup]
Run below to setup passwordless as user root.
# for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} root remote_password root ; done
Now namenode can login to all hosts without password.
Step 4
[As root - sysctl file creation and security limits file creation]
Create Sysctl file for hadoop
[root@namenode tmp]# cat /tmp/98-sysctl-hadoop.conf
fs.file-max=6815744
fs.aio-max-nr=1048576
net.core.rmem_default=262144
net.core.wmem_default=262144
net.core.rmem_max=16777216
net.core.wmem_max=16777216
net.ipv4.tcp_rmem=4096 262144 16777216
net.ipv4.tcp_wmem=4096 262144 16777216
vm.swappiness=10
Create secur_conf.conf file for security limits
[root@namenode tmp]# cat secur_conf.conf
@hadoop soft nofile 32768
@hadoop hard nofile 32768
@hadoop soft nproc 32768
@hadoop hard nproc 32768
Step 5
[As root - Distribute sysctl configuration file and Apply ]
# for i in $(cat /tmp/all_hosts) ; do scp /tmp/98-sysctl-hadoop.conf ${i}:/etc/sysctl.d/ ; done
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} mv /etc/sysctl.d/99-sysctl.conf /etc/sysctl.d/95-sysctl.conf ; done
(Rename the distribution's 99-* sysctl fragment so that our 98-sysctl-hadoop.conf sorts after it lexically and its settings take precedence.)
[As root - Apply Settings ]
# export WCOLL=/tmp/all_hosts
# pdsh -R exec /usr/sbin/sysctl --system
Step 6
[As root - Distribute limits file and Deploy]
# for i in $(cat /tmp/all_hosts) ; do scp /tmp/secur_conf.conf ${i}:/tmp/ ; done
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'cat /tmp/secur_conf.conf >> /etc/security/limits.conf' ; done
Step 7
[As root - Disable Transparent Huge Pages Compaction]
# Create File
cat /tmp/thp.disable
echo never > /sys/kernel/mm/transparent_hugepage/defrag
# for i in $(cat /tmp/all_hosts) ; do scp /tmp/thp.disable ${i}:/tmp/ ; done
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'cat /tmp/thp.disable >> /etc/rc.local' ; done
Step 8
[As root - Disable SE Linux ]
# export WCOLL=/tmp/all_hosts
# pdsh -R exec setenforce 0
Step 9
[As root - Reboot all machines (and wait)]
#for i in $(cat /tmp/all_hosts) ; do ssh ${i} reboot ; done
Step 10
[As root - Hosts File Updation]
#for i in $(cat /tmp/all_hosts) ; do scp /etc/hosts ${i}:/etc/hosts; done
Step 11
[As root - Group Creation]
## for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} groupadd -g 1000 hadoop" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd
[As root - User Creation]
#for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} useradd -u 1003 -g hadoop mapred" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd
#for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} useradd -u 1004 -g hadoop yarn" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd
## for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} useradd -u 1005 -g hadoop hdfs" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd
Step 12
[As root - hdfs, mapred and yarn user password change ]
Run the script below once per user: replace hduser with hdfs, then mapred, then yarn (the accounts created in Step 11 — there is no hduser account on this cluster).
Create Script as below and run it
#!/bin/bash
# Set a user's password on every host listed in /tmp/all_hosts.
# Usage: script.sh [user] [password]   (defaults: hduser / hadoop,
# matching the original hard-coded values, so no-arg use is unchanged)
set -u
user=${1:-hduser}
pass=${2:-hadoop}
while IFS= read -r server; do
  printf '%s\n' "$server"
  # Feed the new password twice to satisfy passwd's confirmation prompt.
  # NOTE(review): passwd reading its prompts from stdin relies on ssh
  # allocating no tty; 'echo "user:pass" | chpasswd' is the more robust
  # RHEL alternative — confirm before switching.
  ssh "$server" "passwd ${user} <<EOF
${pass}
${pass}
EOF"
done < /tmp/all_hosts
Step 13
[As hdfs, mapred, yarn - Setup ssh equivalency on namenode, rmanager, snamenode]
File Name - /tmp/keysetup.exp
#!/usr/bin/env expect
# Push the local user's public key (from /home/<local_user>) to
# <remote_user>@<host>, answering the host-key and password prompts.
# Usage: keysetup.exp <host> <remote_user> <remote_password> <local_user>
set host      [lindex $argv 0]
set username  [lindex $argv 1]
set password  [lindex $argv 2]
set lusername [lindex $argv 3]
set timeout 10
# Drop any stale key for the host, and wait for ssh-keygen to finish
# before spawning ssh-copy-id (previously it was left unreaped).
spawn /usr/bin/ssh-keygen -R $host
expect eof
spawn /usr/bin/ssh-copy-id -i /home/$lusername/.ssh/id_rsa.pub $username@$host
match_max 100000
# One expect loop handles the prompts in any order. A separate, mandatory
# expect for "(yes/no)" would block for the full timeout when the host key
# is already known, then send "yes" at the wrong (password) prompt.
expect {
    "*(yes/no*"      { send -- "yes\r"; exp_continue }
    "*?assword:*"    { send -- "$password\r"; exp_continue }
    "Now try*"       { exit 0 }
    "Number of key*" { exit 0 }
    eof              { exit 1 }
    timeout          { exit 1 }
}
exit 0
Provide Execute Permissions
chmod 777 /tmp/keysetup.exp
[As hdfs]
# ssh-keygen
#for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} hdfs remote_password hdfs ; done
[As mapred]
# ssh-keygen
# for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} mapred remote_password mapred ; done
[As yarn]
# ssh-keygen
#for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} yarn remote_password yarn ; done
Step 14
[As root - Java Installation]
#for i in $(cat /tmp/all_hosts) ; do echo "scp jdk-8u152-linux-x64.rpm ${i}:/tmp &" >> /tmp/sendjdk.bash ; done
Paste and run contents of the file
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} rpm -Uvh /tmp/jdk-8u152-linux-x64.rpm ; done;
Step 15
[As root - Set Env Variables]
Create file profile.sh
cat /tmp/profile.sh
export JAVA_HOME=/usr/java/latest
export HADOOP_PREFIX=/usr/local/hadoop
export LOG=/opt/HDPV2/logs
export CONF=/etc/hadoop/conf
export PATH=$JAVA_HOME/bin:$HADOOP_PREFIX/bin:$HADOOP_PREFIX/sbin:$PATH
#for i in $(cat /tmp/all_hosts) ; do scp /tmp/profile.sh ${i}:/etc/profile.d/ ; done
#for i in $(cat /tmp/all_hosts) ; do ssh ${i} chmod 755 /etc/profile.d/profile.sh ; done
#for i in $(cat /tmp/all_hosts) ; do ssh ${i} source /etc/profile.d/profile.sh ; done
Step 16
[As root - Permissions Set]
My Mount is going to /opt/HDPV2 for my hadoop based data.
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} chown root:hadoop /opt /opt/HDPV2 ; done
Our Controller Node is namenode or nn or namenode.cluster.com
All the below steps are on namenode
My Cluster consists of 7 nodes -
Below is my /etc/hosts which I created on namenode.
All other nodes are just installed with same password on all the nodes. The roles of the nodes are pretty much self intuitive as per the names
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
#::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.10.51 namenode.cluster.com namenode nn
192.168.10.52 rmanager.cluster.com rmanager rm
192.168.10.53 snamenode.cluster.com snamenode snn
192.168.10.54 d1node.cluster.com d1node d1n
192.168.10.55 d2node.cluster.com d2node d2n
192.168.10.58 d3node.cluster.com d3node d3n
192.168.10.57 d4node.cluster.com d4node d4n
Version on which I am installing
[Linux 7 Red Hat Enterprise Linux Server release 7.3 (Maipo) 3.10.0-514.el7.x86_64]
Step 1
[As root]
Install pdsh and expect
wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/p/pdsh-2.31-1.el7.x86_64.rpm
wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/p/pdsh-rcmd-rsh-2.31-1.el7.x86_64.rpm
yum install pdsh-2.31-1.el7.x86_64.rpm pdsh-rcmd-rsh-2.31-1.el7.x86_64.rpm -y
yum install expect -y
Step 2 - Create ssh key
[As root]
[root@namenode ~]# ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
88:1c:af:e6:23:72:de:b2:76:8d:c6:86:d5:5a:96:1c root@namenode.cluster.com
The key's randomart image is:
+--[ RSA 2048]----+
| |
| |
| . |
| . +E. |
| ooooS |
| ..* |
| +o* |
|. *+O . |
| =oBo. |
+-----------------+
Step 2.1
[As root - Automation File Creation]
Create Below File (This is to setup passwordless ssh and file-copy)
(Source - http://www.techpaste.com/2013/04/shell-script-automate-ssh-key-transfer-hosts-linux/)
File Name - /tmp/keysetup.exp
#!/usr/bin/env expect
# Push the local user's public key to <remote_user>@<host>, answering the
# host-key (yes/no) prompt and the password prompt automatically.
# Usage: keysetup.exp <host> <remote_user> <remote_password> <local_user>
set host      [lindex $argv 0]
set username  [lindex $argv 1]
set password  [lindex $argv 2]
set lusername [lindex $argv 3]
set timeout 10
# Drop any stale key for the host, and wait for ssh-keygen to finish
# before spawning ssh-copy-id (previously it was left unreaped).
spawn /usr/bin/ssh-keygen -R $host
expect eof
spawn /usr/bin/ssh-copy-id -i /$lusername/.ssh/id_rsa.pub $username@$host
match_max 100000
# One expect loop handles the prompts in any order. A separate, mandatory
# expect for "(yes/no)" would block for the full timeout when the host key
# is already known, then send "yes" at the wrong (password) prompt.
expect {
    "*(yes/no*"      { send -- "yes\r"; exp_continue }
    "*?assword:*"    { send -- "$password\r"; exp_continue }
    "Now try*"       { exit 0 }
    "Number of key*" { exit 0 }
    eof              { exit 1 }
    timeout          { exit 1 }
}
exit 0
Provide Execute Permissions
chmod u+x /tmp/keysetup.exp
Create all_hosts file with a hostname present in newline for each host
[root@namenode tmp]# cat /tmp/all_hosts
nn
snn
rm
d1n
d2n
d3n
d4n
chmod 777 /tmp/all_hosts
Step 3
[As root - Passwordless SSH Setup]
Run below to setup passwordless as user root.
# for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} root remote_password root ; done
Now namenode can login to all hosts without password.
Step 4
[As root - sysctl file creation and security limits file creation]
Create Sysctl file for hadoop
[root@namenode tmp]# cat /tmp/98-sysctl-hadoop.conf
fs.file-max=6815744
fs.aio-max-nr=1048576
net.core.rmem_default=262144
net.core.wmem_default=262144
net.core.rmem_max=16777216
net.core.wmem_max=16777216
net.ipv4.tcp_rmem=4096 262144 16777216
net.ipv4.tcp_wmem=4096 262144 16777216
vm.swappiness=10
Create secur_conf.conf file for security limits
[root@namenode tmp]# cat secur_conf.conf
@hadoop soft nofile 32768
@hadoop hard nofile 32768
@hadoop soft nproc 32768
@hadoop hard nproc 32768
Step 5
[As root - Distribute sysctl configuration file and Apply ]
# for i in $(cat /tmp/all_hosts) ; do scp /tmp/98-sysctl-hadoop.conf ${i}:/etc/sysctl.d/ ; done
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} mv /etc/sysctl.d/99-sysctl.conf /etc/sysctl.d/95-sysctl.conf ; done
(Rename the distribution's 99-* sysctl fragment so that our 98-sysctl-hadoop.conf sorts after it lexically and its settings take precedence.)
[As root - Apply Settings ]
# export WCOLL=/tmp/all_hosts
# pdsh -R exec /usr/sbin/sysctl --system
Step 6
[As root - Distribute limits file and Deploy]
# for i in $(cat /tmp/all_hosts) ; do scp /tmp/secur_conf.conf ${i}:/tmp/ ; done
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'cat /tmp/secur_conf.conf >> /etc/security/limits.conf' ; done
Step 7
[As root - Disable Transparent Huge Pages Compaction]
# Create File
cat /tmp/thp.disable
echo never > /sys/kernel/mm/transparent_hugepage/defrag
# for i in $(cat /tmp/all_hosts) ; do scp /tmp/thp.disable ${i}:/tmp/ ; done
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'cat /tmp/thp.disable >> /etc/rc.local' ; done
Step 8
[As root - Disable SE Linux ]
# export WCOLL=/tmp/all_hosts
# pdsh -R exec setenforce 0
Step 9
[As root - Reboot all machines (and wait)]
#for i in $(cat /tmp/all_hosts) ; do ssh ${i} reboot ; done
Step 10
[As root - Hosts File Updation]
#for i in $(cat /tmp/all_hosts) ; do scp /etc/hosts ${i}:/etc/hosts; done
Step 11
[As root - Group Creation]
## for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} groupadd -g 1000 hadoop" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd
[As root - User Creation]
#for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} useradd -u 1003 -g hadoop mapred" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd
#for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} useradd -u 1004 -g hadoop yarn" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd
## for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} useradd -u 1005 -g hadoop hdfs" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd
Step 12
[As root - hdfs, mapred and yarn user password change ]
Run the script below once per user: replace hduser with hdfs, then mapred, then yarn (the accounts created in Step 11 — there is no hduser account on this cluster).
Create Script as below and run it
#!/bin/bash
# Set a user's password on every host listed in /tmp/all_hosts.
# Usage: script.sh [user] [password]   (defaults: hduser / hadoop,
# matching the original hard-coded values, so no-arg use is unchanged)
set -u
user=${1:-hduser}
pass=${2:-hadoop}
while IFS= read -r server; do
  printf '%s\n' "$server"
  # Feed the new password twice to satisfy passwd's confirmation prompt.
  # NOTE(review): passwd reading its prompts from stdin relies on ssh
  # allocating no tty; 'echo "user:pass" | chpasswd' is the more robust
  # RHEL alternative — confirm before switching.
  ssh "$server" "passwd ${user} <<EOF
${pass}
${pass}
EOF"
done < /tmp/all_hosts
Step 13
[As hdfs, mapred, yarn - Setup ssh equivalency on namenode, rmanager, snamenode]
File Name - /tmp/keysetup.exp
#!/usr/bin/env expect
# Push the local user's public key (from /home/<local_user>) to
# <remote_user>@<host>, answering the host-key and password prompts.
# Usage: keysetup.exp <host> <remote_user> <remote_password> <local_user>
set host      [lindex $argv 0]
set username  [lindex $argv 1]
set password  [lindex $argv 2]
set lusername [lindex $argv 3]
set timeout 10
# Drop any stale key for the host, and wait for ssh-keygen to finish
# before spawning ssh-copy-id (previously it was left unreaped).
spawn /usr/bin/ssh-keygen -R $host
expect eof
spawn /usr/bin/ssh-copy-id -i /home/$lusername/.ssh/id_rsa.pub $username@$host
match_max 100000
# One expect loop handles the prompts in any order. A separate, mandatory
# expect for "(yes/no)" would block for the full timeout when the host key
# is already known, then send "yes" at the wrong (password) prompt.
expect {
    "*(yes/no*"      { send -- "yes\r"; exp_continue }
    "*?assword:*"    { send -- "$password\r"; exp_continue }
    "Now try*"       { exit 0 }
    "Number of key*" { exit 0 }
    eof              { exit 1 }
    timeout          { exit 1 }
}
exit 0
Provide Execute Permissions
chmod 777 /tmp/keysetup.exp
[As hdfs]
# ssh-keygen
#for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} hdfs remote_password hdfs ; done
[As mapred]
# ssh-keygen
# for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} mapred remote_password mapred ; done
[As yarn]
# ssh-keygen
#for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} yarn remote_password yarn ; done
Step 14
[As root - Java Installation]
#for i in $(cat /tmp/all_hosts) ; do echo "scp jdk-8u152-linux-x64.rpm ${i}:/tmp &" >> /tmp/sendjdk.bash ; done
Paste and run contents of the file
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} rpm -Uvh /tmp/jdk-8u152-linux-x64.rpm ; done;
Step 15
[As root - Set Env Variables]
Create file profile.sh
cat /tmp/profile.sh
export JAVA_HOME=/usr/java/latest
export HADOOP_PREFIX=/usr/local/hadoop
export LOG=/opt/HDPV2/logs
export CONF=/etc/hadoop/conf
export PATH=$JAVA_HOME/bin:$HADOOP_PREFIX/bin:$HADOOP_PREFIX/sbin:$PATH
#for i in $(cat /tmp/all_hosts) ; do scp /tmp/profile.sh ${i}:/etc/profile.d/ ; done
#for i in $(cat /tmp/all_hosts) ; do ssh ${i} chmod 755 /etc/profile.d/profile.sh ; done
#for i in $(cat /tmp/all_hosts) ; do ssh ${i} source /etc/profile.d/profile.sh ; done
Step 16
[As root - Permissions Set]
My Mount is going to /opt/HDPV2 for my hadoop based data.
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} chown root:hadoop /opt /opt/HDPV2 ; done
No comments:
Write comments