In this blog I will discuss Spark installation on a Hadoop (HDFS) cluster.
This is a continuation of my previous blog on Hadoop 2 configuration.
Step 1
[As root - Add spark user]
# for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} useradd -u 1006 -g hadoop spark" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd
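A quick sanity check: id should resolve the new user on every host.
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'hostname; id spark' ; done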
[As root - Change the spark user password]
Create a script as below and run it
#!/bin/bash
# Set the spark user's password to "hadoop" on every host.
# passwd does not reliably read a password from a heredoc,
# so chpasswd (which reads user:password pairs from stdin) is used instead.
for server in $(cat /tmp/all_hosts); do
  echo ${server}
  ssh ${server} "echo 'spark:hadoop' | chpasswd"
done
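Save it under any name (here /tmp/spark_passwd.sh is just an example) and run it:
# bash /tmp/spark_passwd.sh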
Step 2
[As spark - Set up SSH equivalency on namenode, rmanager, snamenode]
File Name - /tmp/keysetup.exp
#!/usr/bin/env expect
# Usage: keysetup.exp <host> <remote user> <password> <local user>
set host      [lindex $argv 0]
set username  [lindex $argv 1]
set password  [lindex $argv 2]
set lusername [lindex $argv 3]
set timeout 10
# Drop any stale host key, then push the local user's public key.
spawn /usr/bin/ssh-keygen -R $host
expect eof
spawn /usr/bin/ssh-copy-id -i /home/$lusername/.ssh/id_rsa.pub $username@$host
match_max 100000
expect {
    "*?(yes/no)\?" { send -- "yes\r"; exp_continue }
    "*?assword:*"  { send -- "$password\r"; send -- "\r"; exp_continue }
    "Now try*\r"   { exit 0 }
    eof            { exit 1 }
    timeout        { exit 1 }
}
exit 0
Provide Execute Permissions
chmod 755 /tmp/keysetup.exp
[As spark]
# ssh-keygen
Accept the default key file and an empty passphrase, then push the key to every host:
# for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} spark hadoop spark ; done
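Verify equivalency; none of these logins should prompt for a password:
for i in $(cat /tmp/all_hosts) ; do ssh ${i} hostname ; done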
Step 3
[As root - Add Spark Environment]
for i in $(cat /tmp/all_hosts) ; do ssh ${i} "echo 'export SPARK_HOME=/usr/local/spark' >> /etc/profile.d/profile.sh" ; done
for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'echo "PATH=\$SPARK_HOME/bin:\$PATH" >> /etc/profile.d/profile.sh' ; done
for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'echo "PATH=\$SPARK_HOME/sbin:\$PATH" >> /etc/profile.d/profile.sh' ; done
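To confirm the environment is in place on each node (a login shell sources /etc/profile.d/profile.sh automatically):
for i in $(cat /tmp/all_hosts) ; do ssh ${i} "hostname; source /etc/profile.d/profile.sh; echo \$SPARK_HOME" ; done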
Step 4
[As root - Download Spark]
Download into /tmp, since the next step distributes the tarball from there:
cd /tmp
wget http://www-eu.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
Step 5
[As root - Namenode - Send Spark Binaries]
# for i in $(cat /tmp/all_hosts) ; do echo "scp /tmp/spark-2.2.0-bin-hadoop2.7.tgz ${i}:/tmp &" >> /tmp/sendhdpv1.bash ; done
# bash /tmp/sendhdpv1.bash
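The copies run in the background, so confirm the full tarball has arrived on every host before extracting:
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'hostname; ls -l /tmp/spark-2.2.0-bin-hadoop2.7.tgz' ; done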
Step 5.1
[As root - Extract Spark]
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} tar -xzf /tmp/spark-2.2.0-bin-hadoop2.7.tgz -C /usr/local --overwrite ; done
Step 6
[As root - Create Soft Link to Spark]
# for i in $(cat /tmp/all_hosts) ;do ssh ${i} ln -s /usr/local/spark-2.2.0-bin-hadoop2.7 /usr/local/spark ; done
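Check that the link resolves everywhere:
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'hostname; ls -ld /usr/local/spark' ; done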
Step 7
[As root - Spark Configuration Files]
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} mkdir /etc/spark ; done
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} mv /usr/local/spark-2.2.0-bin-hadoop2.7/conf /etc/spark/conf ; done
[As root - Give Permissions]
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} chmod -R 755 /etc/spark ; done
[As root - Create soft link]
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} ln -s /etc/spark/conf /usr/local/spark-2.2.0-bin-hadoop2.7/conf ; done
[As root - Create the worker directory; it must be writable by the spark user]
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'mkdir -p /opt/HDPV2/spark_work' ; done
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'chown spark:hadoop /opt/HDPV2/spark_work; chmod 775 /opt/HDPV2/spark_work' ; done
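Verify ownership and permissions of the worker directory:
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'hostname; ls -ld /opt/HDPV2/spark_work' ; done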
Step 8
[As root - Spark Configuration]
cd /etc/spark/conf
cp spark-env.sh.template spark-env.sh
Configure as below
export JAVA_HOME=/usr/java/latest
export HADOOP_CONF_DIR=/etc/hadoop/conf         # lets Spark find the HDFS/YARN client configs
export SPARK_EXECUTOR_CORES=8                   # cores per executor
export SPARK_LOG_DIR=/opt/HDPV2/logs            # daemon logs
export SPARK_PID_DIR=/opt/HDPV2/pids            # daemon pid files
export SPARK_WORKER_DIR=/opt/HDPV2/spark_work   # per-application scratch space on each worker
# cp /etc/hadoop/conf/slaves .
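Spark's standalone scripts read conf/slaves the same way Hadoop does: one worker hostname per line. Reusing the Hadoop slaves file starts a Spark Worker on every DataNode.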
[As root - Distribute configuration]
# for i in $(cat /tmp/all_hosts) ; do scp spark-env.sh slaves ${i}:/etc/spark/conf/ ; done
[As root - Give Permissions]
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} chmod -R 755 /etc/spark/conf ; done
Step 9
[As spark - on rm]
Start Spark. Hadoop ships its own start-all.sh, so confirm which one is first on the PATH and invoke Spark's copy by its full path:
which start-all.sh
/usr/local/spark/sbin/start-all.sh
Step 10
[As spark - on RM]
cd /opt/HDPV2/logs   (the SPARK_LOG_DIR configured in Step 8)
grep "Successfully started " spark-spark-org.apache.spark.deploy.master.Master-1-rmanager.cluster.com.out
18/04/01 15:36:58 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
18/04/01 15:36:59 INFO Utils: Successfully started service 'MasterUI' on port 8080.
18/04/01 15:37:00 INFO Utils: Successfully started service on port 6066.
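Port 7077 is the master RPC port that workers and applications connect to, 8080 serves the master web UI, and 6066 is the REST endpoint for application submission.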
Step 11
[As spark - on rm - Verify processes are running]
for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'hostname; jps | grep -vi jps; echo' ; done;
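Every host should report a Worker alongside the existing HDFS/YARN daemons, with a Master on rmanager. As a final smoke test, run one of the bundled examples against the master (master URL taken from the log lines in Step 10; adjust if your master host differs):
# /usr/local/spark/bin/run-example --master spark://rmanager.cluster.com:7077 SparkPi 10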