
Sunday, April 1, 2018

Hadoop V2 - Spark Configuration

In this blog I will discuss the Spark installation on a Hadoop (HDFS) cluster.
This is a continuation of my previous blog on Hadoop 2 configuration.


Step 1

[As root - Add spark user]

# for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} useradd -u 1006 -g hadoop spark" >> /tmp/useradd ; done ; bash /tmp/useradd ; rm -f /tmp/useradd
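
Assuming the hadoop group already exists from the earlier Hadoop setup, a quick check that the user landed on every host with the same UID and group (a sketch, not part of the original steps):

# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'hostname; id spark' ; done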

[As root - spark user password change ]

Create the script below (for example as /tmp/setpass.sh) and run it:

#!/bin/bash
# Set the spark user's password (here: hadoop) on every host
for server in $(cat /tmp/all_hosts); do
  echo ${server}
  ssh ${server} 'passwd spark <<EOF
hadoop
hadoop
EOF'
done
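
If passwd on your distribution refuses to read the password from a pipe, chpasswd does the same job in one line (a sketch, assuming the same password as above):

# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'echo "spark:hadoop" | chpasswd' ; done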

Step 2
[As spark - Set up SSH equivalency on namenode, rmanager, snamenode]

File Name - /tmp/keysetup.exp
#!/usr/bin/env expect
# Usage: keysetup.exp <host> <remote-user> <password> <local-user>
set host [lindex $argv 0]
set username [lindex $argv 1]
set password [lindex $argv 2]
set lusername [lindex $argv 3]
set timeout 10

# Remove any stale host key and wait for ssh-keygen to finish
spawn /usr/bin/ssh-keygen -R $host
expect eof

# Copy the local user's public key to the remote account
spawn /usr/bin/ssh-copy-id -i /home/$lusername/.ssh/id_rsa.pub $username@$host
match_max 100000
expect {
    "*?(yes/no)\?" { send -- "yes\r"; exp_continue }
    "*?assword:*"  { send -- "$password\r"; exp_continue }
    "Now try*\r"   { exit 0 }
    eof            { exit 0 }
    timeout        { exit 1 }
}

Provide execute permissions:
# chmod 755 /tmp/keysetup.exp

[As spark]
$ ssh-keygen
$ for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} spark hadoop spark ; done
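
Before moving on, confirm that key-based login really works from the spark account; BatchMode makes ssh fail instead of falling back to a password prompt (a quick check, not in the original steps):

$ for i in $(cat /tmp/all_hosts) ; do ssh -o BatchMode=yes ${i} 'hostname' ; done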


Step 3
[As root - Add Spark Environment]
for i in $(cat /tmp/all_hosts) ; do ssh ${i} "echo 'export SPARK_HOME=/usr/local/spark' >> /etc/profile.d/profile.sh" ; done
for i in $(cat /tmp/all_hosts) ; do ssh ${i} "echo 'PATH=\$SPARK_HOME/bin:\$PATH' >> /etc/profile.d/profile.sh" ; done
for i in $(cat /tmp/all_hosts) ; do ssh ${i} "echo 'PATH=\$SPARK_HOME/sbin:\$PATH' >> /etc/profile.d/profile.sh" ; done
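
To verify, source the profile on each host and print the variable (the /usr/local/spark link itself is only created in Step 6, but the variable should already echo):

# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'source /etc/profile.d/profile.sh; echo ${SPARK_HOME}' ; done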

Step 4
[As root - Download Spark]
wget http://www-eu.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
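
Before shipping the tarball to every node, it is cheap to confirm the download is intact by listing the archive without extracting it (a sketch, assuming the download landed in /tmp as used in Step 5):

# tar -tzf /tmp/spark-2.2.0-bin-hadoop2.7.tgz > /dev/null && echo "tarball OK"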

Step 5
[As root - Namenode - Send Spark Binaries]
# for i in $(cat /tmp/all_hosts) ; do echo "scp /tmp/spark-2.2.0-bin-hadoop2.7.tgz ${i}:/tmp &" >> /tmp/sendhdpv1.bash ; done
# bash /tmp/sendhdpv1.bash ; rm -f /tmp/sendhdpv1.bash
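
The scp commands run in the background, so give them a moment and then confirm every host received the full file (comparing the size against the original is usually enough):

# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'hostname; ls -l /tmp/spark-2.2.0-bin-hadoop2.7.tgz' ; done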

Step 5.1
[As root - Extract Spark]
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} tar -xzf /tmp/spark-2.2.0-bin-hadoop2.7.tgz -C /usr/local --overwrite ; done

Step 6
[As root - Create Soft Link to Spark]

# for i in $(cat /tmp/all_hosts) ; do ssh ${i} ln -s /usr/local/spark-2.2.0-bin-hadoop2.7 /usr/local/spark ; done
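
A quick sanity check that the link resolves on every node:

# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'ls -ld /usr/local/spark' ; done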

Step 7
[As root - Spark Configuration Files]
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} mkdir /etc/spark ; done
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} mv /usr/local/spark-2.2.0-bin-hadoop2.7/conf /etc/spark/conf ; done

[As root - Give Permissions]
# for i in $(cat /tmp/all_hosts) ;do ssh ${i} chmod -R 755 /etc/spark ; done;

[As root - Create soft link]
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} ln -s /etc/spark/conf /usr/local/spark-2.2.0-bin-hadoop2.7/conf ; done


# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'mkdir -p /opt/HDPV2/spark_work' ; done;

The workers run as the spark user, so the work directory must be writable by it; set the owner and group along with the permissions:

# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'chown spark:hadoop /opt/HDPV2/spark_work; chmod 775 /opt/HDPV2/spark_work' ; done;

Step 8
[As root - Spark Configuration]

cd /etc/spark/conf
cp spark-env.sh.template spark-env.sh

Configure as below

export JAVA_HOME=/usr/java/latest
export HADOOP_CONF_DIR=/etc/hadoop/conf
export SPARK_EXECUTOR_CORES=8
export SPARK_LOG_DIR=/opt/HDPV2/logs
export SPARK_PID_DIR=/opt/HDPV2/pids
export SPARK_WORKER_DIR=/opt/HDPV2/spark_work


Reuse the Hadoop slaves file as the list of Spark workers:
# cp /etc/hadoop/conf/slaves .
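
For reference, the slaves file is just one worker hostname per line; on a cluster like this one it might look as below (the names are illustrative; substitute your own datanode hostnames):

dnode1.cluster.com
dnode2.cluster.com
dnode3.cluster.com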

[As root - configuration]
# for i in $(cat /tmp/all_hosts) ;do scp  spark-env.sh slaves ${i}:/etc/spark/conf/ ; done

[As root - Give Permissions]
# for i in $(cat /tmp/all_hosts) ;do ssh ${i} chmod -R 755 /etc/spark/conf ; done;

Step 9
[As spark - on rmanager]
Start Spark. Hadoop ships a start-all.sh of its own, so check which one the PATH resolves and start the standalone cluster by its full path to be safe:

$ which start-all.sh
/usr/local/spark/sbin/start-all.sh

$ /usr/local/spark/sbin/start-all.sh
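
To bring the standalone cluster down later, the matching stop script sits in the same directory:

$ /usr/local/spark/sbin/stop-all.sh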


Step 10
[As spark - on rmanager]
$ cd /opt/HDPV2/logs    # SPARK_LOG_DIR from Step 8
$ grep "Successfully started" spark-spark-org.apache.spark.deploy.master.Master-1-rmanager.cluster.com.out
18/04/01 15:36:58 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
18/04/01 15:36:59 INFO Utils: Successfully started service 'MasterUI' on port 8080.
18/04/01 15:37:00 INFO Utils: Successfully started service on port 6066.
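
The MasterUI on port 8080 should respond as well; a headless check from any node (a sketch, assuming the master host rmanager.cluster.com from the log above):

$ curl -s http://rmanager.cluster.com:8080 | grep -i "spark master"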

Step 11
[As spark - on rmanager - Verify processes are running]
Expect a Master process on rmanager and a Worker on every slave node:

$ for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'hostname; jps | grep -vi jps; echo' ; done;
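
Finally, a small job proves the cluster end to end. SparkPi ships with the binaries; the master URL below assumes the sparkMaster service from Step 10 (a smoke-test sketch):

$ spark-submit --class org.apache.spark.examples.SparkPi \
  --master spark://rmanager.cluster.com:7077 \
  /usr/local/spark/examples/jars/spark-examples_2.11-2.2.0.jar 10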
