In this blog post I discuss how to configure the Fair Scheduler.
The Fair Scheduler is also one of the schedulers used in production environments.
In my words, it is fairer than the other YARN schedulers.
Step 1. Enable Fair Scheduler
Configure (Append/modify) yarn-site.xml as in Appendix
Step 2. Configure fair-scheduler.xml file as in Appendix
Hadoop automatically reloads the fair scheduler allocation file (fair-scheduler.xml) every 10 seconds, so later edits to that file take effect without a restart.
Step 3. Restart the YARN ResourceManager daemon
yarn-daemon.sh stop resourcemanager
yarn-daemon.sh start resourcemanager
Step 4. Verify on rm WebUI - configured queues.
rm:8088
Or use the command line to verify:
[yarn@rm ~]$ hadoop queue -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.
18/04/25 02:35:15 INFO client.RMProxy: Connecting to ResourceManager at rm/192.168.2.102:8032
======================
Queue Name : root.data_science
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.data_science.best_effort
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.data_science.priority
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.default
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.marketing
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.marketing.reports
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.marketing.website
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sales
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sales.asia
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sales.europe
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sales.northamerica
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sqoop
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sqoop.sql
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
Appendix
yarn-site.xml
<!-- Add these properties inside the <configuration> element of yarn-site.xml. -->

<!-- Select the Fair Scheduler as the ResourceManager scheduler implementation. -->
<property>
  <name>yarn.resourcemanager.scheduler.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>

<!-- Location of the allocation file that declares the queues. -->
<property>
  <name>yarn.scheduler.fair.allocation.file</name>
  <value>/etc/hadoop/conf/fair-scheduler.xml</value>
</property>

<!-- Turn on preemption. Declare this property only once; a repeated entry is redundant. -->
<property>
  <name>yarn.scheduler.fair.preemption</name>
  <value>true</value>
</property>
fair-scheduler.xml
<?xml version="1.0"?>
<!-- Fair Scheduler allocation file: queue hierarchy plus queue placement rules. -->
<allocations>
  <!-- Default maxAMShare applied to queues that do not set their own value. -->
  <queueMaxAMShareDefault>0.5</queueMaxAMShareDefault>

  <queue name="root">
    <!-- Sales: one child queue per region, all with default settings. -->
    <queue name="sales">
      <queue name="northamerica" />
      <queue name="europe" />
      <queue name="asia" />
    </queue>

    <!-- Marketing: two child queues with default settings. -->
    <queue name="marketing">
      <queue name="reports" />
      <queue name="website" />
    </queue>

    <!-- Data science: the two child queues differ only in weight (100.0 versus 0.0). -->
    <queue name="data_science">
      <queue name="priority">
        <weight>100.0</weight>
      </queue>
      <queue name="best_effort">
        <weight>0.0</weight>
      </queue>
    </queue>

    <!-- Sqoop: the only queue with explicit resource bounds and limits. -->
    <queue name="sqoop">
      <!-- NOTE(review): the vcore component is 0 in these bounds; confirm this
           does not block container allocation on your Hadoop version. -->
      <minResources>10000 mb,0vcores</minResources>
      <maxResources>90000 mb,0vcores</maxResources>
      <maxRunningApps>50</maxRunningApps>
      <maxAMShare>0.1</maxAMShare>
      <weight>2.0</weight>
      <schedulingPolicy>fair</schedulingPolicy>

      <!-- Child queue with a submit ACL naming the user charlie. -->
      <queue name="sql">
        <aclSubmitApps>charlie</aclSubmitApps>
        <minResources>5000 mb,0vcores</minResources>
      </queue>
    </queue>
  </queue>

  <!-- Placement rules, listed in the order they appear in this file. -->
  <queuePlacementPolicy>
    <rule name="specified" />
    <rule name="primaryGroup" create="false" />
    <rule name="nestedUserQueue">
      <rule name="secondaryGroupExistingQueue" create="false" />
    </rule>
    <!-- NOTE(review): no queue named default_queue is declared above; confirm
         it is meant to be created on demand. -->
    <rule name="default" queue="default_queue"/>
  </queuePlacementPolicy>
</allocations>
The Fair Scheduler is also one of the schedulers used in production environments.
In my words, it is fairer than the other YARN schedulers.
Step 1. Enable Fair Scheduler
Configure (Append/modify) yarn-site.xml as in Appendix
Step 2. Configure fair-scheduler.xml file as in Appendix
Hadoop automatically reloads the fair scheduler allocation file (fair-scheduler.xml) every 10 seconds, so later edits to that file take effect without a restart.
Step 3. Restart the YARN ResourceManager daemon
yarn-daemon.sh stop resourcemanager
yarn-daemon.sh start resourcemanager
Step 4. Verify on rm WebUI - configured queues.
rm:8088
Or use the command line to verify:
[yarn@rm ~]$ hadoop queue -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.
18/04/25 02:35:15 INFO client.RMProxy: Connecting to ResourceManager at rm/192.168.2.102:8032
======================
Queue Name : root.data_science
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.data_science.best_effort
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.data_science.priority
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.default
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.marketing
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.marketing.reports
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.marketing.website
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sales
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sales.asia
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sales.europe
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sales.northamerica
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sqoop
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
======================
Queue Name : root.sqoop.sql
Queue State : running
Scheduling Info : Capacity: 0.0, MaximumCapacity: UNDEFINED, CurrentCapacity: 0.0
Appendix
yarn-site.xml
<!-- Add these properties inside the <configuration> element of yarn-site.xml. -->

<!-- Select the Fair Scheduler as the ResourceManager scheduler implementation. -->
<property>
  <name>yarn.resourcemanager.scheduler.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>

<!-- Location of the allocation file that declares the queues. -->
<property>
  <name>yarn.scheduler.fair.allocation.file</name>
  <value>/etc/hadoop/conf/fair-scheduler.xml</value>
</property>

<!-- Turn on preemption. Declare this property only once; a repeated entry is redundant. -->
<property>
  <name>yarn.scheduler.fair.preemption</name>
  <value>true</value>
</property>
fair-scheduler.xml
<?xml version="1.0"?>
<!-- Fair Scheduler allocation file: queue hierarchy plus queue placement rules. -->
<allocations>
  <!-- Default maxAMShare applied to queues that do not set their own value. -->
  <queueMaxAMShareDefault>0.5</queueMaxAMShareDefault>

  <queue name="root">
    <!-- Sales: one child queue per region, all with default settings. -->
    <queue name="sales">
      <queue name="northamerica" />
      <queue name="europe" />
      <queue name="asia" />
    </queue>

    <!-- Marketing: two child queues with default settings. -->
    <queue name="marketing">
      <queue name="reports" />
      <queue name="website" />
    </queue>

    <!-- Data science: the two child queues differ only in weight (100.0 versus 0.0). -->
    <queue name="data_science">
      <queue name="priority">
        <weight>100.0</weight>
      </queue>
      <queue name="best_effort">
        <weight>0.0</weight>
      </queue>
    </queue>

    <!-- Sqoop: the only queue with explicit resource bounds and limits. -->
    <queue name="sqoop">
      <!-- NOTE(review): the vcore component is 0 in these bounds; confirm this
           does not block container allocation on your Hadoop version. -->
      <minResources>10000 mb,0vcores</minResources>
      <maxResources>90000 mb,0vcores</maxResources>
      <maxRunningApps>50</maxRunningApps>
      <maxAMShare>0.1</maxAMShare>
      <weight>2.0</weight>
      <schedulingPolicy>fair</schedulingPolicy>

      <!-- Child queue with a submit ACL naming the user charlie. -->
      <queue name="sql">
        <aclSubmitApps>charlie</aclSubmitApps>
        <minResources>5000 mb,0vcores</minResources>
      </queue>
    </queue>
  </queue>

  <!-- Placement rules, listed in the order they appear in this file. -->
  <queuePlacementPolicy>
    <rule name="specified" />
    <rule name="primaryGroup" create="false" />
    <rule name="nestedUserQueue">
      <rule name="secondaryGroupExistingQueue" create="false" />
    </rule>
    <!-- NOTE(review): no queue named default_queue is declared above; confirm
         it is meant to be created on demand. -->
    <rule name="default" queue="default_queue"/>
  </queuePlacementPolicy>
</allocations>
No comments:
Write comments