Hadoop 2.4.1 Deployment + Complete Configuration Files (1)
Author: Anonymous | Source: linuxidc

1. Test environment:
A 4-node cluster with 3 ZooKeeper nodes. The hosts file and the per-node role assignment are as follows:
hosts:
192.168.66.91 master
192.168.66.92 slave1
192.168.66.93 slave2
192.168.66.94 slave3


Role assignment:
         Active NN   Standby NN   DN   JournalNode   ZooKeeper   FailoverController
master       V                         V             V           V
slave1                   V        V    V             V           V
slave2                            V    V             V
slave3                            V

--------------------------------------------------------------------------------

2. hadoop-env.sh — only the following three settings need to be changed
# The Java implementation to use.
export JAVA_HOME=/usr/lib/jvm/jdk1.7.0_07


# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by the user that will run the hadoop daemons.  Otherwise there is the potential for a symlink attack.
export HADOOP_PID_DIR=/home/yarn/Hadoop/hadoop-2.4.1/hadoop_pid_dir
export HADOOP_SECURE_DN_PID_DIR=/home/yarn/Hadoop/hadoop-2.4.1/hadoop_pid_dir
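
A quick sketch, assuming the daemons run as the "yarn" user implied by the paths above, of preparing that pid directory so that only the daemon user can write to it (the reason for the symlink-attack note above):

# Create the directory referenced by HADOOP_PID_DIR / HADOOP_SECURE_DN_PID_DIR
mkdir -p /home/yarn/Hadoop/hadoop-2.4.1/hadoop_pid_dir
# Restrict write access to the daemon user only ("yarn" user and group are assumed here)
chown yarn:yarn /home/yarn/Hadoop/hadoop-2.4.1/hadoop_pid_dir
chmod 755 /home/yarn/Hadoop/hadoop-2.4.1/hadoop_pid_dir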

--------------------------------------------------------------------------------

3. core-site.xml (complete file)

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Licensed under the Apache License, Version 2.0 (the "License"); you 
    may not use this file except in compliance with the License. You may obtain 
    a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless 
    required by applicable law or agreed to in writing, software distributed 
    under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES 
    OR CONDITIONS OF ANY KIND, either express or implied. See the License for 
    the specific language governing permissions and limitations under the License. 
    See accompanying LICENSE file. -->
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://myhadoop</value>
        <description>NameNode URI, in the form hdfs://host:port/. If the NN HA
            feature is enabled, set this to the logical name of the nameservice; see
            http://www.linuxidc.com/Linux/2014-09/106292.htm for details.
        </description>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/yarn/Hadoop/hadoop-2.4.1/tmp</value>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
        <description>Size of read/write buffer used in SequenceFiles.
        </description>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
        <description>Note: once ZK is configured here, ZooKeeper must be started
            before formatting or starting the NameNode, otherwise a connection error
            is reported (see the start-up sketch after this file).
        </description>
    </property>
</configuration> 
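
The ha.zookeeper.quorum note above implies a fixed first-time start-up order. A hedged sketch of that order using standard Hadoop 2.x commands (run each command on the hosts named in the role assignment of section 1; exact script locations depend on your installation):

# 1. Start ZooKeeper on master, slave1 and slave2 first
zkServer.sh start

# 2. Start the JournalNodes on master, slave1 and slave2
hadoop-daemon.sh start journalnode

# 3. On master (nn1): format HDFS and the ZKFC znode, then start the NameNode
hdfs namenode -format
hdfs zkfc -formatZK
hadoop-daemon.sh start namenode

# 4. On slave1 (nn2): copy the namespace from nn1, then start the standby NameNode
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode

# 5. Start the remaining HDFS daemons (DataNodes, ZKFCs) and YARN
start-dfs.sh
start-yarn.sh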


--------------------------------------------------------------------------------

4. hdfs-site.xml (complete file)

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Licensed under the Apache License, Version 2.0 (the "License"); you 
    may not use this file except in compliance with the License. You may obtain 
    a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless 
    required by applicable law or agreed to in writing, software distributed 
    under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES 
    OR CONDITIONS OF ANY KIND, either express or implied. See the License for 
    the specific language governing permissions and limitations under the License. 
    See accompanying LICENSE file. -->
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <!-- NN HA related configuration **BEGIN** -->
    <property>
        <name>dfs.nameservices</name>
        <value>myhadoop</value>
        <description>
            Comma-separated list of nameservices.
            Must be the same as fs.defaultFS in core-site.xml.
        </description>
    </property>
    <property>
        <name>dfs.ha.namenodes.myhadoop</name>
        <value>nn1,nn2</value>
        <description>
            The prefix for a given nameservice, contains a comma-separated
            list of namenodes for a given nameservice (eg EXAMPLENAMESERVICE).
        </description>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.myhadoop.nn1</name>
        <value>master:8020</value>
        <description>
            RPC address for namenode nn1 of nameservice myhadoop
        </description>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.myhadoop.nn2</name>
        <value>slave1:8020</value>
        <description>
            RPC address for namenode nn2 of nameservice myhadoop
        </description>
    </property>
    <property>
        <name>dfs.namenode.http-address.myhadoop.nn1</name>
        <value>master:50070</value>
        <description>
            The address and the base port where the dfs namenode1 web ui will listen
            on.
        </description>
    </property>
    <property>
        <name>dfs.namenode.http-address.myhadoop.nn2</name>
        <value>slave1:50070</value>
        <description>
            The address and the base port where the dfs namenode2 web ui will listen
            on.
        </description>
    </property>
    <property>
        <name>dfs.namenode.servicerpc-address.myhadoop.nn1</name>
        <value>master:53310</value>
    </property>
    <property>
        <name>dfs.namenode.servicerpc-address.myhadoop.nn2</name>
        <value>slave1:53310</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
        <description>
            Whether automatic failover is enabled. See the HDFS High
            Availability documentation for details on automatic HA
            configuration.
        </description>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.myhadoop</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
        </value>
        <description>Configure the name of the Java class which will be used
            by the DFS Client to determine which NameNode is the current Active,
            and therefore which NameNode is currently serving client requests.
            This class is the client-side access proxy; it is what makes the HA
            feature transparent to clients.
        </description>
    </property>
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
        <description>Fencing method used to isolate the previously active NameNode during a failover.</description>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/yarn/.ssh/id_rsa</value>
        <description>Location of the SSH private key used by the sshfence method.</description>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>1000</value>
    </property>
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/home/yarn/Hadoop/hadoop-2.4.1/hdfs_dir/journal/</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://master:8485;slave1:8485;slave2:8485/hadoop-journal
        </value>
        <description>A directory on shared storage between the multiple
            namenodes
            in an HA cluster. This directory will be written by the active and read
            by the standby in order to keep the namespaces synchronized. This
            directory
            does not need to be listed in dfs.namenode.edits.dir above. It should be
            left empty in a non-HA cluster.
        </description>
    </property>
    <!-- NN HA related configuration **END** -->
    <!-- NameNode related configuration **BEGIN** -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///home/yarn/Hadoop/hadoop-2.4.1/hdfs_dir/name</value>
        <description>Path on the local filesystem where the NameNode stores
            the namespace and transaction logs persistently. If this is a
            comma-delimited list of directories then the name table is replicated
            in all of the directories, for redundancy.</description>
    </property>
    <property>
        <name>dfs.blocksize</name>
        <value>1048576</value>
        <description>
        Set to the minimum allowed block size of 1048576 (1 MB) for this test
        environment; the usual recommendation for large file-systems is 128 MB.
        </description>
    </property>
    <property>
        <name>dfs.namenode.handler.count</name>
        <value>10</value>
        <description>More NameNode server threads to handle RPCs from large
            number of DataNodes.</description>
    </property>
    <!-- <property> <name>dfs.namenode.hosts</name> <value>master</value> <description>If 
        necessary, use this to control the list of allowable datanodes.</description> 
        </property> <property> <name>dfs.namenode.hosts.exclude</name> <value>slave1,slave2,slave3</value> 
        <description>If necessary, use this to control the list of excluded DataNodes.</description> 
        </property> -->
    <!-- NameNode related configuration **END** -->
    <!-- DataNode related configuration **BEGIN** -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:///home/yarn/Hadoop/hadoop-2.4.1/hdfs_dir/data</value>
        <description>Comma separated list of paths on the local filesystem of
            a DataNode where it should store its blocks. If this is a
            comma-delimited list of directories, then data will be stored in all
            named directories, typically on different devices.</description>
    </property>
    <!-- DataNode related configuration **END** -->
</configuration> 
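
Once HDFS is running with the HA settings above, the NameNode states can be checked from the shell. A small sketch (nn1 and nn2 are the IDs declared in dfs.ha.namenodes.myhadoop):

# Which NameNode is active and which is standby?
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2

# DataNode registration and capacity overview
hdfs dfsadmin -report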


--------------------------------------------------------------------------------
5. yarn-site.xml

<?xml version="1.0"?>
<!-- Licensed under the Apache License, Version 2.0 (the "License"); you 
    may not use this file except in compliance with the License. You may obtain 
    a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless 
    required by applicable law or agreed to in writing, software distributed 
    under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES 
    OR CONDITIONS OF ANY KIND, either express or implied. See the License for 
    the specific language governing permissions and limitations under the License. 
    See accompanying LICENSE file. -->
<configuration>
    <!-- ResourceManager and NodeManager related configuration ***BEGIN*** -->
    <property>
        <name>yarn.acl.enable</name>
        <value>false</value>
        <description>Enable ACLs? Defaults to false.</description>
    </property>
    <property>
        <name>yarn.admin.acl</name>
        <value>*</value>
        <description>
        ACL to set admins on the cluster. ACLs are of the form
        comma-separated-users space comma-separated-groups.
        Defaults to the special value of *, which means anyone. The special value of
        just a space means no one has access.
        </description>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>false</value>
        <description>Configuration to enable or disable log aggregation</description>
    </property>
    <!-- ResourceManager and NodeManager related configuration ***END*** -->
    
    <!-- ResourceManager related configuration ***BEGIN*** -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
        <description>The hostname of the RM.</description>
    </property>
    
    <property>
        <name>yarn.resourcemanager.webapp.https.address</name>
        <value>${yarn.resourcemanager.hostname}:8090</value>
        <description>The https address of the RM web application.</description>
    </property>
    
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>${yarn.resourcemanager.hostname}:8032</value>
        <description>ResourceManager host:port for clients to submit jobs.</description>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>${yarn.resourcemanager.hostname}:8030</value>
        <description>ResourceManager host:port for ApplicationMasters to talk to Scheduler to obtain resources.</description>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>${yarn.resourcemanager.hostname}:8031</value>
        <description>ResourceManager host:port for NodeManagers.</description>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>${yarn.resourcemanager.hostname}:8033</value>
        <description>ResourceManager host:port for administrative commands.</description>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>${yarn.resourcemanager.hostname}:8088</value>
        <description>ResourceManager web-ui host:port.</description>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
        <description>
        ResourceManager Scheduler class.
        CapacityScheduler (recommended), FairScheduler (also recommended), or FifoScheduler
        </description>
    </property>
    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>512</value>
        <description>
        Minimum limit of memory to allocate to each container request at the Resource Manager.    
        In MBs
        </description>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>2048</value>
        <description>
        Maximum limit of memory to allocate to each container request at the Resource Manager.    
        In MBs.
        Note that in this configuration yarn.scheduler.maximum-allocation-mb > yarn.nodemanager.resource.memory-mb.
        </description>
    </property>
    
    <!--
    <property>
        <name>yarn.resourcemanager.nodes.include-path</name>
        <value></value>
        <description>
        List of permitted NodeManagers.    
        If necessary, use this to control the list of allowable NodeManagers.
        </description>
    </property>
    <property>
        <name>yarn.resourcemanager.nodes.exclude-path</name>
        <value></value>
        <description>
        List of excluded NodeManagers.
        If necessary, use this to control the list of excluded NodeManagers.
        </description>
    </property>
    -->
    <!-- ResourceManager related configuration ***END*** -->
    
    <!-- NodeManager related configuration ***BEGIN*** -->
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>1024</value>
        <description>
        Resource i.e. available physical memory, in MB, for given NodeManager.    
        Defines total available resources on the NodeManager to be made available to running containers.
        </description>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-pmem-ratio</name>
        <value>2.1</value>
        <description>
        Ratio between virtual memory to physical memory when setting memory limits for containers. 
        Container allocations are expressed in terms of physical memory, 
        and virtual memory usage is allowed to exceed this allocation by this ratio.
        </description>
    </property>
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/home/yarn/Hadoop/hadoop-2.4.1/yarn_dir/local</value>
        <description>
        Comma-separated list of paths on the local filesystem where intermediate data is written.
        Multiple paths help spread disk i/o.
        </description>
    </property>    
    <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/home/yarn/Hadoop/hadoop-2.4.1/yarn_dir/log</value>
        <description>
        Comma-separated list of paths on the local filesystem where logs are written.    
        Multiple paths help spread disk i/o.
        </description>
    </property>
    <property>
        <name>yarn.nodemanager.log.retain-seconds</name>
        <value>10800</value>
        <description>
        Default time (in seconds) to retain log files on the NodeManager.
        ***Only applicable if log-aggregation is disabled.
        </description>
    </property>
    <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/yarn/log-aggregation</value>
        <description>
        HDFS directory where the application logs are moved on application completion. 
        Need to set appropriate permissions. 
        ***Only applicable if log-aggregation is enabled.
        </description>
    </property>
    <property>
        <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
        <value>logs</value>
        <description>
        Suffix appended to the remote log dir. 
        Logs will be aggregated to ${yarn.nodemanager.remote-app-log-dir}/${user}/${thisParam}. 
        ***Only applicable if log-aggregation is enabled.
        </description>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
        <description>Shuffle service that needs to be set for Map Reduce applications.</description>
    </property>
    <property>
        <name>yarn.nodemanager.resource.cpu-vcores</name>
        <value>1</value>
        <description>Number of CPU cores that can be allocated for containers.</description>
    </property>
    <!-- NodeManager related configuration ***END*** -->
    
    <!-- History Server related configuration ***BEGIN*** -->
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>-1</value>
        <description>
        How long to keep aggregation logs before deleting them. 
        -1 disables. 
        Be careful, set this too small and you will spam the name node.
        </description>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-check-interval-seconds</name>
        <value>-1</value>
        <description>
        Time between checks for aggregated log retention. 
        If set to 0 or a negative value then the value is computed as one-tenth of the aggregated log retention time. 
        Be careful, set this too small and you will spam the name node.
        </description>
    </property>
    <!-- History Server related configuration ***END*** -->
    
    <property>
        <name>yarn.scheduler.fair.allocation.file</name>
        <value>${yarn.home.dir}/etc/hadoop/fairscheduler.xml</value>
        <description>fairscheduler config file path</description>
        <!-- Surprisingly, this property does not appear in the official documentation, but it does work! -->
    </property>
</configuration> 
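
With YARN configured as above, NodeManager registration and scheduling can be sanity-checked from the shell. A sketch; the examples-jar path is an assumption based on a default hadoop-2.4.1 binary layout:

# NodeManagers that have registered with the ResourceManager on master
yarn node -list

# Submit a small test job (jar path assumes the default distribution layout)
yarn jar /home/yarn/Hadoop/hadoop-2.4.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.4.1.jar pi 2 10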

