In a new Docker container, you need to set up the basic environment before setting up Hadoop.
$ sudo apt-get update
$ sudo apt-get install default-jdk
The default JDK will be installed at /usr/lib/jvm/<java-version>
$ sudo apt-get install git wget vim ssh
$ adduser hduser
Grant a user privileges
$ visudo
Append the hduser you just created below the root entry,
giving it the same privilege specification as root.
$ ssh-keygen -t rsa
$ cat .ssh/id_rsa.pub >> .ssh/authorized_keys
Because I use Docker as my distribution tool, the default SSH port 22
is already being listened on by the host machine, so I need to use another
port for communication between Docker containers
on different host machines.
In my example, I will use 2122
port for listening and sending request.
$ sudo vi /etc/ssh/ssh_config
-> Port 2122
:wq!
$ sudo vi /etc/ssh/sshd_config
-> Port 2122
-> UsePAM no
:wq!
Suppose my cloud environment is as follows:
Download hadoop-2.6.0
$ wget http://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/hadoop-2.6.0.tar.gz
$ tar zxvf hadoop-2.6.0.tar.gz
Set environment path
$ sudo vi /etc/profile
-> export JAVA_HOME=/usr/lib/jvm/<java-version>
-> export HADOOP_HOME=<YOUR_HADOOP_PACKAGE_PATH>
-> export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
-> export PATH=$HADOOP_HOME/bin:$PATH
-> export CLASSPATH=$HADOOP_HOME/lib:$CLASSPATH
:wq!
$ source /etc/profile
All the needed configuration files are stored in <HADOOP_HOME>/etc/hadoop
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
<description>The HDFS master (NameNode) endpoint of the cluster.</description>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/<HADOOP_HOME>/temp</value>
<description>A base for other temporary directories.</description>
</property>
</configuration>
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master2:9001</value>
<description>Set the secondary NameNode, which periodically checkpoints the namespace image. Note: it is not a hot standby and does not provide automatic failover.</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/<HADOOP_HOME>/dfs/name</value>
<description>Set the directory where the NameNode stores its metadata (fsimage and edit logs).</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/<HADOOP_HOME>/dfs/data</value>
<description>Set the directory where the DataNode stores its data blocks.</description>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
<description>The number of replication in cluster.</description>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>4096</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.support.append</name>
<value>true</value>
</property>
</configuration>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
</configuration>
$ vi <HADOOP_CONF_DIR>/hadoop-env.sh
-> export JAVA_HOME=/usr/lib/jvm/<java-version>
:wq!
$ vi <HADOOP_CONF_DIR>/yarn-env.sh
-> export JAVA_HOME=/usr/lib/jvm/<java-version>
:wq!
$ vi <HADOOP_CONF_DIR>/slaves
-> master2
-> slave1
-> slave2
-> slave3