Installing Hadoop on Linux (Fedora or any distribution)
Requirements
- A Linux OS (native, or a Linux VM in VirtualBox)
- Java 8 or Java 11
- hadoop-3.3.0.tar.gz (the release tarball)
Download: https://hadoop.apache.org/releases.html
Extract the archive to /home/cevher/apps/hadoop/.
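For example (the archive.apache.org path below is an assumption; any mirror linked from the releases page works):
$ wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.0/hadoop-3.3.0.tar.gz
$ mkdir -p ~/apps
$ tar -xzf hadoop-3.3.0.tar.gz -C ~/apps
$ mv ~/apps/hadoop-3.3.0 ~/apps/hadoop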
Then open a terminal and check the installed Java:
$ java -version
$ sudo dnf search openjdk
$ sudo dnf install java-11-openjdk-devel.x86_64
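To find the JDK path you will need for JAVA_HOME later, resolve the installed java binary and strip the trailing /bin/java from the output:
$ readlink -f "$(which java)"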
$ vi ~/.bashrc
# set Hadoop environment variables
export HADOOP_HOME=$HOME/apps/hadoop
export HADOOP_CONF_DIR=$HOME/apps/hadoop/etc/hadoop
export HADOOP_MAPRED_HOME=$HOME/apps/hadoop
export HADOOP_COMMON_HOME=$HOME/apps/hadoop
export HADOOP_HDFS_HOME=$HOME/apps/hadoop
export HADOOP_YARN_HOME=$HOME/apps/hadoop
export PATH=$PATH:$HOME/apps/hadoop/bin:$HOME/apps/hadoop/sbin
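Reload the file and sanity-check the setup; if the PATH is right, `hadoop version` should report 3.3.0:
$ source ~/.bashrc
$ echo $HADOOP_HOME
$ hadoop version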
Next, point Hadoop at the JDK:
$ cd $HOME/apps/hadoop/etc/hadoop
$ vi hadoop-env.sh
export JAVA_HOME="/usr/lib/jvm/java-11-openjdk"
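Verify that this path exists on your machine before moving on (the exact JVM directory can differ between builds):
$ /usr/lib/jvm/java-11-openjdk/bin/java -version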
$ vi core-site.xml
<configuration>
  <property>
    <!-- fs.defaultFS is the current name for the deprecated fs.default.name -->
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/cevher/apps/hadoop/tmp</value>
  </property>
</configuration>
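A quick way to confirm that Hadoop picks the value up is `hdfs getconf` (it works from anywhere once the PATH is set):
$ hdfs getconf -confKey fs.defaultFS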
$ vi hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <!-- dfs.namenode.name.dir and dfs.datanode.data.dir replace the deprecated dfs.name.dir and dfs.data.dir -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///home/cevher/apps/hadoop/hadoopdata/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///home/cevher/apps/hadoop/hadoopdata/hdfs/datanode</value>
  </property>
</configuration>
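The same check works for the HDFS keys:
$ hdfs getconf -confKey dfs.replication
$ hdfs getconf -confKey dfs.namenode.name.dir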
$ vi mapred-site.xml
<configuration>
  <!-- mapred.job.tracker is obsolete here: when running on YARN there is no JobTracker -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.application.classpath</name>
    <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
  </property>
</configuration>
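The classpath above depends on $HADOOP_MAPRED_HOME being passed through to containers (it is whitelisted in yarn-site.xml below); to inspect the expanded classpath locally:
$ hadoop classpath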
$ vi yarn-site.xml
<configuration>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>
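Before going further, it is worth checking that none of the four files has malformed XML; one way, assuming xmllint is available (package libxml2 on Fedora):
$ xmllint --noout $HOME/apps/hadoop/etc/hadoop/*-site.xml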
$ cd $HOME/apps/hadoop/
$ mkdir -p hadoopdata/hdfs/namenode
$ mkdir -p hadoopdata/hdfs/datanode
$ mkdir cache
$ mkdir logs
$ mkdir tmp
$ mkdir input
$ bin/hdfs namenode -format
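A successful format creates the NameNode metadata directory; if the VERSION file is there, the format worked:
$ ls hadoopdata/hdfs/namenode/current/VERSION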
$ sbin/start-all.sh
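Five daemons should now be running. Check with jps, and browse the web UIs: the NameNode on http://localhost:9870 (Hadoop 3 moved it from 50070) and the ResourceManager on http://localhost:8088.
$ jps
Expect NameNode, DataNode, SecondaryNameNode, ResourceManager and NodeManager in the list.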
$ cp etc/hadoop/*.xml input
$ bin/hdfs dfs -mkdir -p /user/$(whoami)
$ bin/hdfs dfs -put input input
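To smoke-test the full stack, run the grep example that ships with the release against the uploaded files, then print the result:
$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar grep input output 'dfs[a-z.]+'
$ bin/hdfs dfs -cat output/*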
When you are done, stop all the Hadoop services:
$ sbin/stop-all.sh
If starting the daemons fails because `ssh localhost` asks for a password (or cannot connect), set up passwordless SSH:
$ ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
$ chmod 0600 ~/.ssh/authorized_keys
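On Fedora the SSH server itself may not be running; assuming systemd and the openssh-server package, enable it and re-test:
$ sudo systemctl enable --now sshd
$ ssh localhost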