Thursday, September 26, 2013

How to install Hadoop 1.2.1 on a single standalone machine (Pseudo-Distributed Mode)

Preface
Generally, a Hadoop cluster needs at least five nodes:
one namenode,
one secondary namenode,
three datanodes

To save resources in this lab, the namenode and datanode run on the same machine, and the replication factor is set to 1.

VM environment: Red Hat 5.6, x86_64, RAM: 1 GB

1. Download JDK 1.6 (jdk-6u45-linux-x64.bin)  (from http://www.oracle.com/technetwork/java/javasebusiness/downloads/java-archive-downloads-javase6-419409.html )
  chmod +x jdk-6u45-linux-x64.bin
  ./jdk-6u45-linux-x64.bin
  #Move the extracted folder jdk1.6.0_45 to /usr
  mv jdk1.6.0_45 /usr/.
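  #Optional sanity check: confirm the relocated JDK runs before Hadoop is pointed at it
  /usr/jdk1.6.0_45/bin/java -version    # should report java version "1.6.0_45"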
2. Download Hadoop 1.2.1 :  hadoop-1.2.1-bin.tar.gz  (from http://www.apache.org/dyn/closer.cgi/hadoop/common/ )
  #Extract hadoop-1.2.1-bin.tar.gz and move it to /opt
  tar -xzvf hadoop-1.2.1-bin.tar.gz
  mv hadoop-1.2.1 /opt/.
3. Edit the required configuration files
  cd /opt/hadoop-1.2.1/conf
vi hadoop-env.sh
# The java implementation to use.  Required.
# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
export JAVA_HOME=/usr/jdk1.6.0_45

vi core-site.xml
=========================
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
=========================


vi hdfs-site.xml
=========================
<configuration>
<property>
<name>dfs.data.dir</name>
<value>/usr/hadoop_1.2.1/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
=========================
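Since dfs.data.dir points at /usr/hadoop_1.2.1/data, it is worth creating that directory up front (the datanode can usually create it itself, but creating it explicitly avoids permission surprises on first start):
mkdir -p /usr/hadoop_1.2.1/data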


vi mapred-site.xml
=========================
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:9001</value>
</property>
</configuration>
=========================
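
Note: start-all.sh launches the local daemons over SSH, so the steps below assume passwordless SSH to localhost already works. A minimal sketch if it does not:
mkdir -p ~/.ssh
ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
ssh localhost    # should log in without a password prompt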



[root@h1 bin]# ./start-all.sh
starting namenode, logging to /opt/hadoop-1.2.1/libexec/../logs/hadoop-root-namenode-h1.out
localhost: starting datanode, logging to /opt/hadoop-1.2.1/libexec/../logs/hadoop-root-datanode-h1.out
localhost: starting secondarynamenode, logging to /opt/hadoop-1.2.1/libexec/../logs/hadoop-root-secondarynamenode-h1.out
starting jobtracker, logging to /opt/hadoop-1.2.1/libexec/../logs/hadoop-root-jobtracker-h1.out
localhost: starting tasktracker, logging to /opt/hadoop-1.2.1/libexec/../logs/hadoop-root-tasktracker-h1.out

#After starting, HDFS data cannot be listed
[root@h1 bin]# ./hadoop dfs -ls
13/09/26 13:30:46 INFO ipc.Client: Retrying connect to server: localhost/127.0.0.1:9000. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1 SECONDS)

#Following advice found online, the namenode must be formatted first
[root@h1 bin]# ./hadoop namenode -format
13/09/26 13:32:15 INFO namenode.NameNode: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG:   host = h1/192.168.35.129
STARTUP_MSG:   args = [-format]
STARTUP_MSG:   version = 1.2.1
STARTUP_MSG:   build = https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1.2 -r 1503152; compiled by 'mattf' on Mon Jul 22 15:23:09 PDT 2013
STARTUP_MSG:   java = 1.6.0_45
************************************************************/
13/09/26 13:32:15 INFO util.GSet: Computing capacity for map BlocksMap
13/09/26 13:32:15 INFO util.GSet: VM type       = 64-bit
13/09/26 13:32:15 INFO util.GSet: 2.0% max memory = 1013645312
13/09/26 13:32:15 INFO util.GSet: capacity      = 2^21 = 2097152 entries
13/09/26 13:32:15 INFO util.GSet: recommended=2097152, actual=2097152
13/09/26 13:32:16 INFO namenode.FSNamesystem: fsOwner=root
13/09/26 13:32:17 INFO namenode.FSNamesystem: supergroup=supergroup
13/09/26 13:32:17 INFO namenode.FSNamesystem: isPermissionEnabled=true
13/09/26 13:32:17 INFO namenode.FSNamesystem: dfs.block.invalidate.limit=100
13/09/26 13:32:17 INFO namenode.FSNamesystem: isAccessTokenEnabled=false accessKeyUpdateInterval=0 min(s), accessTokenLifetime=0 min(s)
13/09/26 13:32:17 INFO namenode.FSEditLog: dfs.namenode.edits.toleration.length = 0
13/09/26 13:32:17 INFO namenode.NameNode: Caching file names occuring more than 10 times
13/09/26 13:32:17 INFO common.Storage: Image file /tmp/hadoop-root/dfs/name/current/fsimage of size 110 bytes saved in 0 seconds.
13/09/26 13:32:18 INFO namenode.FSEditLog: closing edit log: position=4, editlog=/tmp/hadoop-root/dfs/name/current/edits
13/09/26 13:32:18 INFO namenode.FSEditLog: close success: truncate to 4, editlog=/tmp/hadoop-root/dfs/name/current/edits
13/09/26 13:32:18 INFO common.Storage: Storage directory /tmp/hadoop-root/dfs/name has been successfully formatted.
13/09/26 13:32:18 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at h1/192.168.35.129
************************************************************/
[root@h1 bin]#
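Note from the log above: the fsimage landed under /tmp/hadoop-root/dfs/name, since hadoop.tmp.dir defaults to /tmp/hadoop-${user.name} and dfs.name.dir is unset. /tmp may be cleared on reboot, so a persistent location can be added to hdfs-site.xml before formatting; a sketch (the path below is just an example, any durable directory works):
=========================
<property>
<name>dfs.name.dir</name>
<value>/usr/hadoop_1.2.1/name</value>
</property>
=========================
After changing it, re-run hadoop namenode -format so the new directory gets initialized.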

[root@h1 bin]# ./stop-all.sh
stopping jobtracker
localhost: stopping tasktracker
no namenode to stop
localhost: stopping datanode
localhost: stopping secondarynamenode
[root@h1 bin]#
[root@h1 bin]# ./start-all.sh

#List all Java processes
[root@h1 tmp]# /usr/jdk1.6.0_45/bin/jps
4960 SecondaryNameNode
4840 DataNode
5039 JobTracker
6334 Jps
5174 TaskTracker
4717 NameNode
[root@h1 tmp]#

#Create a directory on HDFS
[root@h1 tmp]# hadoop fs -mkdir testjay
[root@h1 tmp]# hadoop dfs -ls
Found 1 items
drwxr-xr-x   - root supergroup          0 2013-09-26 13:46 /user/root/testjay
[root@h1 tmp]#

#Create test data on the local Linux filesystem
[root@h1 ~]# cd ~
[root@h1 ~]# mkdir input
[root@h1 ~]# echo "hello world" > input/test1.txt
[root@h1 ~]# echo "hello hadoop" > input/test2.txt


#Put the local files into HDFS
[root@h1 ~]# hadoop dfs -put ./input in
[root@h1 ~]# hadoop dfs -ls ./in/*
-rw-r--r--   1 root supergroup         12 2013-09-26 13:58 /user/root/in/test1.txt
-rw-r--r--   1 root supergroup         13 2013-09-26 13:58 /user/root/in/test2.txt

#List the file contents; success
[root@h1 ~]# hadoop dfs -cat ./in/test1.txt
hello world
[root@h1 ~]# hadoop dfs -cat ./in/test2.txt
hello hadoop
[root@h1 ~]#
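
#Optionally, run the WordCount example shipped in the 1.2.1 tarball (hadoop-examples-1.2.1.jar) against the uploaded files
cd /opt/hadoop-1.2.1
bin/hadoop jar hadoop-examples-1.2.1.jar wordcount in out
bin/hadoop dfs -cat out/part-*
#expected counts for the two test files: hadoop 1, hello 2, world 1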

#Check jobtracker status
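#In Hadoop 1.x the simplest check is the daemon web UIs on their default ports:
#JobTracker web UI
http://localhost:50030/
#NameNode / HDFS web UI
http://localhost:50070/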

#Where did the block data actually get written?
[root@h1 current]# ls -ltrh
total 32K
-rw-r--r-- 1 root root 158 Sep 26 13:34 VERSION
-rw-r--r-- 1 root root  11 Sep 26 13:34 blk_-6668900420856905772_1022.meta
-rw-r--r-- 1 root root   4 Sep 26 13:34 blk_-6668900420856905772
-rw-r--r-- 1 root root  11 Sep 26 13:58 blk_8720904599102954439_1024.meta
-rw-r--r-- 1 root root  13 Sep 26 13:58 blk_8720904599102954439
-rw-r--r-- 1 root root  11 Sep 26 13:58 blk_2440773453556412055_1023.meta
-rw-r--r-- 1 root root  12 Sep 26 13:58 blk_2440773453556412055
-rw-r--r-- 1 root root 289 Sep 26 14:00 dncp_block_verification.log.curr
[root@h1 current]# pwd
/usr/hadoop_1.2.1/data/current
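
#The 12- and 13-byte blocks match the HDFS listing above, so they presumably hold test1.txt and test2.txt; HDFS block files store the raw bytes, so they can be read straight off the local disk:
cat blk_2440773453556412055    #12 bytes, presumably "hello world" (test1.txt)
cat blk_8720904599102954439    #13 bytes, presumably "hello hadoop" (test2.txt)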

I did not do any further testing; presumably other components such as HBase and Hive have to be installed separately.
