[root@linuxsource ~]# yum install -y java-1.8.0-openjdk.x86_64Create ssh key on localhost and set permissions.
[root@linuxsource ~]# ssh-keygen -t rsa -P "" Generating public/private rsa key pair. Enter file in which to save the key (/root/.ssh/id_rsa): Your identification has been saved in /root/.ssh/id_rsa. Your public key has been saved in /root/.ssh/id_rsa.pub. The key fingerprint is: 2f:4c:14:55:c6:89:a9:cc:54:4e:a1:fd:43:d4:7f:8d root@linuxsource.delphix.local The key's randomart image is: +--[ RSA 2048]----+ | .o=*+o | | .*ooo . | | +o.o . o.| | .+ o E +| | S o .| | o . . | | o . | | . | | | +-----------------+ [root@linuxsource ~]# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys [root@linuxsource ~]# ssh localhost The authenticity of host 'localhost (127.0.0.1)' can't be established. RSA key fingerprint is f3:9a:e9:88:be:b9:f9:16:71:35:0f:73:d7:18:86:cf. Are you sure you want to continue connecting (yes/no)? yes Warning: Permanently added 'localhost' (RSA) to the list of known hosts. Last login: Mon Aug 17 07:57:25 2020 from 192.168.247.1 [root@linuxsource ~]# exit [root@linuxsource ~]# ssh localhost Last login: Mon Aug 17 08:04:10 2020 from localhost [root@linuxsource ~]# chmod 600 ~/.ssh/authorized_keysNow it's time to install HADOOP
wget https://www-us.apache.org/dist/hadoop/common/stable/hadoop-3.2.1.tar.gz tar -zxvf hadoop-3.2.1.tar.gz mv hadoop-3.2.1 hadoopUpdate delphix user profile environment (.bashrc or .profile)
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.201.b09-1.el6_10.x86_64/jre ## Change it according to your system export HADOOP_HOME=/u02/hadoop ## Change it according to your system export HADOOP_INSTALL=$HADOOP_HOME export HADOOP_MAPRED_HOME=$HADOOP_HOME export HADOOP_COMMON_HOME=$HADOOP_HOME export HADOOP_HDFS_HOME=$HADOOP_HOME export HADOOP_YARN_HOME=$HADOOP_HOME export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/binUpdate HADOOP envirnoment file $HADOOP_HOME/etc/hadoop/hadoop-env.sh with JAVA_HOME variable.
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://linuxsource.delphix.local:9000</value>
</property>
</configuration>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.name.dir</name>
<value>file:///u02/hadoop/hadoopdata/hdfs/namenode</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>file:///u02/hadoop/hadoopdata/hdfs/datanode</value>
</property>
</configuration>
Create NameNode and DataNode directories your HADOOP will be using
mkdir -p /u02/hadoop/hadoopdata/hdfs/{namenode,datanode}
Modify $HADOOP_HOME/etc/hadoop/mapred-site.xml file<configuration>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
</configuration>Edit $HADOOP_HOME/etc/hadoop/yarn-site.xml file<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
$hdfs namenode -format ... 2020-08-17 09:58:23,066 INFO common.Storage: Storage directory /u02/hadoop/hadoopdata/hdfs/namenode has been successfully formatted. 2020-08-17 09:58:23,175 INFO namenode.FSImageFormatProtobuf: Saving image file /u02/hadoop/hadoopdata/hdfs/namenode/current/fsimage.ckpt_0000000000000000000 using no compression 2020-08-17 09:58:23,348 INFO namenode.FSImageFormatProtobuf: Image file /u02/hadoop/hadoopdata/hdfs/namenode/current/fsimage.ckpt_0000000000000000000 of size 402 bytes saved in 0 seconds . 2020-08-17 09:58:23,362 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0 2020-08-17 09:58:23,373 INFO namenode.FSImage: FSImageSaver clean checkpoint: txid=0 when meet shutdown. 2020-08-17 09:58:23,373 INFO namenode.NameNode: SHUTDOWN_MSG: /************************************************************ SHUTDOWN_MSG: Shutting down NameNode at linuxsource/192.168.247.133 ************************************************************/Start ResourceManager and NodeManager daemons
$HDOOP_HOME/sbin/start-hdfs.sh $HDOOP_HOME/sbin/start-yarn.shLet's browse the NameNode from a web browser, connect to http://<yourhost>:9870
Before carrying out the upload to test our cluster, let us create first a directory into HDFS.
[delphix@linuxsource ~]$ hdfs dfs -mkdir /msa 2020-08-17 10:58:44,648 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicableTry uploading a file into msa HDFS directory and check the content.
[delphix@linuxsource ~]$ hdfs dfs -put ~/.bashrc /msa 2020-08-17 10:58:59,552 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 2020-08-17 10:59:01,340 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false [delphix@linuxsource ~]$ [delphix@linuxsource ~]$ hadoop fs -ls / 2020-08-17 11:02:35,611 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Found 1 items drwxr-xr-x - delphix supergroup 0 2020-08-17 10:59 /msa [delphix@linuxsource ~]$ hadoop fs -ls /msa 2020-08-17 11:02:41,961 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Found 1 items -rw-r--r-- 1 delphix supergroup 952 2020-08-17 10:59 /msa/.bashrc [delphix@linuxsource ~]$ [delphix@linuxsource ~]$ hadoop fs -ls hdfs://192.168.247.133:9001/ 2020-08-17 11:03:48,023 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Found 1 items drwxr-xr-x - delphix supergroup 0 2020-08-17 10:59 hdfs://192.168.247.133:9001/msa [delphix@linuxsource ~]$
Great, I will now try to perform some MapReduce jobs, some examples are available as jars (hadoop-mapreduce-examples.jar) in your $HADOOP_HOME
Compute PI value and workcount
[delphix@linuxsource logs]$ hadoop jar /u02/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar pi 2 5
Number of Maps = 2
Samples per Map = 5
2020-08-18 05:27:10,741 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2020-08-18 05:27:11,596 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
Wrote input for Map #0
2020-08-18 05:27:11,739 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
Wrote input for Map #1
Starting Job
2020-08-18 05:27:11,839 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
2020-08-18 05:27:12,204 INFO mapreduce.JobResourceUploader: Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/delphix/.staging/job_1597742243565_0003
2020-08-18 05:27:12,255 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-08-18 05:27:12,339 INFO input.FileInputFormat: Total input files to process : 2
2020-08-18 05:27:12,358 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-08-18 05:27:12,384 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-08-18 05:27:12,392 INFO mapreduce.JobSubmitter: number of splits:2
2020-08-18 05:27:12,527 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-08-18 05:27:12,551 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1597742243565_0003
2020-08-18 05:27:12,551 INFO mapreduce.JobSubmitter: Executing with tokens: []
2020-08-18 05:27:12,755 INFO conf.Configuration: resource-types.xml not found
2020-08-18 05:27:12,756 INFO resource.ResourceUtils: Unable to find 'resource-types.xml'.
2020-08-18 05:27:12,834 INFO impl.YarnClientImpl: Submitted application application_1597742243565_0003
2020-08-18 05:27:12,876 INFO mapreduce.Job: The url to track the job: http://linuxsource:8088/proxy/application_1597742243565_0003/
2020-08-18 05:27:12,876 INFO mapreduce.Job: Running job: job_1597742243565_0003
2020-08-18 05:27:20,042 INFO mapreduce.Job: Job job_1597742243565_0003 running in uber mode : false
2020-08-18 05:27:20,045 INFO mapreduce.Job: map 0% reduce 0%
2020-08-18 05:27:27,155 INFO mapreduce.Job: map 100% reduce 0%
2020-08-18 05:27:32,189 INFO mapreduce.Job: map 100% reduce 100%
2020-08-18 05:27:32,216 INFO mapreduce.Job: Job job_1597742243565_0003 completed successfully
2020-08-18 05:27:32,345 INFO mapreduce.Job: Counters: 54
File System Counters
FILE: Number of bytes read=50
FILE: Number of bytes written=679350
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=544
HDFS: Number of bytes written=215
HDFS: Number of read operations=13
HDFS: Number of large read operations=0
HDFS: Number of write operations=3
HDFS: Number of bytes read erasure-coded=0
Job Counters
Launched map tasks=2
Launched reduce tasks=1
Data-local map tasks=2
Total time spent by all maps in occupied slots (ms)=8358
Total time spent by all reduces in occupied slots (ms)=2530
Total time spent by all map tasks (ms)=8358
Total time spent by all reduce tasks (ms)=2530
Total vcore-milliseconds taken by all map tasks=8358
Total vcore-milliseconds taken by all reduce tasks=2530
Total megabyte-milliseconds taken by all map tasks=8558592
Total megabyte-milliseconds taken by all reduce tasks=2590720
Map-Reduce Framework
Map input records=2
Map output records=4
Map output bytes=36
Map output materialized bytes=56
Input split bytes=308
Combine input records=0
Combine output records=0
Reduce input groups=2
Reduce shuffle bytes=56
Reduce input records=4
Reduce output records=0
Spilled Records=8
Shuffled Maps =2
Failed Shuffles=0
Merged Map outputs=2
GC time elapsed (ms)=234
CPU time spent (ms)=1500
Physical memory (bytes) snapshot=819863552
Virtual memory (bytes) snapshot=8361353216
Total committed heap usage (bytes)=622329856
Peak Map Physical memory (bytes)=310808576
Peak Map Virtual memory (bytes)=2787803136
Peak Reduce Physical memory (bytes)=203501568
Peak Reduce Virtual memory (bytes)=2787303424
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=236
File Output Format Counters
Bytes Written=97
Job Finished in 20.601 seconds
2020-08-18 05:27:32,422 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
Estimated value of Pi is 3.60000000000000000000
[delphix@linuxsource logs]$
hadoop fs -mkdir /user/delphix/books
hadoop fs -put /tmp/pg5000.txt /user/delphix/books
hadoop fs -put /tmp/pg1661.txt /user/delphix/books
hadoop fs -put /tmp/pg135.txt /user/delphix/books
[delphix@linuxsource logs]$ hadoop fs -ls /user/delphix/books
2020-08-18 05:20:23,814 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 3 items
-rw-r--r-- 1 delphix supergroup 3322651 2020-08-18 05:20 /user/delphix/books /pg135.txt
-rw-r--r-- 1 delphix supergroup 594933 2020-08-18 05: /user/delphix/books /pg1661.txt
-rw-r--r-- 1 delphix supergroup 1423803 2020-08-18 05:19 /user/delphix/books /pg5000.txt
[delphix@linuxsource logs]$
[delphix@linuxsource logs]$ hadoop jar /u02/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar wordcount books output
[delphix@linuxsource logs]$ hadoop jar /u02/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar wordcount books output
2020-08-18 05:23:36,894 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2020-08-18 05:23:37,676 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
2020-08-18 05:23:38,468 INFO mapreduce.JobResourceUploader: Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/delphix/.staging/job_1597742243565_0002
2020-08-18 05:23:38,574 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-08-18 05:23:38,720 INFO input.FileInputFormat: Total input files to process : 3
2020-08-18 05:23:38,758 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-08-18 05:23:39,189 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-08-18 05:23:39,607 INFO mapreduce.JobSubmitter: number of splits:3
2020-08-18 05:23:39,763 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-08-18 05:23:39,811 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1597742243565_0002
2020-08-18 05:23:39,811 INFO mapreduce.JobSubmitter: Executing with tokens: []
2020-08-18 05:23:40,197 INFO conf.Configuration: resource-types.xml not found
2020-08-18 05:23:40,198 INFO resource.ResourceUtils: Unable to find 'resource-types.xml'.
2020-08-18 05:23:40,601 INFO impl.YarnClientImpl: Submitted application application_1597742243565_0002
2020-08-18 05:23:40,856 INFO mapreduce.Job: The url to track the job: http://linuxsource:8088/proxy/application_1597742243565_0002/
2020-08-18 05:23:40,857 INFO mapreduce.Job: Running job: job_1597742243565_0002
2020-08-18 05:23:52,081 INFO mapreduce.Job: Job job_1597742243565_0002 running in uber mode : false
2020-08-18 05:23:52,083 INFO mapreduce.Job: map 0% reduce 0%
2020-08-18 05:24:06,288 INFO mapreduce.Job: map 33% reduce 0%
2020-08-18 05:24:08,301 INFO mapreduce.Job: map 100% reduce 0%
2020-08-18 05:24:12,323 INFO mapreduce.Job: map 100% reduce 100%
2020-08-18 05:24:13,341 INFO mapreduce.Job: Job job_1597742243565_0002 completed successfully
2020-08-18 05:24:13,447 INFO mapreduce.Job: Counters: 55
File System Counters
FILE: Number of bytes read=1490561
FILE: Number of bytes written=3885503
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=5341752
HDFS: Number of bytes written=886472
HDFS: Number of read operations=14
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
HDFS: Number of bytes read erasure-coded=0
Job Counters
Killed map tasks=1
Launched map tasks=4
Launched reduce tasks=1
Data-local map tasks=4
Total time spent by all maps in occupied slots (ms)=39885
Total time spent by all reduces in occupied slots (ms)=3043
Total time spent by all map tasks (ms)=39885
Total time spent by all reduce tasks (ms)=3043
Total vcore-milliseconds taken by all map tasks=39885
Total vcore-milliseconds taken by all reduce tasks=3043
Total megabyte-milliseconds taken by all map tasks=40842240
Total megabyte-milliseconds taken by all reduce tasks=3116032
Map-Reduce Framework
Map input records=113284
Map output records=927416
Map output bytes=8898193
Map output materialized bytes=1490573
Input split bytes=365
Combine input records=927416
Combine output records=101835
Reduce input groups=80709
Reduce shuffle bytes=1490573
Reduce input records=101835
Reduce output records=80709
Spilled Records=203670
Shuffled Maps =3
Failed Shuffles=0
Merged Map outputs=3
GC time elapsed (ms)=1335
CPU time spent (ms)=10640
Physical memory (bytes) snapshot=1168629760
Virtual memory (bytes) snapshot=11191828480
Total committed heap usage (bytes)=907542528
Peak Map Physical memory (bytes)=323743744
Peak Map Virtual memory (bytes)=2801008640
Peak Reduce Physical memory (bytes)=203014144
Peak Reduce Virtual memory (bytes)=2792550400
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=5341387
File Output Format Counters
Bytes Written=886472
[delphix@linuxsource logs]$
[delphix@linuxsource ~]$ hadoop fs -ls /user/delphix/output
2020-10-15 04:16:11,111 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 2 items
-rw-r--r-- 1 delphix supergroup 0 2020-08-18 05:24 /user/delphix/output/_SUCCESS
-rw-r--r-- 1 delphix supergroup 886472 2020-08-18 05:24 /user/delphix/output/part-r-00000
[delphix@linuxsource ~]$ hadoop fs -cat /user/delphix/output/part-r-00000
2020-10-15 04:20:57,285 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2020-10-15 04:20:58,376 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
" 27
"'A 1
"'About 1
"'Absolute 1
"'Ah!' 2
"'Ah, 2
"'Ample.' 1
"'And 10
"'Arthur!' 1
"'As 1
"'At 1
"'Because 1
"'Breckinridge, 1
"'But 1
"'But, 1
"'But,' 1
"'Certainly 2
"'Certainly,' 1
"'Come! 1
"'Come, 1
"'DEAR 1
"'Dear 2
"'Dearest 1
"'Death,' 1
"'December 1
"'Do 3
"'Don't 1
"'Entirely.' 1
"'Flock'; 1
"'For 1
"'Fritz! 1
"'From 1
"'Gone 1
"'Hampshire. 1
"'Have 2
"'Here 1
"'How 2
"'I 23
"'If 2
"'In 2
"'Is 3
"'It 7
"'It's 1
"'Jephro,' 1
"'Keep 1
"'Ku 1
"'L'homme 1
"'Look 2
"'Lord 1
"'MY 2
"'Mariage 1
"'May 1
"'Monsieur 2
"'Most 1
"'Mr. 2
"'My 4
"'Never 1
"'Never,' 1
...
Now that my Hadoop cluster is working fine, I will replicate the same installation on my linuxtarget environment.
Notice, that I can also virtualize the HADOOP_HOME (but I will go for a separate installation)
This concluded the first part of the study, so stay tuned for the next part ...



No comments:
Post a Comment