本文共 4954 字,大约阅读时间需要 16 分钟。
目录
hello word
word count
hello MapReduce
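<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>

<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0-mr1-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.6.0-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.6.0-cdh5.14.0</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.11</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.testng</groupId>
        <artifactId>testng</artifactId>
        <version>RELEASE</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.0</version>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.4.3</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <!-- The tag around "true" was lost in extraction; minimizeJar is the presumed element. -->
                        <minimizeJar>true</minimizeJar>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>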
package com.czxy.wordCount;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Map phase of the word-count job.
 *
 * <p>Each input record is one line of text (key = byte offset supplied by the input format,
 * value = the line). The line is split on single spaces and every non-empty token is emitted
 * as {@code (word, 1)}.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    /** Constant count emitted for every occurrence of a word. */
    private static final LongWritable ONE = new LongWritable(1);

    /** Reused output key; the framework serializes it during {@code context.write}. */
    private final Text word = new Text();

    /**
     * Emits {@code (token, 1)} for every space-separated token of the line.
     *
     * @param key     input key of the record (unused)
     * @param value   the line of text to tokenize
     * @param context task context used to emit the output pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Convert the Hadoop Text value into a plain String.
        String line = value.toString();

        // Split on spaces and emit one pair per word.
        for (String token : line.split(" ")) {
            // Leading or consecutive spaces produce empty tokens; they are not words.
            if (token.isEmpty()) {
                continue;
            }
            word.set(token);
            context.write(word, ONE);
        }
    }
}
package com.czxy.wordCount;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reduce phase of the word-count job.
 *
 * <p>Receives one word together with all counts emitted for it and writes
 * {@code (word, sum of counts)}.
 */
public class WordCountReduce extends Reducer<Text, LongWritable, Text, LongWritable> {

    /**
     * Sums all counts for a single word and emits the total.
     *
     * @param key     the word
     * @param values  the counts emitted for this word
     * @param context task context used to emit the result
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        // Accumulate in a long: LongWritable.get() returns long, and an int
        // accumulator would silently truncate through the compound assignment.
        long total = 0L;
        for (LongWritable value : values) {
            total += value.get();
        }

        // Emit the final count for this word.
        context.write(key, new LongWritable(total));
    }
}
package com.czxy.wordCount;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;public class WordCountDriver extends Configured implements Tool { @Override public int run(String[] args) throws Exception { // 获取job Job job = Job.getInstance(new Configuration()); // 设置支持jar执行 job.setJarByClass(WordCountDriver.class); // 设置执行的napper job.setMapperClass(WordCountMapper.class); // 设置map输出的key类型 job.setMapOutputKeyClass(Text.class); // 设置map输出value类型 job.setMapOutputValueClass(LongWritable.class); // 设置执行的reduce job.setReducerClass(WordCountReduce.class); // 设置reduce输出key的类型 job.setOutputKeyClass(Text.class); // 设置reduce输出value的类型 job.setOutputValueClass(LongWritable.class); // 设置文件输入 job.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(job, new Path("./data/wordCount/")); // 设置文件输出 job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path("./outPut/wordCount/")); // 设置启动类 boolean b = job.waitForCompletion(true); return b ? 0 : 1; } public static void main(String[] args) throws Exception { // 调用启动方法 ToolRunner.run(new WordCountDriver(), args); }}
MapReduce 1
count 1
hello 2
word 2
转载地址:http://zakzi.baihongyu.com/