求大神详解，不太懂

民疯***

package com.zkpk.us;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

class UserMapper extends Mapper {

protected void map(
LongWritable key,
Text value,
org.apache.hadoop.mapreduce.Mapper.Context context)
throws java.io.IOException, InterruptedException {
IntWritable one = new IntWritable(1);
String[] columns = value.toString().split("\t");
if (columns != null && columns.length == 6) {

Text uid = new Text(columns[1]);

context.write(uid, one);

}
};
}

class UserReducer extends Reducer {
protected void reduce(
Text key,
java.lang.Iterable values,
org.apache.hadoop.mapreduce.Reducer.Context context)
throws java.io.IOException, InterruptedException {
int sum = 0;
for(IntWritable value : values){
sum += value.get();
}

context.write(key, new IntWritable(sum));
};
}

/**
 * Driver that configures and submits the "UserUid" MapReduce job, wiring
 * {@link UserMapper} and {@link UserReducer} together with text input and output.
 */
public class UserCount {

    /**
     * Configures the job, runs it to completion and exits with its status.
     *
     * <p>Exit status: {@code 0} if the job succeeded, {@code 1} if it failed,
     * {@code 2} if the required arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path;
     *             any further arguments are ignored
     * @throws Exception if the job cannot be configured, submitted or monitored
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: UserCount <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = new Job(conf, "UserUid");
        job.setJarByClass(UserCount.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setMapperClass(UserMapper.class);
        job.setReducerClass(UserReducer.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Propagate the job result so callers and scripts can detect a failed run.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

}

晃悠***

这是hadoop的wordCount程序啊，对input文件下的文档单词分类计数，你在hadoop output 文件夹下可以看到，每个单词出现的频数

kiss***

这是MapReduce程序，是Hadoop处理数据的核心。程序由三部分组成：
第一部分：Map。表示将数据进行分词处理，分隔符是制表符。
第二部分：Reduce。将Map的输出进行汇总，得到最后的输出。
第三部分：主程序。将Map和Reduce组成一个任务job来执行，数据的输入和输出都来自于HDFS。
有问题，可以再问我。呵呵

ham***

直接说什么问题
直接贴代码不想看

已有账号？

找人解决需求

求大神详解，不太懂

全部参与3

热门问答

民疯***

今日需求悬赏

今日问答求助

发布任务需求已有1031167位用户正在使用天盟网服务

新手帮助

平台规则

关于天盟

一键快捷导航

微信公众号

手机客户端