Hadoop: Finding Common Friends

Analyze the common friends in friend.txt.

Solution: each input line lists one user followed by that user's friend list. Take the line A:B,C,D,F,E,O (A's friends): any two people on that list have A as a common friend, so the mapper emits one record per pair:
B-C: A  (the common friend of B and C is A)
B-D: A  (the common friend of B and D is A)
B-F: A  (the common friend of B and F is A)
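
To see the whole pipeline on concrete data, here is a small worked trace. Only the A line appears in the post; the second line B:A,C,E is a made-up example. The mapper sorts each friend list so a pair is always keyed in the same order, the framework groups values by pair during the shuffle, and the reducer joins them:

map("A:B,C,D,F,E,O") -> (B-C, A) (B-D, A) (B-E, A) (B-F, A) (B-O, A) (C-D, A) (C-E, A) ...
map("B:A,C,E")       -> (A-C, B) (A-E, B) (C-E, B)
shuffle              -> ... (C-E, [A, B]) ...
reduce               -> C-E  A,B    (C and E's common friends are A and B)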

package friends;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.Arrays;

/* Find the common friends in friend.txt */
public class Friends {

    // Map side
    public static class MapTask extends Mapper<LongWritable, Text, Text, Text> {
        // (A:B,C,D,F,E,O) ==> (B-C, A) (B-D, A) (B-E, A) (B-F, A) ...
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] split = value.toString().split(":");
            // The owner of the friend list, e.g. A
            String owner = split[0];
            // The friend list itself, e.g. B,C,D,F,E,O
            String[] friends = split[1].split(",");
            // Sort so every pair is always emitted in the same order (B-C, never C-B)
            Arrays.sort(friends);
            for (int i = 0; i < friends.length - 1; i++) {
                for (int j = i + 1; j < friends.length; j++) {
                    context.write(new Text(friends[i] + "-" + friends[j]), new Text(owner));
                }
            }
        }
    }

    // Reduce side
    public static class ReduceTask extends Reducer<Text, Text, Text, Text> {
        // (B-C, (A, E, F, ...)) ==> (B-C  A,E,F,...)
        // Join the values into one comma-separated string
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text value : values) {
                if (sb.length() > 0) {
                    sb.append(",");
                }
                sb.append(value.toString());
            }
            context.write(key, new Text(sb.toString()));
        }
    }

    public static void main(String[] args) throws Exception {
        // When submitting to the cluster, run as the root user
        System.setProperty("HADOOP_USER_NAME", "root");
        // Create a Job object; it is responsible for submitting the MR program to the cluster
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.120.110:9000");
        Job job = Job.getInstance(conf);

        // Hand the map-side and reduce-side static classes to the job
        job.setMapperClass(Friends.MapTask.class);
        job.setReducerClass(Friends.ReduceTask.class);
        job.setJarByClass(Friends.class);

        // Tell the job the four output types (note: output types only)
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // If the output directory already exists, delete it
        String outPath = "/bigdata/output/friend";
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(new Path(outPath))) {
            fileSystem.delete(new Path(outPath), true);
        }

        // Tell the job the input and output paths
        FileInputFormat.addInputPath(job, new Path("/bigdata/input/friend.txt"));
        FileOutputFormat.setOutputPath(job, new Path(outPath));

        boolean b = job.waitForCompletion(true);
        System.out.println(b ? "The code works!" : "There's a bug, go take a look!");
    }
}
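
To actually run the job, package the class into a jar and submit it with the hadoop CLI. A minimal sketch, assuming the build produces a jar named friends.jar (the jar name is an assumption, not from the original post):

# Submit the job: hadoop jar takes the jar path and the main class
hadoop jar friends.jar friends.Friends
# Inspect the result; part-r-00000 is the default output file of the first reducer
hdfs dfs -cat /bigdata/output/friend/part-r-00000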

------------- End of post. Thanks for reading. -------------