Hadoop: Finding Common Friends

Analyze the common friends in friend.txt.

Solution: each input line lists one user followed by that user's friend list. Take the line A:B,C,D,F,E,O (A's friends): any two people on that list have A as a common friend, so the mapper emits one record per pair:
B-C: A  (the common friend of B and C is A)
B-D: A  (the common friend of B and D is A)
B-F: A  (the common friend of B and F is A)
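
To see the whole pipeline on concrete data, here is a small worked trace. Only the A line appears in the post; the second line B:A,C,E is a made-up example. The mapper sorts each friend list so a pair is always keyed in the same order, the framework groups values by pair during the shuffle, and the reducer joins them:

map("A:B,C,D,F,E,O") -> (B-C, A) (B-D, A) (B-E, A) (B-F, A) (B-O, A) (C-D, A) (C-E, A) ...
map("B:A,C,E")       -> (A-C, B) (A-E, B) (C-E, B)
shuffle              -> ... (C-E, [A, B]) ...
reduce               -> C-E  A,B    (C and E's common friends are A and B)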

package friends;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.Arrays;

/* Find the common friends in friend.txt */
public class Friends {

    // Map side
    public static class MapTask extends Mapper<LongWritable, Text, Text, Text> {
        // (A:B,C,D,F,E,O) ==> (B-C, A) (B-D, A) (B-E, A) (B-F, A) ...
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] split = value.toString().split(":");
            // The owner of the friend list, e.g. A
            String owner = split[0];
            // The friend list itself, e.g. B,C,D,F,E,O
            String[] friends = split[1].split(",");
            // Sort so every pair is always emitted in the same order (B-C, never C-B)
            Arrays.sort(friends);
            for (int i = 0; i < friends.length - 1; i++) {
                for (int j = i + 1; j < friends.length; j++) {
                    context.write(new Text(friends[i] + "-" + friends[j]), new Text(owner));
                }
            }
        }
    }

    // Reduce side
    public static class ReduceTask extends Reducer<Text, Text, Text, Text> {
        // (B-C, (A, E, F, ...)) ==> (B-C  A,E,F,...)
        // Join the values into one comma-separated string
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text value : values) {
                if (sb.length() > 0) {
                    sb.append(",");
                }
                sb.append(value.toString());
            }
            context.write(key, new Text(sb.toString()));
        }
    }

    public static void main(String[] args) throws Exception {
        // When submitting to the cluster, run as the root user
        System.setProperty("HADOOP_USER_NAME", "root");
        // Create a Job object; it is responsible for submitting the MR program to the cluster
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.120.110:9000");
        Job job = Job.getInstance(conf);

        // Hand the map-side and reduce-side static classes to the job
        job.setMapperClass(Friends.MapTask.class);
        job.setReducerClass(Friends.ReduceTask.class);
        job.setJarByClass(Friends.class);

        // Tell the job the four output types (note: output types only)
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // If the output directory already exists, delete it
        String outPath = "/bigdata/output/friend";
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(new Path(outPath))) {
            fileSystem.delete(new Path(outPath), true);
        }

        // Tell the job the input and output paths
        FileInputFormat.addInputPath(job, new Path("/bigdata/input/friend.txt"));
        FileOutputFormat.setOutputPath(job, new Path(outPath));

        boolean b = job.waitForCompletion(true);
        System.out.println(b ? "The code works!" : "There's a bug, go take a look!");
    }
}
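
To actually run the job, package the class into a jar and submit it with the hadoop CLI. A minimal sketch, assuming the build produces a jar named friends.jar (the jar name is an assumption, not from the original post):

# Submit the job: hadoop jar takes the jar path and the main class
hadoop jar friends.jar friends.Friends
# Inspect the result; part-r-00000 is the default output file of the first reducer
hdfs dfs -cat /bigdata/output/friend/part-r-00000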

------------- End of post. Thanks for reading. -------------