Hadoop之电影评分全排序

编程入门 行业动态 更新时间:2024-10-13 02:19:05

Hadoop之电影<a href="https://www.elefans.com/category/jswz/34/1762927.html">评分</a>全排序

Hadoop之电影评分全排序

测试数据

中国机长 72
机械师2 83
奇异博士 87
流浪地球 79
复仇者联盟4:终局之战 94
惊奇队长 68
蜘蛛侠:英雄远征 80
长城 56
夺路而逃 69
神奇动物在哪里 57
驴得水 59
我不是潘金莲 55
速度与激情:特别行动 77
哪吒之魔童降世 96
捉迷藏 78
上海堡垒 9
叶问4 75
勇士之门 35
罗曼蒂克消亡史 67
阿丽塔:战斗天使 89

自定义Bean

package com.sort;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class SortBean implements WritableComparable<SortBean> {private String name;private int hot;public SortBean() {}public SortBean(String name, int hot) {this.name = name;this.hot = hot;}public String getName() {return name;}public void setName(String name) {this.name = name;}public int getHot() {return hot;}public void setHot(int hot) {this.hot = hot;}@Overridepublic String toString() {return name+"\t"+hot;}@Overridepublic int compareTo(SortBean o) {return o.getHot() - this.getHot();}@Overridepublic void write(DataOutput dataOutput) throws IOException {dataOutput.writeUTF(name);dataOutput.writeInt(hot);}@Overridepublic void readFields(DataInput dataInput) throws IOException {name = dataInput.readUTF();hot = dataInput.readInt();}
}

自定义Mapper

package com.sort;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class SortMapper extends Mapper<LongWritable, Text, SortBean, NullWritable> {
@Override
protected void map(LongWritable key,Text value,Context context)throws IOException,InterruptedException{String line=value.toString();   //拿到一行数据String[]fields=line.split(" ");  //切分成各个字段String name=fields[0];int hot=Integer.parseInt(fields[1]);SortBean bean = new SortBean();bean.setName(name);bean.setHot(hot);//封装数据为key-value进行输出context.write(bean, NullWritable.get());}
}

自定义Reducer

package com.sort;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class SortReducer extends Reducer<SortBean, NullWritable, SortBean, NullWritable> {@Overrideprotected void reduce(SortBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {for (NullWritable value : values) {context.write(key, NullWritable.get());}}
}

自定义Driver

package com.sort;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class SortDrivere {public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {// 数据输入路径和输出路径args = new String[2];args[0] = "src/main/resources/input/test2.txt";args[1] = "src/main/resources/output";Configuration cfg = new Configuration();// 读取配置文件cfg.set("mapreduce.framework.name", "local");cfg.set("fs.defaultFS", "file:///");final FileSystem filesystem = FileSystem.get(cfg);if (filesystem.exists(new Path(args[0]))) {filesystem.delete(new Path(args[1]), true);}// 新建一个任务Job job = Job.getInstance(cfg);job.setJarByClass(SortDrivere.class);  // 设置主类job.setMapperClass(SortMapper.class);   // Mapperjob.setReducerClass(SortReducer.class);job.setMapOutputKeyClass(SortBean.class);job.setMapOutputValueClass(NullWritable.class);job.setOutputKeyClass(SortBean.class);job.setOutputValueClass(NullWritable.class);FileInputFormat.addInputPath(job, new Path(args[0]));        // 输入路径FileOutputFormat.setOutputPath(job, new Path(args[1]));        // 输出路径// 提交任务int ec = job.waitForCompletion(true) ? 0 : 1;System.exit(ec);}
}

更多推荐

Hadoop之电影评分全排序

本文发布于:2024-03-08 16:09:50,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1721394.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:评分   电影   Hadoop

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!