hadoopMapReducejava示例

wordcount工作流程
input -> 拆分Split -> 映射map -> 派發Shuffle -> 縮減reduce -> output
hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount 10803060234.txt /output

成都創新互聯是一家集網站建設,北鎮企業網站建設,北鎮品牌網站建設,網站定制,北鎮網站建設報價,網絡營銷,網絡優化,北鎮網站推廣為一體的創新建站企業,幫助傳統企業提升企業形象加強企業競爭力。可充分滿足這一群體相比中小企業更為豐富、高端、多元的互聯網需求。同時我們時刻保持專業、時尚、前沿,時刻以成就客戶成長自我,堅持不斷學習、思考、沉淀、凈化自己,讓我們為更多的企業打造出實用型網站。

package wordcount;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver for the word-count job: wires {@link WordMapper} and {@link WordReducer}
 * into a {@link Job}, submits it and waits for it to finish.
 *
 * <p>Usage: {@code Test <input path> <output path>}
 */
public class Test {

    /** Creates the driver; the class holds no state. */
    public Test() {
    }

    /**
     * Configures, submits and waits for the word-count job, then exits with
     * status 0 if the job succeeded and 1 otherwise.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws Exception if the job cannot be configured, submitted or monitored
     */
    public static void main(String[] args) throws Exception {
        // The paths used to be hard-coded empty strings, which Path rejects with an
        // IllegalArgumentException before the job is ever submitted.
        if (args.length != 2) {
            System.err.println("Usage: Test <input path> <output path>");
            System.exit(2);
        }
        String inputPath = args[0];
        String outputPath = args[1];

        // Connection settings for the target cluster are hard-coded for this example.
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.26.19.40:9000");
        conf.set("mapreduce.job.jar", "target/wc.jar");
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resourcemanager.hostname", "hmaster");
        conf.set("mapreduce.app-submission.cross-platform", "true");

        Job job = Job.getInstance(conf);
        job.setMapperClass(WordMapper.class);
        job.setReducerClass(WordReducer.class);

        // The mapper emits (Text, IntWritable); the reducer widens the count to LongWritable.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Surface the job result to the caller instead of discarding it.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

}

package wordcount;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper for the word-count job: emits {@code (word, 1)} for every
 * whitespace-separated token of each input line.
 */
public class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Constant count written alongside every word. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key reused across calls so no new Text is allocated per token. */
    private final Text word = new Text();

    /**
     * Tokenises one input line and writes {@code (token, 1)} for each token.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   the line of text to tokenise
     * @param context sink for the emitted {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // StringTokenizer splits on any run of whitespace and never yields an empty
        // token, so blank lines and repeated spaces no longer produce a bogus ""
        // word the way split(" ") did. Fully qualified to keep the file's imports untouched.
        java.util.StringTokenizer tokens = new java.util.StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            context.write(word, ONE);
        }
    }

}

package wordcount;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer for the word-count job: sums the per-occurrence counts of a word
 * and writes the total as a {@link LongWritable}.
 */
public class WordReducer extends Reducer<Text, IntWritable, Text, LongWritable> {

    /**
     * Adds up all counts received for {@code key} and writes {@code (key, total)}.
     *
     * @param key     the word being counted
     * @param values  the individual counts emitted for this word
     * @param context sink for the {@code (word, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        // Primitive accumulator: the previous boxed Long was unboxed and re-boxed
        // on every iteration.
        long total = 0L;
        for (IntWritable count : values) {
            total += count.get();
        }
        context.write(key, new LongWritable(total));
    }

}

<!-- Maven build for the word-count MapReduce example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.skcc</groupId>
<artifactId>wordcount</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>wordcount</name>
<description>count the word</description>

<properties>
    <!-- Property names are case-sensitive; the all-lowercase spelling is ignored by Maven. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <hadoop.version>2.7.3</hadoop.version>
</properties>
<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <!-- Only needed for unit tests; keep it out of the runtime classpath. -->
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
</dependencies>

<build>
    <!-- The driver sets mapreduce.job.jar to target/wc.jar, so the artifact must be named wc. -->
    <finalName>wc</finalName>
</build>

</project>

網站欄目:hadoopMapReducejava示例
URL網址:http://bm7419.com/article34/ipdese.html

成都網站建設公司_創新互聯,為您提供電子商務、全網營銷推廣網站設計公司、搜索引擎優化、靜態網站標簽優化

廣告

聲明:本網站發布的內容(圖片、視頻和文字)以用戶投稿、用戶轉載內容為主,如果涉及侵權請盡快告知,我們將會在第一時間刪除。文章觀點不代表本網站立場,如需處理請聯系客服。電話:028-86922220;郵箱:631063699@qq.com。內容未經允許不得轉載,或轉載時需注明來源: 創新互聯

網站建設網站維護公司