利用MRUnit进行MapReduce单元测试

一、MRUnit简介

官网地址：https://mrunit.apache.org/

Apache MRUnit is a Java library that helps developers unit test Apache Hadoop map reduce jobs.

MRUnit是一个帮助开发者测试map reduce 作业的单元测试库。

二、代码示例

以maven项目为例，演示如何使用MRUnit进行MR单元测试。

关于示例的讲解，请参考：https://cwiki.apache.org/confluence/display/MRUNIT/MRUnit+Tutorial

项目pom.xml文件，重点关注mrunit，mockito-all, junit三个类库的引入，MRUnit是利用mockito+junit针对MR程序进行模拟测试。

MR单元测试类

package mrunit;

import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import mrunit.SMSCDRMapper.CDRCounter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;

/**
 * 测试数据说明 CDRID;CDRType;Phone1;Phone2;SMS Status Code
 * 655209;1;796764372490213;804422938115889;6
 * 353415;0;356857119806206;287572231184798;4
 * 835699;1;252280313968413;889717902341635;0
 *
 */
public class SMSCDRMapperReducerTest {
	Configuration conf = new Configuration();
	MapDriver<LongWritable, Text, Text, IntWritable> mapDriver;
	ReduceDriver<Text, IntWritable, Text, IntWritable> reduceDriver;
	MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, IntWritable> mapReduceDriver;

	@Before
	public void setUp() {

		//测试mapreduce
		SMSCDRMapper mapper = new SMSCDRMapper();
		SMSCDRReducer reducer = new SMSCDRReducer();
		mapDriver = MapDriver.newMapDriver(mapper);
		reduceDriver = ReduceDriver.newReduceDriver(reducer);
		mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer);

		//测试配置参数
		mapDriver.setConfiguration(conf);
		conf.set("myParameter1", "20");
		conf.set("myParameter2", "23");

	}

	@Test
	public void testMapper() throws IOException {
		mapDriver.withInput(new LongWritable(), new Text(
				"655209;1;796764372490213;804422938115889;6"));
		mapDriver.withOutput(new Text("6"), new IntWritable(1));
		mapDriver.runTest();
	}

	@Test
	public void testReducer() throws IOException {
		List<IntWritable> values = new ArrayList<IntWritable>();
		values.add(new IntWritable(1));
		values.add(new IntWritable(1));
		reduceDriver.withInput(new Text("6"), values);
		reduceDriver.withOutput(new Text("6"), new IntWritable(2));
		reduceDriver.runTest();
	}

	@Test
	public void testMapperReducer() throws IOException {
		mapReduceDriver.withInput(new LongWritable(), new Text(
				"655209;1;796764372490213;804422938115889;6"));
		mapReduceDriver.withOutput(new Text("6"), new IntWritable(1));
	}

	@Test
	public void testMapperCount() throws IOException {
		mapDriver.withInput(new LongWritable(), new Text(
				"655209;0;796764372490213;804422938115889;6"));
		// mapDriver.withOutput(new Text("6"), new IntWritable(1));
		mapDriver.runTest();
		assertEquals("Expected 1 counter increment", 1, mapDriver.getCounters()
				.findCounter(CDRCounter.NonSMSCDR).getValue());
	}
}

Mapper类

package mrunit;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SMSCDRMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

	  private Text status = new Text();
	  private final static IntWritable addOne = new IntWritable(1);

	  static enum CDRCounter {
			NonSMSCDR;
		};

	  /**
	   * Returns the SMS status code and its count
	   */
	  protected void map(LongWritable key, Text value, Context context)
	      throws java.io.IOException, InterruptedException {

	    //655209;1;796764372490213;804422938115889;6 is the Sample record format
	    String[] line = value.toString().split(";");
	    // If record is of SMS CDR
	    if (Integer.parseInt(line[1]) == 1) {
	      status.set(line[4]);
	      context.write(status, addOne);
	    }else{
	    	// CDR record is not of type SMS so increment the counter
			context.getCounter(CDRCounter.NonSMSCDR).increment(1);
		}
	  }
	}

Reducer类

package mrunit;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class SMSCDRReducer extends
  Reducer<Text, IntWritable, Text, IntWritable> {

  protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws java.io.IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
      sum += value.get();
    }
    context.write(key, new IntWritable(sum));
  }
}

项目的pom.xml文件

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>com.cdh</groupId>
	<artifactId>cdh-test</artifactId>
	<version>SNAPSHOT-1.0.0</version>
	<packaging>jar</packaging>

	<name>cdh-test</name>
	<url>http://maven.apache.org</url>

	<properties>
		<hadoop.version>2.0.0-mr1-cdh4.4.0</hadoop.version>
		<hbase.version>0.94.6-cdh4.4.0</hbase.version>
		<project.build.sourceEncoding>utf-8</project.build.sourceEncoding>
		<maven.compiler.encoding>utf-8</maven.compiler.encoding>
	</properties>

	<build>
		<pluginManagement>
			<plugins>
				<plugin>
					<groupId>org.apache.maven.plugins</groupId>
					<artifactId>maven-compiler-plugin</artifactId>
					<version>3.1</version>
					<configuration>
						<encoding>utf-8</encoding>
						<source>1.6</source>
						<target>1.6</target>
					</configuration>
				</plugin>
			</plugins>
		</pluginManagement>

		<plugins>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-shade-plugin</artifactId>
				<version>2.1</version>
				<executions>
					<execution>
						<phase>package</phase>
						<goals>
							<goal>shade</goal>
						</goals>
					</execution>
				</executions>
			</plugin>

			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-eclipse-plugin</artifactId>
				<version>2.9</version>
				<configuration>
					<buildOutputDirectory>eclipse-classes</buildOutputDirectory>
					<downloadSources>true</downloadSources>
					<downloadJavadocs>false</downloadJavadocs>
				</configuration>
			</plugin>
		</plugins>
	</build>

	<dependencies>
		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.6</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>

		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>${hadoop.version}</version>
			<scope>provided</scope>
			<exclusions>
				<exclusion>
					<artifactId>mockito-all</artifactId>
					<groupId>org.mockito</groupId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-mapreduce-client-core</artifactId>
			<version>2.0.0-cdh4.4.0</version>
			<exclusions>
				<exclusion>
					<artifactId>
						jersey-test-framework-grizzly2
					</artifactId>
					<groupId>
						com.sun.jersey.jersey-test-framework
					</groupId>
				</exclusion>
				<exclusion>
					<artifactId>netty</artifactId>
					<groupId>org.jboss.netty</groupId>
				</exclusion>
			</exclusions>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.hbase</groupId>
			<artifactId>hbase</artifactId>
			<version>${hbase.version}</version>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>com.hadoop.gplcompression</groupId>
			<artifactId>hadoop-lzo-cdh4</artifactId>
			<version>0.4.15-gplextras</version>
		</dependency>

		<dependency>
			<groupId>org.hsqldb</groupId>
			<artifactId>hsqldb</artifactId>
			<version>2.2.9</version>
		</dependency>

		<dependency>
			<groupId>redis.clients</groupId>
			<artifactId>jedis</artifactId>
			<version>2.5.1</version>
		</dependency>

		<!-- junit test -->
		<dependency>
			<groupId>org.apache.mrunit</groupId>
			<artifactId>mrunit</artifactId>
			<version>1.1.0</version>
			<classifier>hadoop2</classifier>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.mockito</groupId>
			<artifactId>mockito-all</artifactId>
			<version>1.9.5</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.10</version>
			<scope>test</scope>
		</dependency>
	</dependencies>

	<repositories>
		<repository>
			<id>cloudera</id>
			<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
			<releases>
				<enabled>true</enabled>
			</releases>
			<snapshots>
				<enabled>false</enabled>
			</snapshots>
		</repository>
	</repositories>
</project>

时间： 2024-09-05 04:56:30

利用MRUnit进行MapReduce单元测试的相关文章

Hadoop专业解决方案-第5章开发可靠的MapReduce应用

本章主要内容: 1.利用MRUnit创建MapReduce的单元测试. 2.MapReduce应用的本地实例. 3.理解MapReduce的调试. 4.利用MapReduce防御式程序设计. 在WOX.COM下载本章源代码本章在wox.com网站的源码可以在www.wiley.com/go/prohadoopsolutions的源码下载标签找到.第五章的源码根据本章的内容各自分别命名放在了第五章下载目录中. 到目前为止,你应该对MapReduce体系结构,应用程序设计,和定制MapReduce

Hadoop：用MRUnit做单元测试

引言借年底盛宴品鉴之风,继续抒我Hadoop之情,本篇文章介绍如何对Hadoop的MapReduce进行单元测试.MapReduce的开发周期差不多是这样:编写mapper和reducer.编译.打包.提交作业和结果检索等,这个过程比较繁琐,一旦提交到分布式环境出了问题要定位调试,重复这样的过程实在无趣,因此先对MapReduce做单元测试,消除明显的代码bug尤为必要. MRUnit简介 MRUnit是一款由Couldera公司开发的专门针对Hadoop中编写MapReduce单元测试的框架

求一份vs2008单元测试代码实例

问题描述 RT简单的就行,能说明单元测试问题就行,谢谢!!work_51@126.com 解决方案解决方案二: 解决方案三:我利用vs2010创建里单元测试源代码publicstaticintAdd(intn,intm){returnn+m;} 单元测试生成代码[TestMethod()]publicvoidAddTest(){intn=0;//TODO:初始化为适当的值intm=0;//TODO:初始化为适当的值intexpected=0;//TODO:初始化为适当的值intactual;a

Hadoop MapReduce开发最佳实践

前言本文是Hadoop最佳实践系列第二篇,上一篇为<Hadoop管理员的十个最佳实践>. MapRuduce开发对于大多数程序员都会觉得略显复杂,运行一个WordCount(Hadoop中hello word程序)不仅要熟悉MapRuduce模型,还要了解Linux命令(尽管有Cygwin,但在Windows下运行MapRuduce仍然很麻烦),此外还要学习程序的打包.部署.提交job.调试等技能,这足以让很多学习者望而退步. 所以如何提高MapReduce开发效率便成了大家很关注的问题.

Apache Gora介绍

介绍 Gora是一个开源的ORM框架,主要为大数据提供内存数据模型与数据的持久化.目前Gora支持对于列数据.key-value数据,文档数据与RDBMS数据的存储,还支持使用Apache Hadoop来对对大数据进行分析特点虽然目前市面上有很多不错的关系数据库的ORM框架,但是基于数据模型的框架如JDO还是有一些不足,如对于列数据模型的存储与持久化.Gora正好弥补了这个问题,它能使用户很容易对大数据时行内存建模与持久化,而且支持Hadoop来对大

对Nutch 2.1抽象存储层的一些看法

Nutch2.1通过gora对存储层进行了扩展,可以选择使用HBase.Accumulo.Cassandra .MySQL .DataFileAvroStore.AvroStore中任何一种来存储数据,但其中一些并不成熟.在我的反复测试中发现,整体来说,Nutch2.1比起Nutch1.6的性能要差得多,最重要的是不能长期稳定运行.Nutch1.6使用Hadoop Distributed File System (HDFS)来作为存储,稳定可靠.下面分别说说每一种存储方式的情况: HBase(c

MongoDB集成Hadoop进行统计计算

MongoDB本身可以做一些简单的统计工作,包括其内置的基于Javascript的 MapReduce框架,也包括在MongoDB 2.2版本中引入的新的统计框架.除此之外, MongoDB 还提供了对外部统计工具的接口,这就是本文要说的MongoDB-Hadoop的数据中间件.文章内容来源于MongoDB官方博客. 原理图解 MongoDB与Hadoop相结合的方式如下图所未,MongoDB作为数据源存储以及数据结果存储.而具体的计算过程在Hadoop中进行. 这一套处理流程,允许我们通过

使用Mina框架开发 QQ Android 客户端（1） Mina初级教程

Apache MINA是一个网络应用程序框架,用来帮助用户简单地开发高性能和高可靠性的网络应用程序.它提供了一个通过Java NIO在不同的传输例如TCP/IP和UDP/IP上抽象的事件驱动的异步API. Apache MINA 也称为: ● NIO 框架库 ● 客户端服务器框架库 ● 一个网络套接字库 MINA虽然简单但是仍然提供了全功能的网络应用程序框架: ● 为不同的传输类型提供了统一的API: ○ 通过Java NIO提供TCP/IP 和 UDP/IP支持 ○ 通过RXTX提供串口通讯(

铁庵：NoSQL、RDS和大数据异构融合实战，详解PostgreSQL FDW功能原理

直播视频: (点击图片查看视频) 幻灯片下载地址:https://oss-cn-hangzhou.aliyuncs.com/yqfiles/e95acb3cc257c377ad8df5e944760638.pdf Why PostgreSQL ? 图一 PostgreSQL的发展历程 PostgreSQL的前身为ingres Database,从1973年伯克利分校的Ingres项目至今,PostgreSQL现已经过了几十年的发展.相比较于广泛使用MySQL开源数据库,我们之所以对Postgr