我尝试使用 Spring Data - Hadoop,从本地机器的 IDE 向远程集群提交并执行 MR 代码。
// Hadoop 1.1.2,Spring 3.2.4,Spring-Data-Hadoop 1.0.0
试用这些版本:
Hadoop 1.2.1,Spring 4.0.1,Spring-Data-Hadoop 2.0.2
applicationContext.xml :
<?xml version="1.0" encoding="UTF-8"?> <beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hdp="http://www.springframework.org/schema/hadoop" xmlns:context="http://www.springframework.org/schema/context" xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.2.xsd"> <context:property-placeholder location="resources/hadoop.properties" /> 我错过了什么配置?是否真的有可能不创建 jar 等、直接使用 Spring Data 远程提交 Hadoop 作业?解决方案:把 Mapper 和 Reducer 类分离出来即可,并在 applicationContext.xml 中做了如下更改:
<?xml version="1.0" encoding="UTF-8"?> <beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:util="http://www.springframework.org/schema/util" xmlns:context="http://www.springframework.org/schema/context" xmlns:hdp="http://www.springframework.org/schema/hadoop" xmlns:batch="http://www.springframework.org/schema/batch" xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch.xsd http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-4.2.xsd"> <context:property-placeholder location="classpath:application.properties" /> I'm trying out Spring Data - Hadoop for executing the MR code on a remote cluster from my local machine's IDE
//Hadoop 1.1.2, Spring 3.2.4, Spring-Data-Hadoop 1.0.0
Tried with these versions :
Hadoop 1.2.1, Spring 4.0.1, Spring-Data-Hadoop 2.0.2
applicationContext.xml :
<?xml version="1.0" encoding="UTF-8"?>
<!-- Spring Data Hadoop context: submits the word-count job to a remote cluster at startup. -->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:hdp="http://www.springframework.org/schema/hadoop"
       xmlns:context="http://www.springframework.org/schema/context"
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
                           http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd
                           http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.2.xsd">

    <!-- hd.fs / hd.jobtracker.uri / wordcount.* come from this properties file. -->
    <context:property-placeholder location="resources/hadoop.properties" />

    <hdp:configuration file-system-uri="${hd.fs}" job-tracker-uri="${hd.jobtracker.uri}">
    </hdp:configuration>

    <!-- jar-by-class makes Spring locate and ship the jar containing WordCounter to the
         cluster; without it the tasktrackers cannot load WCMapper/WCReducer and fail
         with ClassNotFoundException ("No job jar file set" in the submission log). -->
    <hdp:job id="wc-job"
             mapper="com.hadoop.basics.WordCounter.WCMapper"
             reducer="com.hadoop.basics.WordCounter.WCReducer"
             input-path="${wordcount.input.path}"
             output-path="${wordcount.output.path}"
             jar-by-class="com.hadoop.basics.WordCounter"
             user="bigdata">
    </hdp:job>

    <hdp:job-runner id="myjobs-runner" job-ref="wc-job" run-at-startup="true" />

    <hdp:resource-loader id="resourceLoader" uri="${hd.fs}" user="bigdata" />
</beans>WordCounter.java :
package com.hadoop.basics;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.springframework.context.support.AbstractApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

/**
 * Word-count MapReduce job driven by Spring Data Hadoop: {@link #main} only
 * bootstraps {@code applicationContext.xml}, whose job-runner bean submits the
 * job at startup. Mapper and reducer are static nested classes so the
 * framework can instantiate them reflectively.
 */
public class WordCounter {

    /** Constant count emitted once per token; never mutated, so it is safely shared. */
    private static final IntWritable one = new IntWritable(1);

    /** Splits each input value on whitespace and emits (token, 1) pairs. */
    public static class WCMapper extends Mapper<Text, Text, Text, IntWritable> {

        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            Text word = new Text();
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }

    /** Sums the per-token counts and emits (token, total) pairs. */
    public static class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable count : values) {
                sum += count.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    /**
     * Loads the Spring context (which triggers job submission via the
     * run-at-startup job-runner bean) and registers a shutdown hook so the
     * context is closed cleanly when the JVM exits.
     */
    public static void main(String[] args) {
        AbstractApplicationContext context = new ClassPathXmlApplicationContext(
                "applicationContext.xml", WordCounter.class);
        System.out.println("Word Count Application Running");
        context.registerShutdownHook();
    }
}The output is :
Aug 23, 2013 11:07:48 AM org.springframework.context.support.AbstractApplicationContext prepareRefresh INFO: Refreshing org.springframework.context.support.ClassPathXmlApplicationContext@1815338: startup date [Fri Aug 23 11:07:48 IST 2013]; root of context hierarchy Aug 23, 2013 11:07:48 AM org.springframework.beans.factory.xml.XmlBeanDefinitionReader loadBeanDefinitions INFO: Loading XML bean definitions from class path resource [com/hadoop/basics/applicationContext.xml] Aug 23, 2013 11:07:48 AM org.springframework.core.io.support.PropertiesLoaderSupport loadProperties INFO: Loading properties file from class path resource [resources/hadoop.properties] Aug 23, 2013 11:07:48 AM org.springframework.beans.factory.support.DefaultListableBeanFactory preInstantiateSingletons INFO: Pre-instantiating singletons in org.springframework.beans.factory.support.DefaultListableBeanFactory@7c197e: defining beans [org.springframework.context.support.PropertySourcesPlaceholderConfigurer#0,hadoopConfiguration,wc-job,myjobs-runner,resourceLoader]; root of factory hierarchy Aug 23, 2013 11:07:49 AM org.springframework.data.hadoop.mapreduce.JobExecutor$2 run INFO: Starting job [wc-job] Aug 23, 2013 11:07:49 AM org.apache.hadoop.mapred.JobClient copyAndConfigureFiles WARNING: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String). Aug 23, 2013 11:07:49 AM org.apache.hadoop.mapreduce.lib.input.FileInputFormat listStatus INFO: Total input paths to process : 1 Aug 23, 2013 11:07:50 AM org.apache.hadoop.util.NativeCodeLoader <clinit> WARNING: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable Aug 23, 2013 11:07:50 AM org.apache.hadoop.io.compress.snappy.LoadSnappy <clinit> WARNING: Snappy native library not loaded Aug 23, 2013 11:07:52 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob INFO: Running job: job_201308231532_0002 Aug 23, 2013 11:07:53 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob INFO: map 0% reduce 0% Aug 23, 2013 11:08:12 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob INFO: Task Id : attempt_201308231532_0002_m_000000_0, Status : FAILED java.lang.RuntimeException: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:849) at org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:199) at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:719) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370) at org.apache.hadoop.mapred.Child$4.run(Child.java:255) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149) at org.apache.hadoop.mapred.Child.main(Child.java:249) Caused by: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:423) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:356) at java.lang.Class.forName0(Native Method) at java.lang.Class.forName(Class.java:264) at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:802) at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:847) ... 
8 more Aug 23, 2013 11:08:33 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob INFO: Task Id : attempt_201308231532_0002_m_000000_1, Status : FAILED java.lang.RuntimeException: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:849) at org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:199) at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:719) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370) at org.apache.hadoop.mapred.Child$4.run(Child.java:255) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149) at org.apache.hadoop.mapred.Child.main(Child.java:249) Caused by: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper at java.URLClassLoader$1.run(URLClassLoader.java:366) at java.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:423) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:356) at java.lang.Class.forName0(Native Method) at java.lang.Class.forName(Class.java:264) at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:802) at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:847) ... 
8 more Aug 23, 2013 11:08:51 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob INFO: Task Id : attempt_201308231532_0002_m_000000_2, Status : FAILED java.lang.RuntimeException: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:849) at org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:199) at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:719) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370) at org.apache.hadoop.mapred.Child$4.run(Child.java:255) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149) at org.apache.hadoop.mapred.Child.main(Child.java:249) Caused by: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper at java.URLClassLoader$1.run(URLClassLoader.java:366) at java.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:423) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:356) at java.lang.Class.forName0(Native Method) at java.lang.Class.forName(Class.java:264) at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:802) at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:847) ... 
8 more Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob INFO: Job complete: job_201308231532_0002 Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log INFO: Counters: 7 Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log INFO: Job Counters Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log INFO: SLOTS_MILLIS_MAPS=86688 Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log INFO: Total time spent by all reduces waiting after reserving slots (ms)=0 Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log INFO: Total time spent by all maps waiting after reserving slots (ms)=0 Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log INFO: Launched map tasks=4 Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log INFO: Data-local map tasks=4 Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log INFO: SLOTS_MILLIS_REDUCES=0 Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log INFO: Failed map tasks=1 Aug 23, 2013 11:09:24 AM org.springframework.data.hadoop.mapreduce.JobExecutor$2 run INFO: Completed job [wc-job] Aug 23, 2013 11:09:24 AM org.springframework.beans.factory.support.DefaultSingletonBeanRegistry destroySingletons INFO: Destroying singletons in org.springframework.beans.factory.support.DefaultListableBeanFactory@7c197e: defining beans [org.springframework.context.support.PropertySourcesPlaceholderConfigurer#0,hadoopConfiguration,wc-job,myjobs-runner,resourceLoader]; root of factory hierarchy Exception in thread "main" org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'myjobs-runner': Invocation of init method failed; nested exception is java.lang.IllegalStateException: Job wc-job] failed to start; status=FAILED at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1482) at 
org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.doCreateBean(AbstractAutowireCapableBeanFactory.java:521) at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.createBean(AbstractAutowireCapableBeanFactory.java:458) at org.springframework.beans.factory.support.AbstractBeanFactory$1.getObject(AbstractBeanFactory.java:295) at org.springframework.beans.factory.support.DefaultSingletonBeanRegistry.getSingleton(DefaultSingletonBeanRegistry.java:223) at org.springframework.beans.factory.support.AbstractBeanFactory.doGetBean(AbstractBeanFactory.java:292) at org.springframework.beans.factory.support.AbstractBeanFactory.getBean(AbstractBeanFactory.java:194) at org.springframework.beans.factory.support.DefaultListableBeanFactory.preInstantiateSingletons(DefaultListableBeanFactory.java:628) at org.springframework.context.support.AbstractApplicationContext.finishBeanFactoryInitialization(AbstractApplicationContext.java:932) at org.springframework.context.support.AbstractApplicationContext.refresh(AbstractApplicationContext.java:479) at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:197) at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:172) at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:158) at com.hadoop.basics.WordCounter.main(WordCounter.java:58) Caused by: java.lang.IllegalStateException: Job wc-job] failed to start; status=FAILED at org.springframework.data.hadoop.mapreduce.JobExecutor$2.run(JobExecutor.java:219) at org.springframework.core.task.SyncTaskExecutor.execute(SyncTaskExecutor.java:49) at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:168) at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:160) at 
org.springframework.data.hadoop.mapreduce.JobRunner.call(JobRunner.java:52) at org.springframework.data.hadoop.mapreduce.JobRunner.afterPropertiesSet(JobRunner.java:44) at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeInitMethods(AbstractAutowireCapableBeanFactory.java:1541) at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1479) ... 13 moreWhat config. have I missed? Is it really possible to submit a Hadoop job remotely using Spring Data without creation of a jar etc. ?
解决方案:I was getting the same issue; separating the mapper and reducer classes into their own files works, and the following changes were made in applicationContext.xml:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Answer's working context: Kerberos-secured cluster, mapper/reducer in separate
     top-level classes, and jar-by-class so the job jar is shipped to the cluster. -->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:util="http://www.springframework.org/schema/util"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:hdp="http://www.springframework.org/schema/hadoop"
       xmlns:batch="http://www.springframework.org/schema/batch"
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
                           http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd
                           http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd
                           http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch.xsd
                           http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-4.2.xsd">

    <context:property-placeholder location="classpath:application.properties" />

    <!-- Kerberos ("kerb") security plus explicit fs/jobtracker endpoints. -->
    <hdp:configuration namenode-principal="hdfs://xx.yy" rm-manager-uri="xx.yy"
        security-method="kerb" user-keytab="location" rm-manager-principal="username"
        user-principal="username">
        fs.default.name=${fs.default.name}
        mapred.job.tracker=${mapred.job.tracker}
    </hdp:configuration>

    <hdp:job id="wordCountJobId"
             input-path="${input.path}"
             output-path="${output.path}"
             jar-by-class="com.xx.poc.Application"
             mapper="com.xx.poc.Map"
             reducer="com.xx.poc.Reduce" />

    <hdp:job-runner id="wordCountJobRunner" job-ref="wordCountJobId" run-at-startup="true" />
</beans>
更多推荐
作业提交后的ClassNotFoundException
发布评论