问题描述
15/05/15 11:45:21 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1, localhost, ANY, 1327 bytes)
15/05/15 11:45:21 INFO Executor: Running task 0.0 in stage 1.0 (TID 1)
15/05/15 11:45:21 INFO HadoopRDD: Input split: hdfs://192.168.105.226:9000/LesterDemoTest/Data/draft/patent.db:0+48437
15/05/15 11:45:22 ERROR Executor: Exception in task 0.0 in stage 1.0 (TID 1)
org.elasticsearch.hadoop.rest.EsHadoopTransportException: Invalid target URI fe80:0:0:0:20c:29ff:feac:b7e2%2:9200
    at org.elasticsearch.hadoop.rest.commonshttp.CommonsHttpTransport.<init>(CommonsHttpTransport.java:173)
    at org.elasticsearch.hadoop.rest.NetworkClient.selectNextNode(NetworkClient.java:86)
    at org.elasticsearch.hadoop.rest.NetworkClient.<init>(NetworkClient.java:69)
    at org.elasticsearch.hadoop.rest.RestClient.<init>(RestClient.java:80)
    at org.elasticsearch.hadoop.rest.InitializationUtils.discoverEsVersion(InitializationUtils.java:81)
    at org.elasticsearch.hadoop.rest.RestService.createWriter(RestService.java:348)
    at org.elasticsearch.spark.rdd.EsRDDWriter.write(EsRDDWriter.scala:31)
    at org.elasticsearch.spark.rdd.EsSpark$$anonfun$saveToEs$1.apply(EsSpark.scala:34)
    at org.elasticsearch.spark.rdd.EsSpark$$anonfun$saveToEs$1.apply(EsSpark.scala:34)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
    at org.apache.spark.scheduler.Task.run(Task.scala:64)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:722)
Caused by: org.apache.commons.httpclient.URIException: invalid port number
    at org.apache.commons.httpclient.URI.parseAuthority(URI.java:2248)
    at org.apache.commons.httpclient.URI.parseUriReference(URI.java:1978)
    at org.apache.commons.httpclient.URI.<init>(URI.java:167)
    at org.elasticsearch.hadoop.rest.commonshttp.CommonsHttpTransport.<init>(CommonsHttpTransport.java:171)
    ... 14 more
15/05/15 11:45:22 WARN TaskSetManager: Lost task 0.0 in stage 1.0 (TID 1, localhost): org.elasticsearch.hadoop.rest.EsHadoopTransportException: Invalid target URI fe80:0:0:0:20c:29ff:feac:b7e2%2:9200
    at org.elasticsearch.hadoop.rest.commonshttp.CommonsHttpTransport.<init>(CommonsHttpTransport.java:173)
    at org.elasticsearch.hadoop.rest.NetworkClient.selectNextNode(NetworkClient.java:86)
    at org.elasticsearch.hadoop.rest.NetworkClient.<init>(NetworkClient.java:69)
    at org.elasticsearch.hadoop.rest.RestClient.<init>(RestClient.java:80)
    at org.elasticsearch.hadoop.rest.InitializationUtils.discoverEsVersion(InitializationUtils.java:81)
    at org.elasticsearch.hadoop.rest.RestService.createWriter(RestService.java:348)
    at org.elasticsearch.spark.rdd.EsRDDWriter.write(EsRDDWriter.scala:31)
    at org.elasticsearch.spark.rdd.EsSpark$$anonfun$saveToEs$1.apply(EsSpark.scala:34)
    at org.elasticsearch.spark.rdd.EsSpark$$anonfun$saveToEs$1.apply(EsSpark.scala:34)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
    at org.apache.spark.scheduler.Task.run(Task.scala:64)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:722)
Caused by: org.apache.commons.httpclient.URIException: invalid port number
    at org.apache.commons.httpclient.URI.parseAuthority(URI.java:2248)
    at org.apache.commons.httpclient.URI.parseUriReference(URI.java:1978)
    at org.apache.commons.httpclient.URI.<init>(URI.java:167)
    at org.elasticsearch.hadoop.rest.commonshttp.CommonsHttpTransport.<init>(CommonsHttpTransport.java:171)
    ... 14 more
15/05/15 11:45:22 ERROR TaskSetManager: Task 0 in stage 1.0 failed 1 times; aborting job
15/05/15 11:45:22 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
15/05/15 11:45:22 INFO TaskSchedulerImpl: Cancelling stage 1
15/05/15 11:45:22 INFO DAGScheduler: Job 1 failed: runJob at EsSpark.scala:34, took 0.653925 s
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, localhost): org.elasticsearch.hadoop.rest.EsHadoopTransportException: Invalid target URI fe80:0:0:0:20c:29ff:feac:b7e2%2:9200
    at org.elasticsearch.hadoop.rest.commonshttp.CommonsHttpTransport.<init>(CommonsHttpTransport.java:173)
    at org.elasticsearch.hadoop.rest.NetworkClient.selectNextNode(NetworkClient.java:86)
    at org.elasticsearch.hadoop.rest.NetworkClient.<init>(NetworkClient.java:69)
    at org.elasticsearch.hadoop.rest.RestClient.<init>(RestClient.java:80)
    at org.elasticsearch.hadoop.rest.InitializationUtils.discoverEsVersion(InitializationUtils.java:81)
    at org.elasticsearch.hadoop.rest.RestService.createWriter(RestService.java:348)
    at org.elasticsearch.spark.rdd.EsRDDWriter.write(EsRDDWriter.scala:31)
    at org.elasticsearch.spark.rdd.EsSpark$$anonfun$saveToEs$1.apply(EsSpark.scala:34)
    at org.elasticsearch.spark.rdd.EsSpark$$anonfun$saveToEs$1.apply(EsSpark.scala:34)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
    at org.apache.spark.scheduler.Task.run(Task.scala:64)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:722)
Caused by: org.apache.commons.httpclient.URIException: invalid port number
    at org.apache.commons.httpclient.URI.parseAuthority(URI.java:2248)
    at org.apache.commons.httpclient.URI.parseUriReference(URI.java:1978)
    at org.apache.commons.httpclient.URI.<init>(URI.java:167)
    at org.elasticsearch.hadoop.rest.commonshttp.CommonsHttpTransport.<init>(CommonsHttpTransport.java:171)
    ... 14 more
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
解决方案
解决方案二:
主要的问题就是:org.elasticsearch.hadoop.rest.EsHadoopTransportException: Invalid target URI fe80:0:0:0:20c:29ff:feac:b7e2%2:9200。在构造 SparkConf 时设置的 es.nodes,在 Spark 运行时被替换成了 fe80:0:0:0:20c:29ff:feac:b7e2%2。
解决方案三:
已经解决,禁用掉 ES 所在机器的 IPv6 就可以了。