hadoop - NullPointerException in a MapReduce job

Tags: hadoop mapreduce hbase

I am trying to bulk load data into HBase using the Java API, and I get the exception below when the Mapper class is invoked. I found this while debugging my driver code: the error appears as soon as the debugger tries to step into the mapper. My HFile is created, but it cannot be loaded into HBase.

16/08/10 04:09:56 INFO mapred.Task:  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@7363c839
16/08/10 04:09:56 INFO mapred.MapTask: Processing split: file:/home/cloudera/su.txt:0+50
16/08/10 04:09:56 INFO mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
16/08/10 04:09:56 INFO mapred.MapTask: io.sort.mb = 100
16/08/10 04:09:57 INFO mapred.MapTask: data buffer = 79691776/99614720
16/08/10 04:09:57 INFO mapred.MapTask: record buffer = 262144/327680
16/08/10 04:09:57 INFO mapred.LocalJobRunner: Map task executor complete.
16/08/10 04:09:57 WARN mapred.LocalJobRunner: job_local930363008_0001
java.lang.Exception: java.lang.NullPointerException
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:406)
Caused by: java.lang.NullPointerException
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:843)
    at org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:376)
    at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:85)
    at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:584)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:656)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:330)
    at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:268)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
    at java.util.concurrent.FutureTask.run(FutureTask.java:262)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
16/08/10 04:09:57 INFO mapred.JobClient:  map 0% reduce 0%
16/08/10 04:09:57 INFO mapred.JobClient: Job complete: job_local930363008_0001
16/08/10 04:09:57 INFO mapred.JobClient: Counters: 0

Here is the code I am using for this:

package com.sample.bulkload.hbase;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class HBaseBulkLoad {


    public static class BulkLoadMap extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] values = value.toString().split(",");
            String rowKey = values[0];

            // ImmutableBytesWritable HKey = new
            // ImmutableBytesWritable(put.getRow());
            // context.write(HKey, put);
            System.out.println("Entered into Mapper Method");
            Put HPut = new Put(Bytes.toBytes(rowKey));
            HPut.add(Bytes.toBytes("personalDetails"), Bytes.toBytes("first_name"), Bytes.toBytes(values[1]));
            HPut.add(Bytes.toBytes("personalDetails"), Bytes.toBytes("last_name"), Bytes.toBytes(values[2]));
            HPut.add(Bytes.toBytes("contactDetails"), Bytes.toBytes("email"), Bytes.toBytes(values[3]));
            HPut.add(Bytes.toBytes("contactDetails"), Bytes.toBytes("city"), Bytes.toBytes(values[4]));
            context.write(new ImmutableBytesWritable(Bytes.toBytes(rowKey)), HPut);
            System.out.println("Written into Context");
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "localhost");
        conf.set("hbase.zookeeper.property.clientport", "2181");

        Job job = new Job(conf, "HBase_Bulk_loader");
        HTable hTable = new HTable(conf, args[2]);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);

        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Put.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);

        job.setJarByClass(HBaseBulkLoad.class);

        job.setMapperClass(HBaseBulkLoad.BulkLoadMap.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        HFileOutputFormat.configureIncrementalLoad(job, hTable);
        job.waitForCompletion(true);
    }
}

Best answer

The Mapper's output key and value classes need to implement the Writable interface. The NullPointerException in MapTask$MapOutputBuffer.init is typically what MRv1 throws when it cannot find a serializer for the map output key or value class, and Put no longer implements Writable in recent HBase releases (0.96+), so with only the default WritableSerialization available there is nothing that can serialize it.
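
A minimal sketch of one way to apply that advice, assuming an HBase 0.94-era classpath on which KeyValue still implements Writable (on HBase 0.96+ neither Put nor KeyValue is Writable, and serialization instead goes through the io.serializations entries that HFileOutputFormat.configureIncrementalLoad registers). Apart from the KeyValue import, the names below mirror the code in the question:

import org.apache.hadoop.hbase.KeyValue;

// Sketch only: the same mapper as in the question, but emitting KeyValue
// (Writable in HBase 0.94) instead of Put, so that MapOutputBuffer can
// find a serializer for the map output value class.
public static class BulkLoadMap extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {
    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] values = value.toString().split(",");
        byte[] row = Bytes.toBytes(values[0]);
        ImmutableBytesWritable hKey = new ImmutableBytesWritable(row);

        // One KeyValue per cell; configureIncrementalLoad installs the
        // partitioner and comparator that keep the output totally ordered.
        context.write(hKey, new KeyValue(row, Bytes.toBytes("personalDetails"), Bytes.toBytes("first_name"), Bytes.toBytes(values[1])));
        context.write(hKey, new KeyValue(row, Bytes.toBytes("personalDetails"), Bytes.toBytes("last_name"), Bytes.toBytes(values[2])));
        context.write(hKey, new KeyValue(row, Bytes.toBytes("contactDetails"), Bytes.toBytes("email"), Bytes.toBytes(values[3])));
        context.write(hKey, new KeyValue(row, Bytes.toBytes("contactDetails"), Bytes.toBytes("city"), Bytes.toBytes(values[4])));
    }
}

The driver would then call job.setMapOutputValueClass(KeyValue.class) before configureIncrementalLoad, since that method inspects the map output value class to choose between KeyValueSortReducer and PutSortReducer.

Separately, since the question says the HFiles are created but never show up in HBase: writing HFiles is only half of a bulk load, and the driver above never hands them to the region servers. A sketch using the classic LoadIncrementalHFiles API, under the same version assumptions as above:

import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

// After the job finishes successfully, hand the generated HFiles in the
// output directory over to HBase.
if (job.waitForCompletion(true)) {
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    loader.doBulkLoad(new Path(args[1]), hTable);
}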

For hadoop - NullPointerException in a MapReduce job, we found a similar question on Stack Overflow: https://stackoverflow.com/questions/38871945/
