import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class HdfsWriter extends Configured implements Tool {
public int run(String[] args) throws Exception {
//String localInputPath = args[0];
Path outputPath = new Path(args[0]); // ARGUMENT FOR OUTPUT_LOCATION
Configuration conf = getConf();
FileSystem fs = FileSystem.get(conf);
OutputStream os = fs.create(outputPath);
InputStream is = new BufferedInputStream(new FileInputStream("/home/acadgild/acadgild.txt")); //Data set is getting copied into input stream through buffer mechanism.
IOUtils.copyBytes(is, os, conf); // Copying the dataset from input stream to output stream
return 0;
}
public static void main(String[] args) throws Exception {
int returnCode = ToolRunner.run(new HdfsWriter(), args);
System.exit(returnCode);
}
}
需要将数据从本地移动到HDFS。
上述代码是我从另一个博客上找到的，但无法正常运行。有谁可以帮我解决这个问题吗？
此外，我还需要使用 MapReduce（MR）解析 JSON，按 DateTime 分组，然后将结果写入 HDFS。
最佳答案
有多种工具可用于将文件从某个节点的本地文件系统复制到 HDFS，例如 `hdfs dfs -put` 或 `hdfs dfs -copyFromLocal` 命令。
关于java - 将JSON文件从本地复制到HDFS,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/46979546/