Java MapReduce read data
- by Tatiana
Hi, I have the following MapReduce code, with which I am trying to read records from my database.
Here is the code:
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBWritable;
import org.apache.hadoop.util.*;
import org.apache.hadoop.conf.*;
public class Connection extends Configured implements Tool {
public int run(String[] args) throws IOException {
JobConf conf = new JobConf(getConf(), Connection.class);
conf.setInputFormat(DBInputFormat.class);
DBConfiguration.configureDB(conf, "com.sun.java.util.jar.pack.Driver",
"jdbc:postgresql://localhost:5432/polyclinic", "postgres",
"12345");
String[] fields = { "name" };
DBInputFormat.setInput(conf, MyRecord.class, "doctors", null, null,
fields);
conf.setMapOutputKeyClass(LongWritable.class);
conf.setMapOutputValueClass(MyRecord.class);
conf.setOutputKeyClass(LongWritable.class);
conf.setOutputValueClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(conf, new Path(args[0]));
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new Connection(), args);
System.exit(exitCode);
}
}
Class Mapper:
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class MyMapper extends MapReduceBase implements Mapper<LongWritable, MyRecord, Text, IntWritable> {
public void map(LongWritable key, MyRecord val, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
output.collect(new Text(val.name), new IntWritable(1));
}
}
Class Record:
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.db.DBWritable;
class MyRecord implements Writable, DBWritable {
String name;
public void readFields(DataInput in) throws IOException {
this.name = Text.readString(in); }
public void readFields(ResultSet resultSet) throws SQLException {
this.name = resultSet.getString(1); }
public void write(DataOutput out) throws IOException {
}
public void write(PreparedStatement stmt) throws SQLException {
}
}
After running this, I got the following warning:
WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
Can you give me any suggestions on how to solve this problem?