Java MapReduce read data

Posted by Tatiana on Stack Overflow See other posts from Stack Overflow or by Tatiana
Published on 2013-10-31T15:45:58Z Indexed on 2013/10/31 15:53 UTC
Read the original article Hit count: 282

Filed under:
|

Hi, I have the following map-reduce code, with which I am trying to read records from my database.

Here is the code:

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBWritable;
import org.apache.hadoop.util.*;
import org.apache.hadoop.conf.*;

/**
 * Job driver that reads rows from the PostgreSQL table {@code doctors}
 * via {@link DBInputFormat} and writes (name, 1) pairs as text output.
 *
 * Run with: hadoop jar <jarfile> Connection <output-dir>
 * (running from an IDE without a jar triggers the
 * "No job jar file set" warning — the job must be packaged in a jar,
 * or the jar set explicitly with conf.setJar(...)).
 */
public class Connection extends Configured implements Tool {

    /**
     * Configures and submits the job.
     *
     * @param args args[0] is the HDFS output directory
     * @return 0 on successful completion
     * @throws IOException if job submission or execution fails
     */
    public int run(String[] args) throws IOException {

        // Passing Connection.class here lets Hadoop locate the containing jar.
        JobConf conf = new JobConf(getConf(), Connection.class);
        conf.setInputFormat(DBInputFormat.class);

        // FIX: the original used "com.sun.java.util.jar.pack.Driver",
        // which is not a JDBC driver; PostgreSQL's driver class is below.
        // The postgresql JDBC jar must be on the job classpath.
        DBConfiguration.configureDB(conf, "org.postgresql.Driver",
                "jdbc:postgresql://localhost:5432/polyclinic", "postgres",
                "12345");
        String[] fields = { "name" };
        DBInputFormat.setInput(conf, MyRecord.class, "doctors", null, null,
                fields);

        // FIX: the mapper was never registered, so the identity mapper ran.
        conf.setMapperClass(MyMapper.class);

        // FIX: these must match what MyMapper actually emits
        // (Text keys, IntWritable values), not LongWritable/MyRecord.
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);

        conf.setOutputKeyClass(Text.class);
        // FIX: setOutputValueClass was given TextOutputFormat.class (an
        // OutputFormat, not a value type); the format is set separately.
        conf.setOutputValueClass(IntWritable.class);
        conf.setOutputFormat(TextOutputFormat.class);

        TextOutputFormat.setOutputPath(conf, new Path(args[0]));

        JobClient.runJob(conf);

        return 0;
    }

    /**
     * Entry point; delegates to ToolRunner so generic Hadoop options
     * (-conf, -D, -libjars, ...) are parsed before run() is called.
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Connection(), args);
        System.exit(exitCode);
    }

}

Class Mapper:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper; 
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/**
 * Emits one (name, 1) pair for every {@link MyRecord} read from the
 * database, so a downstream reducer can count occurrences per name.
 * Input keys are the record offsets supplied by DBInputFormat.
 */
public class MyMapper extends MapReduceBase implements Mapper<LongWritable, MyRecord, Text, IntWritable> {

    public void map(LongWritable key, MyRecord val, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
        // Key on the record's name field; the constant count of 1 lets a
        // reducer sum totals per name.
        Text outKey = new Text(val.name);
        IntWritable one = new IntWritable(1);
        output.collect(outKey, one);
    }

}

Class Record:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable; 
import org.apache.hadoop.mapred.lib.db.DBWritable;

/**
 * A single row of the {@code doctors} table, holding only the
 * {@code name} column. Implements both Writable (Hadoop wire
 * serialization between map and reduce) and DBWritable (JDBC I/O).
 */
class MyRecord implements Writable, DBWritable {

    /** Value of the "name" column for this row. */
    String name;

    /** Deserializes the record from Hadoop's wire format. */
    public void readFields(DataInput in) throws IOException {
        this.name = Text.readString(in);
    }

    /** Populates the record from the current row of a JDBC result set. */
    public void readFields(ResultSet resultSet) throws SQLException {
        this.name = resultSet.getString(1);
    }

    /**
     * Serializes the record to Hadoop's wire format.
     * FIX: the original left this empty, so readFields(DataInput) would
     * fail or read garbage whenever Hadoop serialized the record
     * (e.g. between map and reduce). Must mirror readFields exactly.
     */
    public void write(DataOutput out) throws IOException {
        Text.writeString(out, this.name);
    }

    /**
     * Binds the record's fields to a JDBC statement for DB output.
     * FIX: implemented symmetrically with readFields(ResultSet); it is
     * only invoked if the record is used with DBOutputFormat.
     */
    public void write(PreparedStatement stmt) throws SQLException {
        stmt.setString(1, this.name);
    }
}

After this I got error:

WARN mapred.JobClient: No job jar file set.  User classes may not be found. See JobConf(Class) or JobConf#setJar(String).

Can you give me any suggestion how to solve this problem?

© Stack Overflow or respective owner

Related posts about java

Related posts about mapreduce