Strange Map Reduce Behavior in CouchDB. Rereduce?
Posted
by
Tony
on Stack Overflow
See other posts from Stack Overflow
or by Tony
Published on 2011-01-14T17:12:31Z
Indexed on
2011/01/14
17:53 UTC
Read the original article
Hit count: 272
I have a map/reduce issue with CouchDB (both functions are shown below). When I query the view with group_level=2 (exact grouping), I get accurate output:
{"rows":[
{"key":["2011-01-11","staff-1"],"value":{"total":895.72,"count":2,"services":6,"services_ignored":6,"services_liked":0,"services_disliked":0,"services_disliked_avg":0,"Revise":{"total":275.72,"count":1},"Review":{"total":620,"count":1}}},
{"key":["2011-01-11","staff-2"],"value":{"total":8461.689999999999,"count":2,"services":41,"services_ignored":37,"services_liked":4,"services_disliked":0,"services_disliked_avg":0,"Revise":{"total":4432.4,"count":1},"Review":{"total":4029.29,"count":1}}},
{"key":["2011-01-11","staff-3"],"value":{"total":2100.72,"count":1,"services":10,"services_ignored":4,"services_liked":3,"services_disliked":3,"services_disliked_avg":2.3333333333333335,"Revise":{"total":2100.72,"count":1}}},
However, changing to group_level=1, so that the values for all the different staff keys are grouped together by date, no longer gives accurate output (notice that the total is correct but all the other fields are wrong):
{"rows":[
{"key":["2011-01-11"],"value":{"total":11458.130000000001,"count":2,"services":0,"services_ignored":0,"services_liked":0,"services_disliked":0,"services_disliked_avg":0,"None":{"total":11458.130000000001,"count":2}}},
My only theory is this has something to do with rereduce, which I have not yet learned. Should I explore that option or am I missing something else here?
This is the Map function:
function(doc) {
if(doc.doc_type == 'Feedback') {
emit([doc.date.split('T')[0], doc.staff_id], doc);
}
}
And this is the Reduce:
function(keys, vals) {
// sum all key points by status: total, count, services (liked, rejected, ignored)
var ret = {
'total':0,
'count':0,
'services': 0,
'services_ignored': 0,
'services_liked': 0,
'services_disliked': 0,
'services_disliked_avg': 0,
};
var total_disliked_score = 0;
// handle status
function handle_status(doc) {
if(!doc.status || doc.status == '' || doc.status == undefined) {
status = 'None';
} else if (doc.status == 'Declined') {
status = 'Rejected';
} else {
status = doc.status;
}
if(!ret[status]) ret[status] = {'total':0, 'count':0};
ret[status]['total'] += doc.total;
ret[status]['count'] += 1;
};
// handle likes / dislikes
function handle_services(services) {
ret.services += services.length;
for(var a in services) {
if (services[a].user_likes == 10) {
ret.services_liked += 1;
} else if (services[a].user_likes >= 1) {
ret.services_disliked += 1;
total_disliked_score += services[a].user_likes;
if (total_disliked_score >= ret.services_disliked) {
ret.services_disliked_avg = total_disliked_score / ret.services_disliked;
}
} else {
ret.services_ignored += 1;
}
}
}
// loop thru docs
for(var i in vals) {
// increment the total $
ret.total += vals[i].total;
ret.count += 1;
// update totals and sums for the status of this route
handle_status(vals[i]);
// do the likes / dislikes stats
if(vals[i].groups) {
for(var ii in vals[i].groups) {
if(vals[i].groups[ii].services) {
handle_services(vals[i].groups[ii].services);
}
}
}
// handle deleted services
if(vals[i].hidden_services) {
if (vals[i].hidden_services) {
handle_services(vals[i].hidden_services);
}
}
}
return ret;
}
© Stack Overflow or respective owner