Most efficient method to groupby on an array of objects

Most efficient method to groupby on an array of objects

What is the most efficient way to groupby objects in an array?
For example, given this array of objects:
[
{ Phase: “Phase 1”, Step: “Step 1”, Task: “Task 1”, Value: “5” },
{ Phase: “Phase 1”, Step: “Step 1”, Task: “Task 2”, Value: “10” },
{ Phase: “Phase 1”, Step: “Step 2”, Task: “Task 1”, Value: “15” },
{ Phase: “Phase 1”, Step: “Step 2”, Task: “Task 2”, Value: “20” },
{ Phase: “Phase 2”, Step: “Step 1”, Task: “Task 1”, Value: “25” },
{ Phase: “Phase 2”, Step: “Step 1”, Task: “Task 2”, Value: “30” },
{ Phase: “Phase 2”, Step: “Step 2”, Task: “Task 1”, Value: “35” },
{ Phase: “Phase 2”, Step: “Step 2”, Task: “Task 2”, Value: “40” }
]

I’m displaying this information in a table. I’d like to groupby different methods, but I want to sum the values.
I’m using Underscore.js for its groupby function, which is helpful, but doesn’t do the whole trick, because I don’t want them “split up” but “merged”, more like the SQL group by method.
What I’m looking for would be able to total specific values (if requested).
So if I did groupby Phase, I’d want to receive:
[
{ Phase: “Phase 1”, Value: 50 },
{ Phase: “Phase 2”, Value: 130 }
]

And if I did groupy Phase / Step, I’d receive:
[
{ Phase: “Phase 1”, Step: “Step 1”, Value: 15 },
{ Phase: “Phase 1”, Step: “Step 2”, Value: 35 },
{ Phase: “Phase 2”, Step: “Step 1”, Value: 55 },
{ Phase: “Phase 2”, Step: “Step 2”, Value: 75 }
]

Is there a helpful script for this, or should I stick to using Underscore.js, and then looping through the resulting object to do the totals myself?

Solutions/Answers:

Solution 1:

If you want to avoid external libraries, you can concisely implement a vanilla version of groupBy() like so:

var groupBy = function(xs, key) {
  return xs.reduce(function(rv, x) {
    (rv[x[key]] = rv[x[key]] || []).push(x);
    return rv;
  }, {});
};

console.log(groupBy(['one', 'two', 'three'], 'length'));

// => {3: ["one", "two"], 5: ["three"]}

Solution 2:

Using ES6 Map object:

function groupBy(list, keyGetter) {
    const map = new Map();
    list.forEach((item) => {
         const key = keyGetter(item);
         const collection = map.get(key);
         if (!collection) {
             map.set(key, [item]);
         } else {
             collection.push(item);
         }
    });
    return map;
}

// example usage

const pets = [
    {type:"Dog", name:"Spot"},
    {type:"Cat", name:"Tiger"},
    {type:"Dog", name:"Rover"}, 
    {type:"Cat", name:"Leo"}
];
    
const grouped = groupBy(pets, pet => pet.type);
    
console.log(grouped.get("Dog")); // -> [{type:"Dog", name:"Spot"}, {type:"Dog", name:"Rover"}]
console.log(grouped.get("Cat")); // -> [{type:"Cat", name:"Tiger"}, {type:"Cat", name:"Leo"}]
    
    

About Map:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map

Solution 3:

with ES6:

const groupBy = (items, key) => items.reduce(
  (result, item) => ({
    ...result,
    [item[key]]: [
      ...(result[item[key]] || []),
      item,
    ],
  }), 
  {},
);

Solution 4:

Although the linq answer is interesting, it’s also quite heavy-weight. My approach is somewhat different:

var DataGrouper = (function() {
    var has = function(obj, target) {
        return _.any(obj, function(value) {
            return _.isEqual(value, target);
        });
    };

    var keys = function(data, names) {
        return _.reduce(data, function(memo, item) {
            var key = _.pick(item, names);
            if (!has(memo, key)) {
                memo.push(key);
            }
            return memo;
        }, []);
    };

    var group = function(data, names) {
        var stems = keys(data, names);
        return _.map(stems, function(stem) {
            return {
                key: stem,
                vals:_.map(_.where(data, stem), function(item) {
                    return _.omit(item, names);
                })
            };
        });
    };

    group.register = function(name, converter) {
        return group[name] = function(data, names) {
            return _.map(group(data, names), converter);
        };
    };

    return group;
}());

DataGrouper.register("sum", function(item) {
    return _.extend({}, item.key, {Value: _.reduce(item.vals, function(memo, node) {
        return memo + Number(node.Value);
    }, 0)});
});

You can see it in action on JSBin.

I didn’t see anything in Underscore that does what has does, although I might be missing it. It’s much the same as _.contains, but uses _.isEqual rather than === for comparisons. Other than that, the rest of this is problem-specific, although with an attempt to be generic.

Now DataGrouper.sum(data, ["Phase"]) returns

[
    {Phase: "Phase 1", Value: 50},
    {Phase: "Phase 2", Value: 130}
]

And DataGrouper.sum(data, ["Phase", "Step"]) returns

[
    {Phase: "Phase 1", Step: "Step 1", Value: 15},
    {Phase: "Phase 1", Step: "Step 2", Value: 35},
    {Phase: "Phase 2", Step: "Step 1", Value: 55},
    {Phase: "Phase 2", Step: "Step 2", Value: 75}
]

But sum is only one potential function here. You can register others as you like:

DataGrouper.register("max", function(item) {
    return _.extend({}, item.key, {Max: _.reduce(item.vals, function(memo, node) {
        return Math.max(memo, Number(node.Value));
    }, Number.NEGATIVE_INFINITY)});
});

and now DataGrouper.max(data, ["Phase", "Step"]) will return

[
    {Phase: "Phase 1", Step: "Step 1", Max: 10},
    {Phase: "Phase 1", Step: "Step 2", Max: 20},
    {Phase: "Phase 2", Step: "Step 1", Max: 30},
    {Phase: "Phase 2", Step: "Step 2", Max: 40}
]

or if you registered this:

DataGrouper.register("tasks", function(item) {
    return _.extend({}, item.key, {Tasks: _.map(item.vals, function(item) {
      return item.Task + " (" + item.Value + ")";
    }).join(", ")});
});

then calling DataGrouper.tasks(data, ["Phase", "Step"]) will get you

[
    {Phase: "Phase 1", Step: "Step 1", Tasks: "Task 1 (5), Task 2 (10)"},
    {Phase: "Phase 1", Step: "Step 2", Tasks: "Task 1 (15), Task 2 (20)"},
    {Phase: "Phase 2", Step: "Step 1", Tasks: "Task 1 (25), Task 2 (30)"},
    {Phase: "Phase 2", Step: "Step 2", Tasks: "Task 1 (35), Task 2 (40)"}
]

DataGrouper itself is a function. You can call it with your data and a list of the properties you want to group by. It returns an array whose elements are object with two properties: key is the collection of grouped properties, vals is an array of objects containing the remaining properties not in the key. For example, DataGrouper(data, ["Phase", "Step"]) will yield:

[
    {
        "key": {Phase: "Phase 1", Step: "Step 1"},
        "vals": [
            {Task: "Task 1", Value: "5"},
            {Task: "Task 2", Value: "10"}
        ]
    },
    {
        "key": {Phase: "Phase 1", Step: "Step 2"},
        "vals": [
            {Task: "Task 1", Value: "15"}, 
            {Task: "Task 2", Value: "20"}
        ]
    },
    {
        "key": {Phase: "Phase 2", Step: "Step 1"},
        "vals": [
            {Task: "Task 1", Value: "25"},
            {Task: "Task 2", Value: "30"}
        ]
    },
    {
        "key": {Phase: "Phase 2", Step: "Step 2"},
        "vals": [
            {Task: "Task 1", Value: "35"}, 
            {Task: "Task 2", Value: "40"}
        ]
    }
]

DataGrouper.register accepts a function and creates a new function which accepts the initial data and the properties to group by. This new function then takes the output format as above and runs your function against each of them in turn, returning a new array. The function that’s generated is stored as a property of DataGrouper according to a name you supply and also returned if you just want a local reference.

Well that’s a lot of explanation. The code is reasonably straightforward, I hope!

Solution 5:

This is probably more easily done with linq.js, which is intended to be a true implementation of LINQ in JavaScript (DEMO):

var linq = Enumerable.From(data);
var result =
    linq.GroupBy(function(x){ return x.Phase; })
        .Select(function(x){
          return {
            Phase: x.Key(),
            Value: x.Sum(function(y){ return y.Value|0; })
          };
        }).ToArray();

result:

[
    { Phase: "Phase 1", Value: 50 },
    { Phase: "Phase 2", Value: 130 }
]

Or, more simply using the string-based selectors (DEMO):

linq.GroupBy("$.Phase", "",
    "k,e => { Phase:k, Value:e.Sum('$.Value|0') }").ToArray();

Solution 6:

I would check lodash groupBy it seems to do exactly what you are looking for. It is also quite lightweight and really simple.

Fiddle example: https://jsfiddle.net/r7szvt5k/

Provided that your array name is arr the groupBy with lodash is just:

import groupBy from 'lodash/groupBy';
// if you still use require:
// const groupBy = require('lodash/groupBy');

const a = groupBy(arr, function(n) {
  return n.Phase;
});
// a is your array grouped by Phase attribute