Tuesday, September 22, 2020

Simple GroupBy Function for Python

After a lot of research, I found that Python has no good groupby function that you can use in a single line (like LINQ's GroupBy in C#). So I've made a sample reusable groupby function that works with one or more keys. I wouldn't say it has great performance (actually, far from it), but it works and it is easier to use :)

You may want to make changes based on your input:

import copy
import itertools

def yogee_groupby(coll, keys):
    """Group a collection of dicts by one or more keys.

    Args:
        coll: Iterable of dict-like elements. It is deep-copied, so the
            returned groups never alias the caller's objects.
        keys: Non-empty sequence of field names to group by.

    Returns:
        A dict mapping each group key to the list of (copied) elements
        that share it. Group keys appear in sorted order, and elements
        inside a group keep their original relative order.

        With a single key, the group key is the element's value for that
        field. With several keys, the group key is the concatenation of
        the elements' string values for those fields, in ``keys`` order;
        two different value combinations that concatenate to the same
        string (e.g. "V1" + "V22" and "V12" + "V2") share one group.

    Raises:
        IndexError: If ``keys`` is empty.
        KeyError: If an element lacks one of the requested fields.
        TypeError: If several keys are given and a value is not a string.
    """
    if len(keys) > 1:
        def group_key(item):
            return "".join(item[key] for key in keys)
    else:
        # Looked up eagerly so an empty ``keys`` fails even for empty input.
        only_key = keys[0]

        def group_key(item):
            return item[only_key]

    # Pair each copied element with its group key instead of writing a
    # temporary field into the element: that would clobber (and then
    # delete) a real field whose name equals the concatenated key names.
    keyed = [(group_key(item), item) for item in copy.deepcopy(coll)]

    # itertools.groupby only merges adjacent items, so sort by key first.
    # list.sort is stable, which preserves input order within a group.
    keyed.sort(key=lambda pair: pair[0])

    grouped = {}
    for k, members in itertools.groupby(keyed, key=lambda pair: pair[0]):
        grouped[k] = [item for _, item in members]
    return grouped


def test_groupby_2():
    """Demo: group sample records by two keys, then by one, and print both.

    Each record has two scalar fields ("E1", "E2"), a list field ("E3")
    and a nested-dict field ("E4").
    """
    # NOTE(review): the nested dict was originally also keyed "E3", which
    # silently discarded the list value; "E4" is assumed to be the intent.
    test_data = [
        {
            "E1": "V1",
            "E2": "V2",
            "E3": ["V31", "V32", "V33", "V34"],
            "E4": {"E31": "V31", "E32": "V32", "E33": "V33", "E34": "V34"},
        },
        {
            "E1": "W1",
            "E2": "W2",
            "E3": ["W31", "W32", "W33", "W34"],
            "E4": {"E31": "W31", "E32": "W32", "E33": "W33", "E34": "W34"},
        },
        {
            "E1": "V1",
            "E2": "V2",
            "E3": ["VV31", "VV32", "VV33", "VV34"],
            "E4": {"E31": "VV31", "E32": "VV32", "E33": "VV33", "E34": "VV34"},
        },
        {
            "E1": "V1",
            "E2": "V22",
            "E3": ["X31", "X32", "X33", "X34"],
            "E4": {"E31": "X31", "E32": "X32", "E33": "X33", "E34": "X34"},
        },
    ]
    # Group by the ("E1", "E2") pair.
    grouped = yogee_groupby(test_data, ["E1", "E2"])
    print(grouped)
    # Group by "E1" alone.
    grouped = yogee_groupby(test_data, ["E1"])
    print(grouped)

# Run the demo whenever this file is executed (this also fires on import).
test_groupby_2()