How to flatten a list of nested objects in Python

81 views Asked by At

I have a list of nested objects:

[
  {
    "person": "abc",
    "city": "united states",
    "facebooklink": "link",
    "address": "united states",
    "united states": [
      {
        "person": "cdf",
        "city": "ohio",
        "facebooklink": "link",
        "address": "united states/ohio",
        "ohio": [
          {
            "person": "efg",
            "city": "clevland",
            "facebooklink": "link",
            "address": "united states/ohio/clevland",
            "clevland": [
              {
                "person": "jkl",
                "city": "Street A",
                "facebooklink": "link",
                "address": "united states/ohio/clevland/Street A",
                "Street A": [
                  {
                    "person": "jkl",
                    "city": "House 1",
                    "facebooklink": "link",
                    "address": "united states/ohio/clevland/Street A/House 1"
                  }
                ]
              }
            ]
          },
          {
            "person": "ghi",
            "city": "columbus",
            "facebooklink": "link",
            "address": "united states/ohio/columbus"
          }
        ]
      },
      {
        "person": "abc",
        "city": "washington",
        "facebooklink": "link",
        "address": "united states/washington"
      }
    ]
  }
]

How can I flatten it to the following?

[
  {
    "person": "abc",
    "city": "united states",
    "facebooklink": "link",
    "address": "united states"
  },
  {
    "person": "cdf",
    "city": "ohio",
    "facebooklink": "link",
    "address": "united states/ohio"
  },
  {
    "person": "efg",
    "city": "clevland",
    "facebooklink": "link",
    "address": "united states/ohio/clevland"
  },
  {
    "person": "jkl",
    "city": "Street A",
    "facebooklink": "link",
    "address": "united states/ohio/clevland/Street A"
  },
  {
    "person": "jkl",
    "city": "House 1",
    "facebooklink": "link",
    "address": "united states/ohio/clevland/Street A/House 1"
  },
  {
    "person": "ghi",
    "city": "columbus",
    "facebooklink": "link",
    "address": "united states/ohio/columbus"
  },
  {
    "person": "abc",
    "city": "washington",
    "facebooklink": "link",
    "address": "united states/washington"
  }
]

I am trying to achieve this using `flatten` from the `flatten_json` package.

2

There are 2 answers

0
str1ng On BEST ANSWER

Here's one approach: define a custom recursive function that takes two arguments — the list of objects (or a single object) and a list that accumulates the flattened objects. For each object, we create a new dictionary containing only the keys we want to keep and append it to the accumulator list. We then iterate over the object's items to check whether any value is a nested list; if one is, we call the function recursively with that list.

def flatten_objects(objects, flattened=None, *,
                    keys=('person', 'city', 'facebooklink', 'address')):
    """Flatten a tree of nested record dicts into one flat list.

    Each record is emitted before the records nested beneath it, and
    siblings keep their original order.

    Args:
        objects: A list of record dicts, or a single record dict.
        flattened: Optional list to append results to. A new list is
            created when omitted; a supplied list is mutated in place.
        keys: Names of the fields copied into each flattened record.
            Defaults to the four fields used in the question.

    Returns:
        The accumulator list holding one dict per record, each containing
        only the requested ``keys``.

    Raises:
        KeyError: If a record lacks one of the requested ``keys``.
    """
    if flattened is None:
        flattened = []

    # Convert a single dict to a list for consistency
    if isinstance(objects, dict):
        objects = [objects]

    for obj in objects:
        # assuming keys exist, extract them
        flattened.append({key: obj[key] for key in keys})

        # Recursively process any nested lists; the key they are stored
        # under is irrelevant, so only the values are inspected.
        for value in obj.values():
            if isinstance(value, list):
                flatten_objects(value, flattened, keys=keys)

    return flattened

# Sample input: each record keeps its child records in a list stored under
# a key equal to the record's own "city" value.
nested_objects= [
  {
    "person": "abc",
    "city": "united states",
    "facebooklink": "link",
    "address": "united states",
    "united states": [
      {
        "person": "cdf",
        "city": "ohio",
        "facebooklink": "link",
        "address": "united states/ohio",
        "ohio": [
          {
            "person": "efg",
            "city": "clevland",
            "facebooklink": "link",
            "address": "united states/ohio/clevland",
            "clevland": [
              {
                "person": "jkl",
                "city": "Street A",
                "facebooklink": "link",
                "address": "united states/ohio/clevland/Street A",
                "Street A": [
                  {
                    "person": "jkl",
                    "city": "House 1",
                    "facebooklink": "link",
                    "address": "united states/ohio/clevland/Street A/House 1"
                  }
                ]
              }
            ]
          },
          {
            "person": "ghi",
            "city": "columbus",
            "facebooklink": "link",
            "address": "united states/ohio/columbus"
          }
        ]
      },
      {
        "person": "abc",
        "city": "washington",
        "facebooklink": "link",
        "address": "united states/washington"
      }
    ]
  }
]
# Flatten the nested records into a single list and print it.
flattened_list = flatten_objects(nested_objects)
print(flattened_list)

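Running this produces the following result (reformatted here for readability; `print` itself emits the list on a single line using Python's repr, with single-quoted strings):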

[
   {
      "person":"abc",
      "city":"united states",
      "facebooklink":"link",
      "address":"united states"
   },
   {
      "person":"cdf",
      "city":"ohio",
      "facebooklink":"link",
      "address":"united states/ohio"
   },
   {
      "person":"efg",
      "city":"clevland",
      "facebooklink":"link",
      "address":"united states/ohio/clevland"
   },
   {
      "person":"jkl",
      "city":"Street A",
      "facebooklink":"link",
      "address":"united states/ohio/clevland/Street A"
   },
   {
      "person":"jkl",
      "city":"House 1",
      "facebooklink":"link",
      "address":"united states/ohio/clevland/Street A/House 1"
   },
   {
      "person":"ghi",
      "city":"columbus",
      "facebooklink":"link",
      "address":"united states/ohio/columbus"
   },
   {
      "person":"abc",
      "city":"washington",
      "facebooklink":"link",
      "address":"united states/washington"
   }
]
0
cdlane On

If these are dictionary-like objects where the order of keys within each dictionary is not significant computationally (even though a certain order may look better), then we can simplify (and possibly speed up) the solution with something like:

def flatten_objects(list_of_objects):
    """Flatten nested record dicts into one flat list of dicts.

    A value counts as a nested level only when it is a list (or tuple)
    whose elements are all dicts. Every other value is copied unchanged
    into the flattened record, whatever its type.

    Nested records are added to the result as soon as their key is reached,
    and the enclosing record is added after all of its keys have been
    visited, so children appear before their parent in the output.

    Args:
        list_of_objects: An iterable of record dicts.

    Returns:
        A new list with one dict per record, each holding only the
        record's non-nested fields. Records with no such fields are
        omitted.
    """
    list_of_dictionaries = []

    for dictionary in list_of_objects:
        new_dictionary = {}

        for key, value in dictionary.items():
            # Check the container type first: running all() on an arbitrary
            # value raises TypeError for non-iterables (int, None), and is
            # vacuously true for empty strings/dicts, which would then be
            # dropped from the record.
            if isinstance(value, (list, tuple)) and all(
                isinstance(element, dict) for element in value
            ):
                list_of_dictionaries.extend(flatten_objects(value))
            else:
                new_dictionary[key] = value

        if new_dictionary:
            list_of_dictionaries.append(new_dictionary)

    return list_of_dictionaries

# Demo: runs only when this file is executed as a script.
if __name__ == "__main__":
    import pprint
    # Pretty-printer with a 4-space indent for the nested output.
    pp = pprint.PrettyPrinter(indent=4)

    # Sample input: each record keeps its child records in a list stored
    # under a key equal to the record's own "city" value.
    nested_objects = [
        {
            "person": "abc",
            "city": "united states",
            "facebooklink": "link",
            "address": "united states",
            "united states": [
                {
                    "person": "cdf",
                    "city": "ohio",
                    "facebooklink": "link",
                    "address": "united states/ohio",
                    "ohio": [
                        {
                            "person": "efg",
                            "city": "clevland",
                            "facebooklink": "link",
                            "address": "united states/ohio/clevland",
                            "clevland": [
                                {
                                    "person": "jkl",
                                    "city": "Street A",
                                    "facebooklink": "link",
                                    "address": "united states/ohio/clevland/Street A",
                                    "Street A": [
                                        {
                                            "person": "jkl",
                                            "city": "House 1",
                                            "facebooklink": "link",
                                            "address": "united states/ohio/clevland/Street A/House 1"
                                        }
                                    ]
                                }
                            ]
                        },
                        {
                            "person": "ghi",
                            "city": "columbus",
                            "facebooklink": "link",
                            "address": "united states/ohio/columbus"
                        }
                    ]
                },
                {
                    "person": "abc",
                    "city": "washington",
                    "facebooklink": "link",
                    "address": "united states/washington"
                }
            ]
        }
    ]

    # Flatten the sample data and pretty-print the resulting list.
    pp.pprint(flatten_objects(nested_objects))

With output:

% python3 test.py
[   {   'address': 'united states/ohio/clevland/Street A/House 1',
        'city': 'House 1',
        'facebooklink': 'link',
        'person': 'jkl'},
    {   'address': 'united states/ohio/clevland/Street A',
        'city': 'Street A',
        'facebooklink': 'link',
        'person': 'jkl'},
    {   'address': 'united states/ohio/clevland',
        'city': 'clevland',
        'facebooklink': 'link',
        'person': 'efg'},
    {   'address': 'united states/ohio/columbus',
        'city': 'columbus',
        'facebooklink': 'link',
        'person': 'ghi'},
    {   'address': 'united states/ohio',
        'city': 'ohio',
        'facebooklink': 'link',
        'person': 'cdf'},
    {   'address': 'united states/washington',
        'city': 'washington',
        'facebooklink': 'link',
        'person': 'abc'},
    {   'address': 'united states',
        'city': 'united states',
        'facebooklink': 'link',
        'person': 'abc'}]
%