Package Arules in Python

2.5k views Asked by At

I am using arules in Python. I executed the code below to generate all associations. I want to know how I can convert the output of arules to some data structure in Python. The output is of type 'rpy2.robjects.methods.RS4'. The code is below:

# Reproduction script: run arules' apriori on a small categorical data frame
# through rpy2 and print the Python type of the object that comes back.
from rpy2.robjects import pandas2ri
pandas2ri.activate()
import collections
from collections import OrderedDict
import pandas as pd
import numpy as np
from rpy2.robjects.vectors import ListVector
from rpy2.robjects.packages import importr
arules = importr("arules")

# apriori parameters, kept in insertion order and wrapped as an R list
od = OrderedDict([
    ("supp", 0.0005),
    ("conf", 0.7),
    ("target", 'rules'),
])
result = ListVector(od)

# ten rows over the columns A, B, C; every cell is the string '1' or '0'
df = pd.DataFrame(
    [
        ['1', '1', '1'],
        ['1', '0', '0'],
        ['1', '1', '1'],
        ['1', '0', '0'],
        ['1', '1', '1'],
        ['1', '0', '1'],
        ['1', '1', '1'],
        ['0', '0', '1'],
        ['0', '1', '1'],
        ['1', '0', '1'],
    ],
    columns=list('ABC'),
)

# turn each column into a pandas categorical before handing the frame to R
for column in ('A', 'B', 'C'):
    df[column] = df[column].astype('category')

my_rules = arules.apriori(df, parameter=result)
print("herererererere")
print(type(my_rules))
print("rules")
1

There is 1 answer

0
Michael Hahsler On BEST ANSWER

Here is a minimalist example of how to do this:

# Minimal example: mine frequent itemsets with R's arules from Python (via
# rpy2) and print the results in three different forms.

# prepare the data as a dataframe with boolean values
# (presumably one row per transaction and one column per item A, B, C)
import pandas as pd

df = pd.DataFrame(
    [
        [True, True, True],
        [True, False, False],
        [True, True, True],
        [True, False, False],
        [True, True, True],
        [True, False, True],
        [True, True, True],
        [False, False, True],
        [False, True, True],
        [True, False, True],
    ],
    columns=list('ABC'),
)

# set up rpy2
from rpy2.robjects import pandas2ri
# activate the pandas converter so `df` can be passed directly to R functions
pandas2ri.activate()
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
arules = importr("arules")

# run apriori
itsets = arules.apriori(
    df,
    parameter=ro.ListVector({"supp": 0.1, "target": "frequent itemsets"}),
)

# get itemsets as a dataframe
print(arules.DATAFRAME(itsets))

# get quality as a dataframe
print(itsets.slots["quality"])

# get itemsets as a matrix
# as() and items() are evaluated on the R side, so wrap them in an R closure
itemset_as_matrix = ro.r('function(x) as(items(x), "matrix")')
# print the result like the two steps above: a bare expression is only echoed
# in a REPL/notebook and would be silently discarded when run as a script
print(itemset_as_matrix(itsets))