I'm working with datasets, and this is what I have written so far.
import itertools
import csv
import numpy
def counter(x):
    """Count how many times each distinct value occurs in *x*.

    Args:
        x: Array-like of values; multi-dimensional input is flattened
            by ``numpy.unique`` before counting.

    Returns:
        A 2-column numpy array with one row per distinct value:
        column 0 holds the value, column 1 its number of occurrences.
        Rows are in the sorted order produced by ``numpy.unique``.
    """
    # Count the argument itself rather than a module-level ``result``,
    # so the function works for any caller-supplied data.
    unique, counts = numpy.unique(x, return_counts=True)
    # Stack values and counts as two rows, then transpose into pairs.
    table = numpy.asarray((unique, counts)).T
    return table
def findsubsets(S, m):
    """Return every *m*-element combination of the items in *S*.

    Args:
        S: Iterable of items to choose from.
        m: Number of items in each combination.

    Returns:
        A list of tuples, one per combination, in the order produced by
        ``itertools.combinations`` (which follows the order of *S*).
    """
    return list(itertools.combinations(S, m))
# Ask for the minimum number of occurrences an item needs to be kept.
sup = input("enter min support\n")
# Convert once, up front; the comparison below must use the integer,
# not the raw text that was typed in.
sup1 = int(sup)

# Read every CSV row into a list of lists. Text mode with newline=""
# is what the csv module expects, and ``with`` closes the file.
with open("test.csv", newline="") as csv_file:
    reader = csv.reader(csv_file, delimiter=",")
    X = list(reader)

result = numpy.array(X).astype(str)
print(result)
(m, n) = result.shape

# list1 holds one (item, count) row per distinct item in the data.
list1 = counter(result)
print("deleting items which have less support")
print(list1)
(a, b) = list1.shape

# Keep only the items whose count reaches the minimum support. The
# counts are converted with int() because list1 may store them as text.
l = [item for item, count in list1 if int(count) >= sup1]
print("after deleting")
print(l)

print("making sets")
# All candidate pairs built from the surviving items.
o = findsubsets(l, 2)
print(o)
print(X)
The list `o` contains these tuples:
[('Beer', 'Bread'), ('Beer', 'Coke'), ('Beer', 'Diaper'), ('Beer', 'Milk'), ('Bread', 'Coke'), ('Bread', 'Diaper'), ('Bread', 'Milk'), ('Coke', 'Diaper'), ('Coke', 'Milk'), ('Diaper', 'Milk')]
and the list `X` contains:
[['Bread', 'Diaper', 'Beer', 'Eggs'], ['Milk', 'Diaper', 'Beer', 'Coke'], ['Bread', 'Milk', 'Diaper', 'Beer'], ['Bread', 'Milk', 'Diaper', 'Coke']]
I want to check whether each tuple in list `o` actually occurs in list `X`, and how often.
For example, ('Beer', 'Bread') is a tuple in list `o`.
'Beer' and 'Bread' appear together in 2 of the rows of list `X`, so I want to get the count 2. How can I do that?
EDIT :
I solved this using sets:
from collections import defaultdict

# Candidate item pairs to look for.
O = [('Beer', 'Bread'), ('Beer', 'Coke'), ('Beer', 'Diaper'), ('Beer', 'Milk'), ('Bread', 'Coke'), ('Bread', 'Diaper'), ('Bread', 'Milk'), ('Coke', 'Diaper'), ('Coke', 'Milk'), ('Diaper', 'Milk')]
# Transactions: each inner list is one row of items.
X = [['Bread', 'Diaper', 'Beer', 'Eggs'], ['Milk', 'Diaper', 'Beer', 'Coke'], ['Bread', 'Milk', 'Diaper', 'Beer'], ['Bread', 'Milk', 'Diaper', 'Coke']]

# Turn each transaction into a set once, instead of once per pair.
transactions = [set(row) for row in X]

# Maps each pair to the number of transactions that contain both items.
# Named pair_counts (not ``dict``) so the builtin is not shadowed.
# Pairs that never occur are simply absent from the mapping.
pair_counts = defaultdict(int)
for pair in O:
    wanted = set(pair)
    for items in transactions:
        # ``<=`` on sets is the subset test: every item of the pair
        # must be present in the transaction.
        if wanted <= items:
            pair_counts[pair] += 1