from math import sqrt
def euclid(prefer, person1, person2):
    """Return a distance-based similarity score for two people.

    The score is ``1 / (1 + d)`` where ``d`` is the sum of squared
    differences between the two people's ratings over the items they have
    both rated (note: the sum itself, not its square root).

    Args:
        prefer: mapping of person -> {item: rating}.
        person1: key of the first person in ``prefer``.
        person2: key of the second person in ``prefer``.

    Returns:
        0 when the two people share no rated item, otherwise a float in
        (0, 1]; 1.0 means identical ratings on every shared item.

    Raises:
        KeyError: if either person is missing from ``prefer``.
    """
    mine = prefer[person1]
    theirs = prefer[person2]
    shared = [item for item in mine if item in theirs]
    if not shared:
        return 0
    squared_gap = sum((mine[item] - theirs[item]) ** 2 for item in shared)
    # 1.0 forces true division: with integer ratings, ``1/(1+s)`` truncates
    # to 0 on Python 2.
    return 1.0 / (1 + squared_gap)
def pearson(prefer, person1, person2):
    """Return the Pearson correlation of two people's ratings.

    Only items rated by both people are considered.

    Args:
        prefer: mapping of person -> {item: rating}.
        person1: key of the first person in ``prefer``.
        person2: key of the second person in ``prefer``.

    Returns:
        0 when the two people share no rated item or when the correlation
        is undefined (either person's shared ratings have no variance);
        otherwise a float, nominally in [-1, 1].

    Raises:
        KeyError: if either person is missing from ``prefer``.
    """
    mine = prefer[person1]
    theirs = prefer[person2]
    shared = [item for item in mine if item in theirs]
    if not shared:
        return 0

    # A float count keeps every division below a true division, even with
    # integer ratings on Python 2.
    n = float(len(shared))

    sum1 = sum(mine[item] for item in shared)
    sum2 = sum(theirs[item] for item in shared)
    sq1 = sum(mine[item] ** 2 for item in shared)
    sq2 = sum(theirs[item] ** 2 for item in shared)
    ps = sum(mine[item] * theirs[item] for item in shared)

    num = ps - (sum1 * sum2 / n)
    radicand = (sq1 - sum1 ** 2 / n) * (sq2 - sum2 ** 2 / n)
    # Mathematically the radicand is >= 0, but float rounding can push it
    # slightly below zero, which would make sqrt() raise ValueError.
    if radicand <= 0:
        return 0
    return num / sqrt(radicand)
def ranking(prefer, person1, n, pe=pearson):
    """Return the ``n`` people most similar to ``person1``.

    Args:
        prefer: mapping of person -> {item: rating}.
        person1: key of the person to compare everyone else against.
        n: number of entries to keep from the top of the ranking.
        pe: similarity function called as ``pe(prefer, person1, other)``.

    Returns:
        A list of ``(score, other)`` tuples, highest score first, cut to
        the first ``n`` entries.
    """
    scored = []
    for other in prefer:
        if other != person1:
            scored.append((pe(prefer, person1, other), other))
    # Ascending sort followed by a full reversal puts the best match first.
    best_first = sorted(scored)[::-1]
    return best_first[:n]
def getRecommendations(prefs, person, similarity=pearson):
    """Recommend items for ``person`` from other people's weighted ratings.

    Each other person with a positive similarity to ``person`` contributes
    their ratings, weighted by that similarity, for every item ``person``
    has not rated (or has rated 0). An item's score is the weighted total
    divided by the sum of the contributing similarities.

    Args:
        prefs: mapping of person -> {item: rating}.
        person: key of the person to produce recommendations for.
        similarity: function called as ``similarity(prefs, person, other)``.

    Returns:
        A list of ``(score, item)`` tuples, highest score first.
    """
    weighted_totals = {}
    weight_sums = {}
    for other, ratings in prefs.items():
        if other == person:
            continue
        sim = similarity(prefs, person, other)
        # People with zero or negative similarity do not contribute.
        if sim <= 0:
            continue
        for item, rating in ratings.items():
            already_rated = item in prefs[person] and prefs[person][item] != 0
            if already_rated:
                continue
            weighted_totals[item] = weighted_totals.get(item, 0) + rating * sim
            weight_sums[item] = weight_sums.get(item, 0) + sim

    ranked = sorted(
        (weighted_totals[item] / weight_sums[item], item)
        for item in weighted_totals
    )
    ranked.reverse()
    return ranked
def loadMovieLens(path='/home/prashant/Downloads/ml-100k'):
    """Load MovieLens ratings as a nested ``{user: {title: rating}}`` dict.

    Reads two files from ``path``:

    * ``u.item`` -- ``|``-separated; the first two fields are the movie id
      and the movie title.
    * ``u.data`` -- tab-separated ``user, movie id, rating, timestamp``.

    Args:
        path: directory containing ``u.item`` and ``u.data``.

    Returns:
        Mapping of user id (text) -> {movie title: rating as float}.

    Raises:
        IOError / OSError: if either file cannot be opened.
        KeyError: if ``u.data`` references a movie id absent from ``u.item``.
        ValueError: if a non-blank line does not have the expected fields.
    """
    import io
    import os.path

    # The ml-100k files hold non-ASCII titles in ISO-8859-1; decoding them
    # explicitly avoids a UnicodeDecodeError under a UTF-8 default.
    encoding = 'latin-1'

    titles = {}
    with io.open(os.path.join(path, 'u.item'), encoding=encoding) as item_file:
        for line in item_file:
            if not line.strip():
                continue  # tolerate blank lines
            movie_id, title = line.split('|')[0:2]
            titles[movie_id] = title

    prefs = {}
    with io.open(os.path.join(path, 'u.data'), encoding=encoding) as data_file:
        for line in data_file:
            if not line.strip():
                continue  # tolerate blank lines
            user, movie_id, rating, _timestamp = line.split('\t')
            prefs.setdefault(user, {})[titles[movie_id]] = float(rating)
    return prefs
# Ratings loaded from the MovieLens files at loadMovieLens' default path.
prefs = loadMovieLens()

# Small hand-written ratings table: critic -> {movie title: rating}.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}

# Each print takes a single pre-formatted string so the statement is valid,
# and prints the same text, on both Python 2 and Python 3.
print("%s %s" % ("Printing Similiarilty between Lisa and Gene using Euclid Method",
                 euclid(critics, 'Lisa Rose', 'Gene Seymour')))
print("%s %s" % ("Printing Similiarilty between Lisa and Gene using Pearson Method",
                 pearson(critics, 'Lisa Rose', 'Gene Seymour')))

# The script reports the next two results twice; compute each once and
# reuse it rather than re-running the ranking and the full-data-set pass.
toby_neighbours = ranking(critics, 'Toby', 3)
print("%s %s" % ("Printing Top Three user similiar to Toby ", toby_neighbours))
print("%s %s" % ("Getting Recommendation for Toby ",
                 getRecommendations(critics, 'Toby')))

# Applying the same algorithm to the large data set.
movie_recommendations = getRecommendations(prefs, '87')[0:20]
print("%s %s" % ("Movies ", movie_recommendations))
print("%s %s" % ("Printing Top Three user similiar to Toby ", toby_neighbours))
print("%s %s" % ("Movies ", movie_recommendations))
# Data set used by loadMovieLens: http://1drv.ms/1aWoPa4