In [1]:
import sys
import pandas as pd
from helpers.plot_helper import PlotHelper
from math import sqrt

Load the JSON dataset with pandas

In [2]:
# Read the ratings file into a DataFrame; the displayed frame has one column
# per movie and one row per user.
ratings_grade_inflation2_dataframe = pd.read_json(
    '../data/ratings_grade_inflation2.json'
)

# Column-oriented plain-dict view: {column label: {row label: value}}.
# 'dict' is the default orientation of DataFrame.to_dict, spelled out here.
ratings_grade_inflation2_dict = ratings_grade_inflation2_dataframe.to_dict(
    orient='dict'
)

Display the dataset

In [3]:
# Render the ratings table with the notebook's rich display.
# (The former self-assignment of the DataFrame was a no-op and has been removed.)
display(ratings_grade_inflation2_dataframe)
Interstellar Inception Hangover
u1 1 1 4
u2 2 2 3
u3 3 3 2
u4 4 4 1

Implementation

In [6]:
def dot(A, B):
    """Return the dot product of two iterables of numbers.

    Elements are paired positionally with ``zip``, so any surplus elements
    of the longer iterable are ignored.
    """
    products = (x * y for x, y in zip(A, B))
    return sum(products)

def get_cosine_similarity(rating1, rating2):
    """Return the cosine similarity between two rating dicts.

    Args:
        rating1: mapping whose values are the first rating vector.
        rating2: mapping whose values are the second rating vector.

    Returns:
        ``a.b / (|a| * |b|)`` as a float, or ``0.0`` when either vector has
        zero magnitude (the ratio is undefined there and previously raised
        ZeroDivisionError).

    Values are paired by position, so both mappings are expected to list
    their entries in the same order (true for columns taken from the same
    ``DataFrame.to_dict()`` result).
    """
    # Materialise the views once; each vector is traversed more than once.
    a = list(rating1.values())
    b = list(rating2.values())

    # Local inner product: this notebook later rebinds the global name `dot`
    # to a function with a different signature, so relying on the global
    # would make this function fail once that cell has been executed.
    def _inner(u, v):
        return sum(x * y for x, y in zip(u, v))

    norm_product = (_inner(a, a) ** .5) * (_inner(b, b) ** .5)
    if norm_product == 0:
        return 0.0
    return _inner(a, b) / norm_product

# Short alias for the column-oriented ratings dict built when the data was loaded.
dt = ratings_grade_inflation2_dict

# A stray unary `+` after the `%` operator was removed; the printed text is unchanged.
print('Interstellar-Inception:%f' % get_cosine_similarity(dt['Interstellar'], dt['Inception']))
print('Interstellar-Hangover:%f' % get_cosine_similarity(dt['Interstellar'], dt['Hangover']))
Interstellar-Inception:1.000000
Interstellar-Hangover:0.666667
In [5]:
def mean(lst):
    """Return the arithmetic mean of a sized collection of numbers.

    Raises ZeroDivisionError when the collection is empty.
    """
    total = sum(lst)
    count = len(lst)
    return total / count

def dot(A, B, a_mean=0, b_mean=0):
    """Return the dot product of A and B after subtracting a_mean / b_mean.

    Computes ``sum((a - a_mean) * (b - b_mean))`` over positionally paired
    elements (``zip`` stops at the shorter iterable).

    Both offsets default to 0. This definition replaces the earlier
    two-argument ``dot`` in the notebook namespace, so the defaults keep
    plain ``dot(A, B)`` calls working after this cell has run.
    """
    return sum((a - a_mean) * (b - b_mean) for a, b in zip(A, B))

def get_adjusted_cosine_similarity(rating1, rating2):
    """Return the mean-centred cosine similarity between two rating dicts.

    Each vector is centred on its own arithmetic mean before the cosine is
    taken, which makes the result equal to the Pearson correlation of the
    two vectors.
    NOTE(review): some definitions of "adjusted cosine" subtract each
    user's mean rating rather than each vector's own mean -- confirm which
    one is intended here.

    Args:
        rating1: mapping whose values are the first rating vector.
        rating2: mapping whose values are the second rating vector.

    Returns:
        A float, or ``0.0`` when the similarity is undefined: either vector
        is empty or its centred form has zero magnitude (all ratings equal).
        Those cases previously raised ZeroDivisionError.

    Values are paired by position, so both mappings are expected to list
    their entries in the same order.
    """
    a = list(rating1.values())
    b = list(rating2.values())
    if not a or not b:
        return 0.0

    a_mean = sum(a) / len(a)
    b_mean = sum(b) / len(b)
    a_centred = [x - a_mean for x in a]
    b_centred = [y - b_mean for y in b]

    # Local inner product: the global name `dot` is defined twice in this
    # notebook with different signatures, so which one is bound depends on
    # cell execution order.
    def _inner(u, v):
        return sum(x * y for x, y in zip(u, v))

    norm_product = (_inner(a_centred, a_centred) ** .5) * (_inner(b_centred, b_centred) ** .5)
    if norm_product == 0:
        return 0.0
    return _inner(a_centred, b_centred) / norm_product

# Read the ratings dict directly instead of through the `dt` alias created in
# a different cell, so this cell only depends on the data-loading cell.
# A stray unary `+` after the `%` operator was removed; the printed text is unchanged.
print('Interstellar-Inception:%f' % get_adjusted_cosine_similarity(
    ratings_grade_inflation2_dict['Interstellar'],
    ratings_grade_inflation2_dict['Inception'],
))
print('Interstellar-Hangover:%f' % get_adjusted_cosine_similarity(
    ratings_grade_inflation2_dict['Interstellar'],
    ratings_grade_inflation2_dict['Hangover'],
))
Interstellar-Inception:1.000000
Interstellar-Hangover:-1.000000