import sys
import pandas as pd
from helpers.plot_helper import PlotHelper
# Load the ratings table from disk.
ratings_sparseness_dataframe = pd.read_json('../data/ratings_sparseness.json')

# Capture the nested-dict form (column label -> {row label -> value}) BEFORE
# transposing; the code below looks users up by name in this dict, so the
# columns are presumably user names -- verify against the JSON file.
ratings_sparseness_dict = ratings_sparseness_dataframe.to_dict()

# Flip rows and columns for display only.
ratings_sparseness_dataframe = ratings_sparseness_dataframe.T
display(ratings_sparseness_dataframe)
def get_manhattan_distance(rating1, rating2):
    """Return the Manhattan (L1) distance between two users' ratings.

    Only items rated by both users contribute. A rating equal to the
    string '-' is treated as "not rated" and is skipped.

    Args:
        rating1: Mapping of item -> rating for the first user.
        rating2: Mapping of item -> rating for the second user.

    Returns:
        The sum of absolute rating differences over the items both users
        rated; 0 when they share no rated item.
    """
    distance = 0
    for key, value1 in rating1.items():
        if value1 == '-':
            continue
        # A key missing from rating2 is handled like an explicit '-'.
        value2 = rating2.get(key, '-')
        if value2 == '-':
            continue
        distance += abs(value1 - value2)
    return distance
# Manhattan distance from saeed to two other users.
dt = ratings_sparseness_dict
saeed_vs_alireza = get_manhattan_distance(dt['saeed'], dt['alireza'])
print('saeed-alireza:%f' % saeed_vs_alireza)
saeed_vs_abbas = get_manhattan_distance(dt['saeed'], dt['abbas'])
print('saeed-abbas:%f' % saeed_vs_abbas)
def get_euclidean_distance(rating1, rating2):
    """Return the Euclidean (L2) distance between two users' ratings.

    Only items rated by both users contribute. A rating equal to the
    string '-' is treated as "not rated" and is skipped.

    Args:
        rating1: Mapping of item -> rating for the first user.
        rating2: Mapping of item -> rating for the second user.

    Returns:
        The square root of the summed squared rating differences over the
        items both users rated; 0.0 when they share no rated item.
    """
    squared_sum = 0
    for key, value1 in rating1.items():
        if value1 == '-':
            continue
        # A key missing from rating2 is handled like an explicit '-'.
        value2 = rating2.get(key, '-')
        if value2 == '-':
            continue
        difference = value1 - value2
        squared_sum += difference * difference
    # Use a float exponent: `1/2` evaluates to 0 under integer division,
    # which would make the result 1 regardless of the input.
    return squared_sum ** 0.5
# Euclidean distance from saeed to two other users.
dt = ratings_sparseness_dict
saeed_vs_alireza = get_euclidean_distance(dt['saeed'], dt['alireza'])
print('saeed-alireza:%f' % saeed_vs_alireza)
saeed_vs_abbas = get_euclidean_distance(dt['saeed'], dt['abbas'])
print('saeed-abbas:%f' % saeed_vs_abbas)
def dot(A, B):
    """Return the dot product of two equally ordered numeric iterables.

    Elements are paired by position with ``zip``, so if one iterable is
    longer than the other the extra elements are ignored.

    Args:
        A: First iterable of numbers.
        B: Second iterable of numbers.

    Returns:
        The sum of the pairwise products; 0 when either iterable is empty.
    """
    return sum(a * b for a, b in zip(A, B))
def cosine_similarity(rating1, rating2):
    """Return the cosine similarity between two users' rating vectors.

    A rating equal to the string '-' is treated as "not rated". Each
    user's vector length is computed over everything that user rated,
    while the dot product only includes items both users rated (i.e. an
    unrated item counts as 0 in the product).

    Args:
        rating1: Mapping of item -> rating for the first user.
        rating2: Mapping of item -> rating for the second user.

    Returns:
        The cosine of the angle between the two rating vectors, or 0.0
        when either user has no non-zero rating (the similarity is
        undefined there, and dividing would raise ZeroDivisionError).
    """
    rated1 = {item: score for item, score in rating1.items() if score != '-'}
    rated2 = {item: score for item, score in rating2.items() if score != '-'}

    # Pair ratings by item key. Pairing the filtered values by position
    # would multiply ratings of unrelated items whenever the two users
    # rated different items or the dicts are ordered differently.
    dot_product = sum(
        score * rated2[item] for item, score in rated1.items() if item in rated2
    )
    norm1 = sum(score * score for score in rated1.values()) ** 0.5
    norm2 = sum(score * score for score in rated2.values()) ** 0.5

    if norm1 == 0 or norm2 == 0:
        return 0.0
    return dot_product / (norm1 * norm2)
# Cosine similarity between saeed and two other users.
dt = ratings_sparseness_dict
saeed_vs_alireza = cosine_similarity(dt['saeed'], dt['alireza'])
print('saeed-alireza:%f' % saeed_vs_alireza)
saeed_vs_abbas = cosine_similarity(dt['saeed'], dt['abbas'])
print('saeed-abbas:%f' % saeed_vs_abbas)