SoulBook/soulbook/recommend/cosinesimilarity.py
2024-08-01 19:38:07 +08:00

59 lines
2.1 KiB
Python

from collections import Counter
from functools import reduce
from math import sqrt

import numpy as np
class CosineSimilarity(object):
    """Rank candidates by cosine similarity to a query.

    The query and each candidate are converted into term-count vectors over
    their combined vocabulary, and each candidate is scored by the cosine of
    the angle between its vector and the query's vector.
    """

    def __init__(self, initQuery, userData):
        """Store the query and the candidates to compare against it.

        :param initQuery: query tokens. Must be a sequence of hashable items
            that can be concatenated with every candidate using ``+``
            (e.g. all ``list`` or all ``str``).
        :param userData: mapping of candidate key -> candidate tokens.
        """
        self.title = initQuery
        self.data = userData

    def create_vector(self):
        """Build paired term-count vectors for the query and each candidate.

        :return: ``{key: [titleVector, valueVector]}`` where both vectors
            have one entry per distinct token of ``query + candidate``,
            holding how often that token occurs in the query and in the
            candidate respectively. Components are ordered by first
            occurrence in ``query + candidate``, so the output is
            reproducible across runs.
        """
        # The query never changes, so count its tokens once up front.
        title_counts = Counter(self.title)
        wordVector = {}
        for web, value in self.data.items():
            value_counts = Counter(value)
            # dict.fromkeys de-duplicates while keeping first-seen order,
            # unlike set() whose order varies with string hash randomisation.
            vocabulary = dict.fromkeys(self.title + value)
            wordVector[web] = [
                [title_counts[word] for word in vocabulary],
                [value_counts[word] for word in vocabulary],
            ]
        return wordVector

    def calculate(self, wordVector):
        """Score every candidate by cosine similarity and rank the results.

        :param wordVector: ``{key: [titleVector, valueVector]}`` as produced
            by :meth:`create_vector`.
        :return: list of single-entry dicts ``[{key: score}, ...]`` sorted by
            score in descending order. Scores are rounded to 5 decimal
            places and only candidates with a score greater than 0 are kept.
        """
        scores = {}
        for web, (title_vector, value_vector) in wordVector.items():
            title_arr = np.asarray(title_vector, dtype=float)
            value_arr = np.asarray(value_vector, dtype=float)
            denominator = np.linalg.norm(title_arr) * np.linalg.norm(value_arr)
            # An empty or all-zero vector has no direction, so the cosine is
            # undefined; skip the candidate instead of dividing by zero.
            if denominator == 0:
                continue
            score = round(float(np.dot(title_arr, value_arr) / denominator), 5)
            if score > 0:
                scores[web] = score
        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        return [{web: score} for web, score in ranked]