1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
   | from svd import SVD import numpy as np import traceback
  class SVDPlusPlus(SVD):
      """Collaborative filtering based on SVD++.

      Extends the SVD model with one implicit-feedback vector per item
      (``yItem``).  A user's latent representation is ``pUser[user]`` plus the
      sum of the ``yItem`` vectors of the items in that user's history,
      divided by the square root of the history size.
      """

      def __init__(self, logger, trainset, historyset, topicNum, lr, gamma):
          """Store the SVD++ specific state and delegate to the SVD constructor.

          Args:
              logger: logging object.
              trainset: training set, indexed as ``trainset[user][item]``.
              historyset: history data, indexed as ``historyset[user]``; each
                  entry must provide ``keys()`` returning the items the user
                  interacted with.
              topicNum: number of latent topics (number of singular values).
              lr: learning rate.
              gamma: weight of the regularisation term.
          """
          # Assigned before the base constructor runs so that they already
          # exist if it invokes initModel() (presumably it does; SVD is not
          # visible from this file -- verify).
          self.historyset = historyset
          self.yItem = {}
          super().__init__(logger, trainset, topicNum, lr, gamma)

      def initModel(self):
          """Initialise biasUser, biasItem, pUser, qItem (base class) and yItem.

          Every item known to ``biasItem`` gets a random implicit-feedback
          vector of length ``topicNum`` with entries in ``[0, 0.1)``.
          """
          super().initModel()
          # NOTE(review): a history item that is not a key of biasItem gets no
          # yItem vector and will raise KeyError in predict()/train() --
          # confirm that historyset only contains items of the training set.
          for item in self.biasItem:
              self.yItem[item] = np.random.rand(self.topicNum) / 10

      def _implicitFeedback(self, user):
          """Return ``(historyItems, norm, implicit)`` for ``user``.

          ``historyItems`` are the keys of ``historyset[user]``, ``norm`` is
          ``sqrt(len(historyItems))`` and ``implicit`` is the sum of the
          ``yItem`` vectors of those items divided by ``norm``.

          A user with an empty history gets a zero vector.  Dividing by the
          numpy scalar ``sqrt(0)`` would not raise ZeroDivisionError; it would
          silently yield ``nan`` and poison every parameter it touches.
          """
          historyItems = self.historyset[user].keys()
          count = len(historyItems)
          if count == 0:
              return historyItems, 0.0, np.zeros(self.topicNum)
          norm = np.sqrt(count)
          total = sum(self.yItem[key] for key in historyItems)
          return historyItems, norm, total / norm

      def train(self, iteration, topN):
          """Train the SVD++ model with stochastic gradient descent.

          Objective::

              min power(rated - rate, 2)/2 + gamma * regularization/2
              rate = mean + biasUser + biasItem
                     + qItem * (pUser + yItemUserSum/itemCounter)
              itemCounter = sqrt(len(historyset[user]))
              yItemUserSum = sum(yItem[key] for key in historyset[user])
              regularization = power(biasUser) + power(biasItem) + power(pUser)
                               + power(qItem) + power(yItem)

          Per-sample updates (lr = learning rate), each subtracted from the
          corresponding parameter::

              delta(biasUser) = lr * ((rate - rated) + gamma * biasUser)
              delta(biasItem) = lr * ((rate - rated) + gamma * biasItem)
              delta(qItem) = lr * ((rate - rated) * (pUser + yItemUserSum/itemCounter)
                                   + gamma * qItem)
              delta(pUser) = lr * ((rate - rated) * qItem + gamma * pUser)
              delta(yItem) = lr * ((rate - rated) * qItem/itemCounter + gamma * yItem)

          All deltas of one sample are computed from the parameter values
          before that sample's step is applied.

          Args:
              iteration: number of passes over the training samples.
              topN: forwarded unchanged to ``self.evaluate``.
          """
          self.logger.info('开始训练...')
          sampleSize = len(self.trainMap)
          sampleSeq = np.arange(sampleSize)
          self.logger.info('训练前,误差为:')
          self.evaluate(self.trainset, topN)

          for i in range(iteration):
              np.random.shuffle(sampleSeq)
              for j in sampleSeq:
                  user, item = self.trainMap[j]
                  rateDiff = self.predict(user, item) - self.trainset[user][item]
                  historyItems, norm, implicit = self._implicitFeedback(user)

                  # Compute every gradient first.  The parameter vectors are
                  # updated in place below, so a gradient evaluated after one
                  # of those updates would mix pre- and post-step values.
                  pUser = self.pUser[user]
                  qItem = self.qItem[item]
                  gradBiasUser = rateDiff + self.gamma * self.biasUser[user]
                  gradBiasItem = rateDiff + self.gamma * self.biasItem[item]
                  gradQItem = rateDiff * (pUser + implicit) + self.gamma * qItem
                  gradPUser = rateDiff * qItem + self.gamma * pUser
                  # Part of the yItem gradient shared by all history items.
                  errorTimesQ = rateDiff * qItem

                  self.biasUser[user] -= self.lr * gradBiasUser
                  self.biasItem[item] -= self.lr * gradBiasItem
                  self.qItem[item] -= self.lr * gradQItem
                  self.pUser[user] -= self.lr * gradPUser
                  # Never entered for an empty history, so norm is non-zero
                  # whenever the division is evaluated.
                  for historyItem in historyItems:
                      self.yItem[historyItem] -= self.lr * (
                          errorTimesQ / norm +
                          self.gamma * self.yItem[historyItem])

              # Every tenth pass (including the first, i == 0): report the
              # training error and halve the learning rate.
              if i % 10 == 0:
                  self.logger.info('训练进度[{i}/{iteration}],训练误差为:'.format(
                      i=i, iteration=iteration))
                  self.evaluate(self.trainset, topN)
                  self.lr *= 0.5

          self.logger.info('训练后,误差为:')
          self.evaluate(self.trainset, topN)
          self.logger.info('训练结束!')

      def predict(self, user, item):
          """Predict the rating of ``item`` by ``user``.

          ``rate = mean + biasUser + biasItem
          + qItem . (pUser + yItemUserSum/itemCounter)`` with
          ``itemCounter = sqrt(len(historyset[user]))`` and ``yItemUserSum``
          the sum of ``yItem`` over the user's history.  For a user with an
          empty history the implicit-feedback term is zero.

          Args:
              user: user whose rating is predicted.
              item: item whose rating is predicted.

          Returns:
              The predicted rating of ``item`` by ``user``.
          """
          _, _, implicit = self._implicitFeedback(user)
          return (self.mean + self.biasUser[user] + self.biasItem[item] +
                  np.dot(self.qItem[item], self.pUser[user] + implicit))
   |