动手实现推荐系统评价指标

picture.image

关注我们,一起学习~

代码地址:https://github.com/dqdallen/Torch-RecHub/blob/main/torch_rechub/basic/metric.py

点击阅读原文可以前往代码仓库

导入必要的模块

          
from sklearn.metrics import roc_auc_score  
import numpy as np
      
工具函数

用于对数据按用户分组,并取每个用户推荐列表中得分最高的前 topk 个结果。


          
def get_user_pred(y_true, y_pred, users):
  """Group labels and predicted scores by user id.

  Args:
    y_true: array, all true labels of the data
    y_pred: array, the predicted score
    users: array, user id of each sample (same order as y_true / y_pred)

  Return:
    user_pred: dict, key is user id and value is a two-element list
      ``[labels, scores]`` holding that user's labels and scores as
      plain lists, in the order they appear in the input
  """
  user_pred = {}
  for i, u in enumerate(users):
    # setdefault creates the [labels, scores] pair the first time a user
    # is seen and returns the existing pair afterwards
    labels, scores = user_pred.setdefault(u, [[], []])
    labels.append(y_true[i])
    scores.append(y_pred[i])
  return user_pred
  
  
def get_user_topk(y_true, y_pred, users, k):
  """Keep, for every user, the k samples with the highest predicted score.

  The samples are first grouped by user with get_user_pred(); each user's
  scores are then sorted in descending order and the same ordering is
  applied to the labels so that labels and scores stay aligned.

  Args:
    y_true: array, all true labels of the data
    y_pred: array, the predicted score
    users: array, user id of each sample
    k: int, maximum number of samples kept per user

  Return:
    user_pred: dict, key is user id and value is ``[labels, scores]``,
      two numpy arrays of length at most k ordered by descending score
  """
  user_pred = get_user_pred(y_true, y_pred, users)
  for pair in user_pred.values():
    labels, scores = pair
    # argsort is ascending, so reverse it before taking the first k indices
    idx = np.argsort(scores)[::-1][:k]
    pair[0] = np.array(labels)[idx]
    pair[1] = np.array(scores)[idx]
  return user_pred
      
AUC

          
def auc_score(y_true, y_pred):
  """Compute AUC over all samples.

  Thin wrapper around sklearn.metrics.roc_auc_score.

  Args:
    y_true: array, true labels
    y_pred: array, the predicted score

  Return:
    the value returned by roc_auc_score(y_true, y_pred)
  """
  return roc_auc_score(y_true, y_pred)
      
GAUC

          
def gauc_score(y_true, y_pred, users, weights=None):
  """Compute GAUC, the weighted average of per-user AUC.

  Args:
    y_true: array, dim(N, ), all true labels of the data
    y_pred: array, dim(N, ), the predicted score
    users: array, dim(N, ), user id
    weights: dict, it contains the weight of each user group.
        If it is None, the weight of a user is the number of samples
        that belong to that user

  Return:
    score: float, GAUC

  Raises:
    ValueError: if no user has both a positive and a negative label,
        in which case GAUC is undefined
  """
  assert len(y_true) == len(y_pred) and len(y_true) == len(users)

  # k = len(users) keeps every sample of every user
  user_pred = get_user_topk(y_true, y_pred, users, len(users))
  score = 0
  num = 0
  for u, (labels, scores) in user_pred.items():
    # AUC is undefined when a user has only one class of labels, so such
    # users are left out of both the weighted sum and the total weight
    if np.unique(labels).size < 2:
      continue
    wg = len(labels) if weights is None else weights[u]
    score += wg * auc_score(labels, scores)
    num += wg
  if num == 0:
    raise ValueError('GAUC is undefined: no user has both positive and negative labels')
  return score / num
      
LogLoss

          
def log_loss(y_true, y_pred, eps=1e-15):
  """Compute the mean binary cross-entropy (log loss).

  Args:
    y_true: array-like, dim(N, ), true labels
    y_pred: array-like, dim(N, ), predicted probabilities
    eps: float, predictions are clipped to [eps, 1 - eps] so that a
        prediction of exactly 0 or 1 does not produce log(0)

  Return:
    float, the log loss averaged over the N samples
  """
  # asarray lets plain Python lists be used as well as numpy arrays
  y_true = np.asarray(y_true, dtype=float)
  y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
  score = y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)
  return -score.sum() / len(y_true)
      
NDCG

          
def ndcg_score(user_pred, k):
  """Compute NDCG@k averaged over users.

  The normaliser is the DCG of a list whose k positions are all relevant,
  so a user only reaches 1.0 when every one of the k positions has label 1.

  Args:
    user_pred: dict, computed by get_user_topk(); each value is
        ``[labels, scores]`` with labels ordered by descending score
    k: int, the number of ranking positions considered

  Return:
    float, mean of DCG / IDCG over all users
  """
  # discount of rank r (1-based) is 1 / log2(r + 1)
  discounts = 1. / np.log2(np.arange(1, k + 1, 1) + 1)
  idcg = sum(discounts)
  total = 0
  for labels, _ in user_pred.values():
    # asarray makes the elementwise comparison work for list labels too
    hit_positions = np.where(np.asarray(labels) == 1)
    total += sum(discounts[hit_positions]) / idcg
  return total / len(user_pred)
      
HitRate

          
def hit_score(user_pred):
  """Compute the hit rate: the share of users with at least one label 1.

  Args:
    user_pred: dict, computed by get_user_topk(); each value is
        ``[labels, scores]``

  Return:
    float, number of users whose labels contain a 1 divided by the
    number of users
  """
  hits = sum(1.0 for labels, _ in user_pred.values() if 1 in labels)
  return hits / len(user_pred)
      
MRR

          
def mrr_score(user_pred):
  """Compute MRR: the mean reciprocal rank of each user's first label 1.

  Users whose labels contain no 1 contribute 0 to the sum but are still
  counted in the denominator.

  Args:
    user_pred: dict, computed by get_user_topk(); each value is
        ``[labels, scores]`` with labels ordered by descending score

  Return:
    float, sum of 1 / rank of the first positive label, divided by the
    number of users
  """
  total = 0
  for labels, _ in user_pred.values():
    # asarray makes the elementwise comparison work for list labels too
    positions = np.where(np.asarray(labels) == 1)[0]
    if positions.size:
      # positions are 0-based, ranks are 1-based
      total += 1.0 / (positions[0] + 1)
  return total / len(user_pred)
      
Recall

          
def recall_score(user_pred):
  """Average, over users, the fraction of retained items whose label is 1.

  NOTE(review): for each user this divides the number of positive labels
  by the length of the retained list, not by the user's total number of
  positives (which is not available in user_pred). Confirm that this is
  the intended definition of "recall" for callers.

  Args:
    user_pred: dict, computed by get_user_topk(); each value is
        ``[labels, scores]``

  Return:
    float, mean over users of sum(labels) / len(labels)
  """
  total = 0
  for labels, _ in user_pred.values():
    total += sum(labels) * 1. / len(labels)
  return total / len(user_pred)
      
选择topk评价指标

          
def topk_metrics(y_true, y_pred, users, k, metric_type):
  """Choose a top-k metric and compute it.

  The supported metrics are 'ndcg', 'mrr', 'recall' and 'hit'.

  Args:
    y_true: array, dim(N, ), all true labels of the data
    y_pred: array, dim(N, ), the predicted score
    users: array, dim(N, ), user id
    k: int, the number of topk
    metric_type: string, the metric to compute; matching is
        case-insensitive, e.g. 'ndcg', 'NDCG' or 'Ndcg'

  Return:
    the score of the chosen top-k metric

  Raises:
    ValueError: if metric_type is not one of the supported metrics
  """
  assert len(y_true) == len(y_pred) and len(y_true) == len(users)

  user_pred = get_user_topk(y_true, y_pred, users, k)
  # normalise once instead of lower-casing in every comparison
  metric = metric_type.lower()
  if metric == 'ndcg':
    return ndcg_score(user_pred, k)
  if metric == 'mrr':
    return mrr_score(user_pred)
  if metric == 'recall':
    return recall_score(user_pred)
  if metric == 'hit':
    return hit_score(user_pred)
  raise ValueError('metric_type error, choice from \'ndcg\', \'mrr\', \'recall\', \'hit\'')
      
测试用例

          
# Small hand-made example: 10 samples spread over 3 users.
y_pred = np.array([0.3, 0.2, 0.5, 0.9, 0.7, 0.31, 0.8, 0.1, 0.4, 0.6])
y_true = np.array([1, 0, 0, 1, 0, 0, 1, 0, 0, 1])
users_id = np.array([ 2, 1, 0, 2, 1, 0, 0, 2, 1, 1])

print('auc: ', auc_score(y_true, y_pred))
print('gauc: ', gauc_score(y_true, y_pred, users_id))
print('log_loss: ', log_loss(y_true, y_pred))

# 's' is not one of the supported metric names, so topk_metrics raises
# ValueError for it; the error is printed so the demo runs to completion.
for mt in ['ndcg', 'mrr', 'recall', 'hit', 's']:
  try:
    tm = topk_metrics(y_true, y_pred, users_id, 3, metric_type=mt)
  except ValueError as err:
    print(f'{mt}: {err}')
    continue
  print(f'{mt}: {tm}')
      

交流群:点击“联系作者”--备注“研究方向-公司或学校”

欢迎|论文宣传|合作交流

往期推荐

[多分辨率分析对曝光序列去噪

2022-05-12

picture.image](https://mp.weixin.qq.com/s?__biz=MzkxNjI4MDkzOQ==&mid=2247491472&idx=1&sn=44e888598358dbb69da99ad52dae0311&chksm=c1531c94f62495821c7056cce98869ae4a4415b39831c8c6630b0ea68b5891efc24a1beb0222&scene=21#wechat_redirect)

[华为 | ReLoop:自纠正地训练推荐系统

2022-05-10

picture.image](https://mp.weixin.qq.com/s?__biz=MzkxNjI4MDkzOQ==&mid=2247491439&idx=1&sn=92179756e2f4fdbc02a914d13d6980ec&chksm=c1531c6bf624957d71d393581b11aa72962d20be50db9b77f75296e1d06a97118631d48bb62b&scene=21#wechat_redirect)

[一文学完所有的Hive Sql(两万字最全详解)

2022-05-09

picture.image](https://mp.weixin.qq.com/s?__biz=MzkxNjI4MDkzOQ==&mid=2247491416&idx=1&sn=3bad12c1edeb3abf138320f50e8baa31&chksm=c1531c5cf624954a98cec7b62d6c3005f783d25f56b5754f83f24080cae3457a072160b4c402&scene=21#wechat_redirect)

picture.image

长按关注,更多精彩

picture.image

picture.image

支持一哈

0
0
0
0
评论
未登录
暂无评论