強化学習を使ってレコメンド表示するコードをPythonで書いて

2024年6月8日

以下は、Pythonで強化学習を使用してレコメンドを表示するためのサンプルコードです。この例では、状態はユーザーの属性と過去の購買履歴から生成され、報酬はユーザーが選択した商品の評価に基づいて計算されます。エージェントはQ学習を使用して学習を行い、最適なレコメンドを提供します。

python
import numpy as np

class RecommendationEnvironment:
    """One-step recommendation environment backed by a user/item rating table.

    The state is a vector of length ``num_users + num_items``. After a step,
    the first ``num_users`` entries one-hot encode the user that was served
    and the remaining ``num_items`` entries hold that user's item ratings.
    After ``reset`` the state is all zeros.
    """

    def __init__(self, num_users, num_items, ratings=None):
        """Create the environment.

        Args:
            num_users: Number of users.
            num_items: Number of items (one action per item).
            ratings: Optional array-like of shape ``(num_users, num_items)``
                giving the rating each user assigns to each item. When
                omitted, every rating is 0.0, so every reward is 0.0.

        Raises:
            ValueError: If ``ratings`` does not have shape
                ``(num_users, num_items)``.
        """
        self.num_users = num_users
        self.num_items = num_items
        self.state_dim = num_users + num_items
        if ratings is None:
            self.ratings = np.zeros((num_users, num_items))
        else:
            self.ratings = np.asarray(ratings, dtype=float)
            if self.ratings.shape != (num_users, num_items):
                raise ValueError(
                    "ratings must have shape (num_users, num_items) = "
                    f"({num_users}, {num_items}), got {self.ratings.shape}"
                )
        self.state = np.zeros(self.state_dim)  # current state vector

    def reset(self):
        """Reset the state to an all-zero vector and return it."""
        self.state = np.zeros(self.state_dim)
        return self.state

    def step(self, user_id, action):
        """Recommend item ``action`` to ``user_id`` and return the outcome.

        Args:
            user_id: Index of the user receiving the recommendation.
            action: Index of the recommended item.

        Returns:
            A ``(state, reward, done)`` tuple. ``reward`` is the rating of the
            recommended item for this user, and ``done`` is always ``True``
            because an episode consists of a single recommendation.

        Raises:
            IndexError: If ``user_id`` or ``action`` is out of range.
        """
        if not 0 <= user_id < self.num_users:
            raise IndexError(
                f"user_id {user_id} is out of range for {self.num_users} users"
            )
        # Without this check a negative action would silently wrap around and
        # read from the user part of the state vector.
        if not 0 <= action < self.num_items:
            raise IndexError(
                f"action {action} is out of range for {self.num_items} items"
            )

        # Build a fresh array instead of mutating in place so that a state
        # previously handed to the caller is not changed behind its back.
        next_state = np.zeros(self.state_dim)
        next_state[user_id] = 1.0
        next_state[self.num_users:] = self.ratings[user_id]
        self.state = next_state

        # The reward is the rating of the item the user was shown.
        reward = self.state[self.num_users + action]
        done = True  # each episode ends after one step
        return self.state, reward, done

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy policy.

    States may be given either as an integer row index into the Q-table or as
    a vector; a vector is mapped to the index of its first non-zero entry
    (0 for an all-zero vector), so a one-hot vector selects its hot index.
    """

    def __init__(self, state_dim, action_dim, alpha=0.1, gamma=0.99, epsilon=0.1):
        """Create the agent with a zero-initialised Q-table.

        Args:
            state_dim: Number of rows (states) in the Q-table.
            action_dim: Number of columns (actions) in the Q-table.
            alpha: Learning rate.
            gamma: Discount factor.
            epsilon: Probability of picking a random action.
        """
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_table = np.zeros((state_dim, action_dim))

    @staticmethod
    def _state_index(state):
        """Convert a scalar or vector state into an integer Q-table row."""
        arr = np.asarray(state)
        if arr.ndim == 0:
            return int(arr)
        # Indexing the Q-table with a float vector raises IndexError, so the
        # vector is reduced to the position of its first non-zero entry.
        hot = np.flatnonzero(arr)
        return int(hot[0]) if hot.size else 0

    def choose_action(self, state):
        """Pick an action for ``state`` using the epsilon-greedy rule.

        Args:
            state: Integer state index or state vector.

        Returns:
            The index of the chosen action.
        """
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.randint(self.action_dim)  # explore
        row = self._state_index(state)
        return int(np.argmax(self.q_table[row]))  # exploit

    def update(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update for the observed transition.

        Args:
            state: State in which ``action`` was taken (index or vector).
            action: Index of the action taken.
            reward: Reward received.
            next_state: State reached after the action (index or vector).
            done: When ``True`` the transition is terminal and the value of
                ``next_state`` is not bootstrapped into the target.
        """
        row = self._state_index(state)
        q_value = self.q_table[row, action]
        if done:
            next_max = 0.0
        else:
            next_max = np.max(self.q_table[self._state_index(next_state)])
        target = reward + self.gamma * next_max
        self.q_table[row, action] = q_value + self.alpha * (target - q_value)

# Set up the recommendation environment
num_users = 10
num_items = 5
env = RecommendationEnvironment(num_users, num_items)

# Set up the agent
state_dim = num_users + num_items
action_dim = num_items
agent = QLearningAgent(state_dim, action_dim)

# The Q-table is indexed by an integer state, so the id of the user being
# served is used as the state (rows 0 .. num_users - 1). Row ``num_users`` is
# never updated and therefore stays at zero; it serves as the terminal state
# so that finished episodes do not bootstrap from a live user row.
terminal_state = num_users

# Training
num_episodes = 1000
for episode in range(num_episodes):
    env.reset()
    total_reward = 0

    # One randomly chosen user is served per episode.
    user_id = np.random.randint(num_users)
    state = user_id

    while True:
        action = agent.choose_action(state)
        # The state vector returned by the environment is not used here
        # because the tabular agent works on integer user states.
        _, reward, done = env.step(user_id, action)
        next_state = terminal_state if done else state

        # Q-value update
        agent.update(state, action, reward, next_state)

        total_reward += reward
        state = next_state

        if done:
            break

    # Report the result of each episode
    print("Episode:", episode+1, "Total Reward:", total_reward)

# Show the learned recommendation for every user. The Q-table is read
# greedily here; going through the epsilon-greedy policy would make a share
# of the reported recommendations random.
for user_id in range(num_users):
    action = np.argmax(agent.q_table[user_id])
    print("User", user_id, "Recommends Item", action)

このコードでは、RecommendationEnvironmentクラスがレコメンドシステムの環境を定義し、QLearningAgentクラスが強化学習エージェントを表しています。エージェントはQ学習アルゴリズムを使用して学習を行い、最適なレコメンドを提供します。

未分類

Posted by ぼっち