---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[105], line 89
     87 # データの標準化
     88 scaler = StandardScaler()
---> 89 X_train_scaled = scaler.fit_transform(X_train)
     90 X_test_scaled = scaler.transform(X_test)
     94 # ランダムフォレストモデルの作成とトレーニング

File ~\anaconda3\lib\site-packages\sklearn\utils\_set_output.py:142, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
    140 @wraps(f)
    141 def wrapped(self, X, *args, **kwargs):
--> 142     data_to_wrap = f(self, X, *args, **kwargs)
    143     if isinstance(data_to_wrap, tuple):
    144         # only wrap the first output for cross decomposition
    145         return (
    146             _wrap_data_with_container(method, data_to_wrap[0], X, self),
    147             *data_to_wrap[1:],
    148         )

File ~\anaconda3\lib\site-packages\sklearn\base.py:859, in TransformerMixin.fit_transform(self, X, y, **fit_params)
    855 # non-optimized default implementation; override when a better
    856 # method is possible for a given clustering algorithm
    857 if y is None:
    858     # fit method of arity 1 (unsupervised transformation)
--> 859     return self.fit(X, **fit_params).transform(X)
    860 else:
    861     # fit method of arity 2 (supervised transformation)
    862     return self.fit(X, y, **fit_params).transform(X)

File ~\anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:824, in StandardScaler.fit(self, X, y, sample_weight)
    822 # Reset internal state before fitting
    823 self._reset()
--> 824 return self.partial_fit(X, y, sample_weight)

File ~\anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:889, in StandardScaler.partial_fit(self, X, y, sample_weight)
    887 if sparse.issparse(X):
    888     if self.with_mean:
--> 889         raise ValueError(
    890             "Cannot center sparse matrices: pass `with_mean=False` "
    891             "instead. See docstring for motivation and alternatives."
    892         )
    893     sparse_constructor = (
    894         sparse.csr_matrix if X.format == "csr" else sparse.csc_matrix
    895     )
    897     if self.with_std:
    898         # First pass

ValueError: Cannot center sparse matrices: pass `with_mean=False` instead. See docstring for motivation and alternatives.

2024年6月8日

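エラーが発生しているのは、X_trainが疎行列（sparse matrix）であり、StandardScalerが既定の設定（with_mean=True）では疎行列を中心化（centering）できないためです。中心化では各特徴量から平均値を引きますが、そうするとゼロだった要素の大半が非ゼロになり、疎行列の構造が失われてしまいます。そのため、中心化を行うにはデータが密行列（dense matrix）である必要があります。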

解決策としては、疎行列を中心化する代わりに、with_mean=Falseを設定してスケーリングのみを行うことが挙げられます。以下は修正されたコードです。

```python
# データの標準化
scaler = StandardScaler(with_mean=False)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
```

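これにより、疎行列のままデータのスケーリングが行われ、エラーが解消されます。ただし、with_mean=Falseの場合は各特徴量を標準偏差で割るだけで、平均は0になりません（中心化は行われません）。中心化も必要で、データがメモリに収まる大きさであれば、X_train.toarray()で密行列に変換してからStandardScalerを適用する方法もあります。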

Posted by ぼっち