Nikolaanastasiia
@Nikolaanastasiia

Как изменить знак вопроса на 1 в таблице для машинного обучения?

Код программы:
spoiler
import pandas 

from sklearn import tree

import numpy as np

pandas.options.mode.chained_assignment = None



# Matplotlib visualization
import matplotlib.pyplot as plt



# Internal ipython tool for setting figure size
from IPython.core.pylabtools import figsize

# Seaborn for visualization
import seaborn as sns
sns.set(font_scale = 2)

# Splitting data into training and testing
from sklearn.model_selection import train_test_split

from pandas.plotting import scatter_matrix




# Load the raw table. The path and the tab separator are specific to the
# author's machine and file.
table = pandas.read_csv('/Users/anastasiia/Desktop/Новая папка/mammographic_masses.csv', sep='\t')

print(table)
table.info()

print("\n Y means that and that \n x1 means that and that \n")

# Rename the six columns: five features X1..X5 followed by the target Y.
table.columns = ['X' + str(i) for i in range(1, 6)] + ['Y']
print(table)

# Feature matrix: every column except the target 'Y'.
# Missing values are stored in the file as the string '?'. Left as is, they
# make estimators fail with "could not convert string to float: '?'".
# Substitute 1 for every '?' and convert each feature column to a numeric
# dtype so the matrix can be passed straight to scikit-learn.
X = table.drop('Y', axis=1).replace('?', 1).apply(pandas.to_numeric)
y = table['Y']
feature_names = X.columns
print(feature_names)

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=11)

# Number of rows in each split.
N_train, _ = X_train.shape
N_test, _ = X_test.shape

print("\n")
print(N_train, N_test)

Текст входного файла csv по ссылке

Всем привет!
В таблице в столбцах атрибутов стоят знаки вопроса, на их место надо поставить единицу.
Когда я добавляю метод ближайших соседей, обучение завершается ошибкой «could not convert string to float: '?'»:
# k-nearest-neighbours classifier, constructed with its default settings.
from sklearn.neighbors import KNeighborsClassifier

# X_train must be fully numeric at this point: while it still contains '?'
# strings, fit() raises "ValueError: could not convert string to float: '?'".
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

Выдается ошибка
spoiler
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
in
63 knn = KNeighborsClassifier()
64
---> 65 knn.fit(X_train, y_train)
66
67 print("\n")

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/neighbors/_base.py in fit(self, X, y)
1128 """
1129 if not isinstance(X, (KDTree, BallTree)):
-> 1130 X, y = check_X_y(X, y, "csr", multi_output=True)
1131
1132 if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
753 ensure_min_features=ensure_min_features,
754 warn_on_dtype=warn_on_dtype,
--> 755 estimator=estimator)
756 if multi_output:
757 y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
529 array = array.astype(dtype, casting="unsafe", copy=False)
530 else:
--> 531 array = np.asarray(array, order=order, dtype=dtype)
532 except ComplexWarning:
533 raise ValueError("Complex data not supported\n"

~/opt/anaconda3/lib/python3.7/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
83
84 """
---> 85 return array(a, dtype, copy=False, order=order)
86
87

ValueError: could not convert string to float: '?'

Спасибо!
  • Вопрос задан
  • 153 просмотра
Пригласить эксперта
Ваш ответ на вопрос

Войдите, чтобы написать ответ

Похожие вопросы