import pandas
from sklearn import tree
import numpy as np
# Turn off pandas' chained-assignment (SettingWithCopy) warning globally.
pandas.options.mode.chained_assignment = None
# Matplotlib visualization
import matplotlib.pyplot as plt
# Internal ipython tool for setting figure size
from IPython.core.pylabtools import figsize
# Seaborn for visualization
import seaborn as sns
# Apply seaborn's theme with all font sizes scaled by a factor of 2.
sns.set(font_scale = 2)
# Splitting data into training and testing
from sklearn.model_selection import train_test_split
from pandas.plotting import scatter_matrix
# Load the mammographic-masses table.
# The raw file marks missing values with '?'. Without telling pandas about it,
# the affected columns are read as strings and scikit-learn later fails with
# "ValueError: could not convert string to float: '?'". Declaring '?' as an
# NA marker keeps every column numeric (missing cells become NaN).
table = pandas.read_csv(
    '/Users/anastasiia/Desktop/Новая папка/mammographic_masses.csv',
    sep='\t',
    na_values='?',
)
print(table)
table.info()
print("\n Y means that and that \n x1 means that and that \n")

# Rename the five feature columns to X1..X5 and the last column to Y (target).
table.columns = ['X' + str(i) for i in range(1, 6)] + ['Y']

# KNeighborsClassifier does not accept NaN, so discard incomplete rows before
# the data is split. (Imputation would be an alternative if too many rows
# are lost here.)
table = table.dropna()
print(table)

# Separate the features from the target: drop column 'Y' to get X.
X = table.drop('Y', axis=1)
y = table['Y']
feature_names = X.columns
print(feature_names)

# Hold out 30% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=11
)
N_train, _ = X_train.shape
N_test, _ = X_test.shape
print("\n")
print(N_train, N_test)

# Fit a k-nearest-neighbours classifier with default hyper-parameters.
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
in
63 knn = KNeighborsClassifier()
64
---> 65 knn.fit(X_train, y_train)
66
67 print("\n")
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/neighbors/_base.py in fit(self, X, y)
1128 """
1129 if not isinstance(X, (KDTree, BallTree)):
-> 1130 X, y = check_X_y(X, y, "csr", multi_output=True)
1131
1132 if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
753 ensure_min_features=ensure_min_features,
754 warn_on_dtype=warn_on_dtype,
--> 755 estimator=estimator)
756 if multi_output:
757 y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
529 array = array.astype(dtype, casting="unsafe", copy=False)
530 else:
--> 531 array = np.asarray(array, order=order, dtype=dtype)
532 except ComplexWarning:
533 raise ValueError("Complex data not supported\n"
~/opt/anaconda3/lib/python3.7/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
83
84 """
---> 85 return array(a, dtype, copy=False, order=order)
86
87
ValueError: could not convert string to float: '?'