@Sneiksus

Почему выходять такие результаты точности?

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

train_df = train_data
test_df = test_data

X_train = train_df.drop('ack', axis=1)
y_train = train_df['ack']

X_test = test_df.drop('ack', axis=1) 
y_test = test_df['ack']

def oneR(X_train, y_train):
    
    accuracies = []
    
    for col in X_train.columns:
        values = X_train[col].unique()
        max_acc = 0
        best_threshold = None
        best_class = None
        
        for value in values:
            y_pred = (X_train[col] > value).astype(int)
            acc = accuracy_score(y_train, y_pred)
            if acc > max_acc:
                max_acc = acc
                best_threshold = value
                best_class = 1 if value < X_train[col].mean() else 0
        
        accuracies.append(max_acc)
    
    best_feature = X_train.columns[accuracies.index(max(accuracies))]
    best_threshold = sorted(X_train[best_feature].unique())[1] 
    best_class = 1 if best_threshold < X_train[best_feature].mean() else 0
    
    print("Best feature:", best_feature)
    print("Threshold:", best_threshold)
    print("Class below threshold:", best_class)
    
    return best_feature, best_threshold, best_class

best_feature, best_threshold, best_class = oneR(X_train, y_train)

def predict_oneR(X):
    y_pred = (X[best_feature] > best_threshold).astype(int)
    return y_pred.apply(lambda x: 1 if x else 0)

y_pred = predict_oneR(X_test)

from sklearn.tree import DecisionTreeClassifier  
from sklearn.linear_model import LogisticRegression

dt = DecisionTreeClassifier() 
lr = LogisticRegression()

models = [('OneR', predict_oneR),  
          ('Decision Tree', dt),    
          ('Logistic Regression', lr)]


for name, model in models:
    
    if name == 'OneR':
        model = predict_oneR
    else: 
        model.fit(X_train, y_train)
    
    if name == 'Decision Tree':
        from sklearn import tree
        tree.plot_tree(model)

    if name == 'OneR':
        y_pred = model(X_test)
    else:
        y_pred = model.predict(X_test)
    
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    
    print(name)
    print("Accuracy:", acc)
    print("Precision:", prec)
    print("Recall:", rec)


Делал OneR алгоритм, но в сравнении с tree класификатором выдает лучшие параметрыю. Только начинаю с изучением машинного обучения, поэтому хотел бы узнать есть ли ошибка у меня в коде выше. Или что то в датасете.

Output:
Best feature: scroll_move_total_rel_distance
Threshold: -0.9427631178094024
Class below threshold: 1
OneR
Accuracy: 0.5497709287796751
Precision: 0.5497709287796751
Recall: 1.0
Decision Tree
Accuracy: 0.45022907122032485
Precision: 0.0
Recall: 0.0
Logistic Regression
Accuracy: 0.5497709287796751
Precision: 0.5497709287796751
Recall: 1.0
  • Вопрос задан
  • 33 просмотра
Пригласить эксперта
Ваш ответ на вопрос

Войдите, чтобы написать ответ

Похожие вопросы