{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0f1e2d3c",
   "metadata": {},
   "source": [
    "# Penguin species classification\n",
    "\n",
    "Loads the seaborn `penguins` dataset, keeps the species label and the four numeric measurements, drops rows with missing values, and compares several scikit-learn classifiers using a hold-out test split and stratified 10-fold cross-validation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b24465d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live in this one cell so a fresh kernel only needs it run once.\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import sklearn as sk\n",
    "from sklearn import metrics\n",
    "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.tree import DecisionTreeClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "094df92b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the raw frame under its own name so the cleaning cell below never\n",
    "# overwrites what this cell displayed.\n",
    "pingvinek_nyers = sns.load_dataset('penguins')\n",
    "pingvinek_nyers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4253344",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the target column and the four numeric measurements, then drop\n",
    "# every row that has a missing value in any of those columns.\n",
    "pingvinek = pingvinek_nyers[\n",
    "    ['species', 'bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']\n",
    "].dropna()\n",
    "pingvinek"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5dd2925",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of rows per species in the cleaned data.\n",
    "pingvinek['species'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "238d9a13",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pairwise plots coloured by species, with KDE plots drawn on the\n",
    "# lower-triangle panels as well.\n",
    "g = sns.pairplot(pingvinek, hue='species')\n",
    "g.map_lower(sns.kdeplot)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd1fab07",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Separate the features from the target and hold out one third of the rows\n",
    "# as a test set; the fixed seed keeps the split reproducible.\n",
    "pingvinek_jellemzők = pingvinek.drop(columns='species')\n",
    "pingvinek_cél = pingvinek['species']\n",
    "(\n",
    "    pingvinek_jellemzők_tanuló,\n",
    "    pingvinek_jellemzők_teszt,\n",
    "    pingvinek_cél_tanuló,\n",
    "    pingvinek_cél_teszt,\n",
    ") = train_test_split(\n",
    "    pingvinek_jellemzők,\n",
    "    pingvinek_cél,\n",
    "    test_size=1/3,\n",
    "    random_state=12345,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "073c42a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Baseline model: k-nearest neighbours with the library defaults, fitted on\n",
    "# the training split and used to predict the test split.\n",
    "# NOTE(review): the features are used unscaled; distance-based models are\n",
    "# sensitive to feature scale, so a StandardScaler pipeline may be worth trying.\n",
    "modell = KNeighborsClassifier()\n",
    "modell.fit(pingvinek_jellemzők_tanuló, pingvinek_cél_tanuló)\n",
    "pingvinek_cél_jósolt = modell.predict(pingvinek_jellemzők_teszt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6dbe94e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Accuracy on the held-out test split, as a percentage.\n",
    "print(f'{100*metrics.accuracy_score(pingvinek_cél_teszt, pingvinek_cél_jósolt):.1f}%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5b38f79",
   "metadata": {},
   "outputs": [],
   "source": [
    "# labels=modell.classes_ pins the row/column order of the matrix to the same\n",
    "# sequence used for display_labels, even if a class is absent from the test split.\n",
    "metrics.ConfusionMatrixDisplay(\n",
    "    confusion_matrix=confusion_matrix(\n",
    "        pingvinek_cél_teszt,\n",
    "        pingvinek_cél_jósolt,\n",
    "        labels=modell.classes_,\n",
    "    ),\n",
    "    display_labels=modell.classes_,\n",
    ").plot()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78c51a1f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-fold accuracy of the k-NN model under shuffled, stratified 10-fold\n",
    "# cross-validation on the full cleaned data.\n",
    "k_szoros = StratifiedKFold(n_splits=10, random_state=54321, shuffle=True)\n",
    "cross_val_score(modell, pingvinek_jellemzők, pingvinek_cél, cv=k_szoros, scoring='accuracy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bfb5d58e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The splitter is seeded with an integer, so a single instance produces the\n",
    "# same folds for every candidate and does not need rebuilding per iteration.\n",
    "k_szoros_ellenőrzés = StratifiedKFold(n_splits=10, random_state=11111, shuffle=True)\n",
    "\n",
    "# The loop variable is deliberately not called `modell`: reusing that name\n",
    "# would replace the fitted model from the cells above with an unfitted one.\n",
    "# NOTE(review): newer scikit-learn releases appear to deprecate the liblinear\n",
    "# solver for multiclass targets - confirm against the installed version.\n",
    "for név, jelölt_modell in [\n",
    "    ('Logistic Regression', LogisticRegression(solver='liblinear')),\n",
    "    ('Linear Discriminant Analysis', LinearDiscriminantAnalysis()),\n",
    "    ('K-Neighbors Classifier', KNeighborsClassifier()),\n",
    "    ('Decision Tree Classifier', DecisionTreeClassifier()),\n",
    "    ('Gaussian Naive Bayes', GaussianNB()),\n",
    "    ('C-Support Vector Classification', SVC()),\n",
    "]:\n",
    "    eredmények = cross_val_score(\n",
    "        jelölt_modell,\n",
    "        pingvinek_jellemzők,\n",
    "        pingvinek_cél,\n",
    "        cv=k_szoros_ellenőrzés,\n",
    "        scoring='accuracy',\n",
    "    )\n",
    "    print(f'{név}: {100*eredmények.mean():.2f}%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53925de3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Refit logistic regression on the training split and plot its confusion\n",
    "# matrix for the test split; labels= keeps the matrix aligned with display_labels.\n",
    "modell = LogisticRegression(solver='liblinear')\n",
    "modell.fit(pingvinek_jellemzők_tanuló, pingvinek_cél_tanuló)\n",
    "pingvinek_cél_jósolt = modell.predict(pingvinek_jellemzők_teszt)\n",
    "metrics.ConfusionMatrixDisplay(\n",
    "    confusion_matrix=confusion_matrix(\n",
    "        pingvinek_cél_teszt,\n",
    "        pingvinek_cél_jósolt,\n",
    "        labels=modell.classes_,\n",
    "    ),\n",
    "    display_labels=modell.classes_,\n",
    ").plot()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}