"""Plot and export model scores for different upsampling methods.

The script reads two score tables (``upsampling_whole_sep.csv`` and
``upsampling_noborder_sep.csv``), draws one bar chart per model type -- first
for the whole data alone, then side by side with the "trimmed" (no-border)
scores -- and finally exports the rounded Neural Network scores as CSV files.
All figures and exported tables are written to the ``figs/`` directory.
"""

# %%
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt

# Directory receiving every figure and exported table.
FIG_DIR = Path("figs")

# Key stored in the "type" column -> display name used in labels and titles.
# The insertion order is the order in which the figures are produced.
MODEL_NAMES = {
    "NB": "Naive Bayes",
    "Tree": "Regression Tree",
    "LR": "Ridge Regression",
    "SVM": "SVM",
    "NN": "Neural Network",
}

BASE_METRICS = ["MSE / 10", "acc", "bal acc"]
TRIMMED_METRICS = [
    "MSE / 10",
    "MSE trimmed / 10",
    "acc",
    "acc trimmed",
    "bal acc",
    "bal acc trimmed",
]
EXPORT_COLUMNS = ["MSE", "acc", "balAcc", "upsampling"]


def load_scores(csv_path: str) -> pd.DataFrame:
    """Read a score table and add the ``MSE / 10`` column used by the plots.

    Args:
        csv_path: CSV file whose first column is the row index and which
            contains at least an ``MSE`` column.

    Returns:
        The loaded frame with the extra ``MSE / 10`` column.
    """
    scores = pd.read_csv(csv_path, index_col=0)
    # NOTE(review): presumably scaled so the MSE bars are readable next to the
    # accuracy bars on a shared axis -- confirm.
    scores["MSE / 10"] = scores["MSE"] / 10
    return scores


def add_trimmed_scores(scores: pd.DataFrame, trimmed: pd.DataFrame) -> None:
    """Copy the trimmed metrics into ``scores`` as ``"... trimmed"`` columns.

    ``scores`` is modified in place.

    Args:
        scores: Score table of the whole data.
        trimmed: Score table of the trimmed (no-border) data; must list the
            same ``type`` / ``upsampling`` combination on every row.

    Raises:
        ValueError: If the two tables do not describe the same rows. (pandas
            itself also raises ``ValueError`` when the indexes differ.)
    """
    same_rows = (
        (trimmed["type"] == scores["type"]).all()
        and (trimmed["upsampling"] == scores["upsampling"]).all()
    )
    # An explicit raise instead of ``assert``: asserts vanish under ``-O`` and
    # misaligned rows would silently produce wrong side-by-side bars.
    if not same_rows:
        raise ValueError(
            "whole and trimmed score tables do not list the same "
            "type/upsampling rows"
        )
    scores["MSE trimmed / 10"] = trimmed["MSE / 10"]
    scores["acc trimmed"] = trimmed["acc"]
    scores["bal acc trimmed"] = trimmed["bal acc"]


def plot_model_scores(
    scores: pd.DataFrame,
    model: str,
    metrics: list,
    title_prefix: str,
    file_prefix: str = "",
) -> None:
    """Draw, save and show the bar chart of one model's scores.

    The figure is saved as ``figs/<file_prefix><model>_upsampling.jpg``.

    Args:
        scores: Score table with ``type``, ``upsampling`` and metric columns.
        model: Key of the model in the ``type`` column (see ``MODEL_NAMES``).
        metrics: Metric columns to draw, one bar group per upsampling method.
        title_prefix: Text placed before ``"with <model name>"`` in the title.
        file_prefix: Optional prefix of the output file name.
    """
    model_name = MODEL_NAMES[model]
    model_rows = scores[scores["type"] == model]
    model_rows[list(metrics) + ["upsampling"]].plot(kind="bar", x="upsampling")
    plt.xticks(rotation=45)
    plt.xlabel("")
    plt.ylabel(f"Score on {model_name}")
    plt.title(f"{title_prefix} with {model_name}")
    # savefig does not create missing directories.
    FIG_DIR.mkdir(parents=True, exist_ok=True)
    plt.savefig(
        FIG_DIR / f"{file_prefix}{model}_upsampling.jpg", bbox_inches="tight"
    )
    plt.show()


def tidy_for_export(scores: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``scores`` prepared for CSV export.

    ``bal acc`` is renamed to ``balAcc``, the ``no_upsampling`` label becomes
    ``no upsampling`` and ``MSE`` / ``acc`` / ``balAcc`` are rounded to four
    decimals.
    """
    tidy = scores.rename(columns={"bal acc": "balAcc"})
    tidy["upsampling"] = tidy["upsampling"].replace(
        {"no_upsampling": "no upsampling"}
    )
    for column in ("MSE", "acc", "balAcc"):
        # Python's round() is kept on purpose so the exported numbers do not
        # change with respect to previously generated tables.
        tidy[column] = tidy[column].apply(lambda value: round(value, 4))
    return tidy


def export_model_table(scores: pd.DataFrame, model: str, file_name: str) -> None:
    """Write the export columns of one model to ``figs/<file_name>``.

    Args:
        scores: Frame returned by ``tidy_for_export``.
        model: Key of the model in the ``type`` column.
        file_name: Name of the CSV file created inside ``figs/``.
    """
    FIG_DIR.mkdir(parents=True, exist_ok=True)
    model_rows = scores[scores["type"] == model]
    model_rows[EXPORT_COLUMNS].to_csv(FIG_DIR / file_name, index=False)


# %%
data = load_scores("upsampling_whole_sep.csv")
data.head()

# %%
# One figure per model: scores of every upsampling method on the whole data.
for model_key in MODEL_NAMES:
    plot_model_scores(
        data, model_key, BASE_METRICS, "Different upsampling method"
    )

# %%
data_no_border = load_scores("upsampling_noborder_sep.csv")
data_no_border.head()

# %%
add_trimmed_scores(data, data_no_border)
data.head()

# %%
# One figure per model: whole-data scores next to the trimmed scores.
for model_key in MODEL_NAMES:
    plot_model_scores(
        data,
        model_key,
        TRIMMED_METRICS,
        "Influence of trimming to upsampling method",
        file_prefix="trimmed_",
    )

# %%
data = tidy_for_export(data)
export_model_table(data, "NN", "NN_upsampling.csv")

# %%
data_no_border = tidy_for_export(data_no_border)
export_model_table(data_no_border, "NN", "NN_trimmed_upsampling.csv")