From cbb0892399d35a0db38d2c3fce7b32a8b78f47e2 Mon Sep 17 00:00:00 2001
From: Kristofers-Solo
Date: Wed, 19 Jan 2022 17:16:51 +0200
Subject: [PATCH] Classwork 190122

---
 .../task_050122/main_vs_module_A_Bisenieks.py |  6 ++
 january/task_050122/task_190122.py            | 93 +++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 january/task_050122/main_vs_module_A_Bisenieks.py
 create mode 100644 january/task_050122/task_190122.py

diff --git a/january/task_050122/main_vs_module_A_Bisenieks.py b/january/task_050122/main_vs_module_A_Bisenieks.py
new file mode 100644
index 00000000..1f0f9e16
--- /dev/null
+++ b/january/task_050122/main_vs_module_A_Bisenieks.py
@@ -0,0 +1,6 @@
+import pandas
+
+dati = pandas.read_csv('auto_imports_mainits.csv')
+
+for index, element in enumerate(dati.isnull().sum()):
+    if element != 0: print(f"| {dati.columns[index]}" + " " * (25 - len(str(dati.columns[index]))) + f"{element}")
diff --git a/january/task_050122/task_190122.py b/january/task_050122/task_190122.py
new file mode 100644
index 00000000..2a0eea69
--- /dev/null
+++ b/january/task_050122/task_190122.py
@@ -0,0 +1,93 @@
+# Author - Kristiāns Francis Cagulis
+# Date - 19.01.2022
+# Title - Classwork
+
+import pandas as pd
+from word2number import w2n
+
+
+def main():
+    """Run the classwork data-cleaning steps on the auto imports CSV.
+
+    Reads "auto_imports_mainits.csv" from the working directory, turns the
+    word-written counts in "num-of-doors" and "num-of-cylinders" into
+    integers and prints those two columns, then prints a copy of the table
+    without the columns that hold text.
+    """
+    data = pd.read_csv("auto_imports_mainits.csv")
+
+    # summary = data["normalized-losses"].notnull() # returns boolean
+    # print(data[summary], "\n") # "normalized-losses" is not empty
+    # print(data[~summary]) # inverts all the bits
+    # print(len(data[~summary]))
+
+    data_copy = data.copy()
+
+    # Delete rows with empty spots
+    # print(f"Before erasing: {data_copy.shape}")
+    # print(f"After erasing: {data_copy.dropna().shape}")
+
+    # Delete column
+    # print(f"Before erasing: {data_copy.shape}")
+    del data_copy["normalized-losses"]
+    # print(f"After erasing: {data_copy.shape}")
+    # print(f"Blank spots: {data_copy.isnull().any().sum()}")
+
+    # data_copy2 = data_copy.copy()
+    # print(data_copy2.head())
+    # data_copy2.drop(data_copy2.columns[[0, 1]], axis=1, inplace=True)
+    # print(data_copy2.head())
+
+    dislike = ["N/A", "NA", "--"]
+    data_copy3 = pd.read_csv("auto_imports_mainits.csv", na_values=dislike)
+
+    # Mean
+    # print(data_copy3.iloc[52], "\n")
+    # mean = data_copy3["bore"].mean()
+    # data_copy3["bore"].fillna(mean, inplace=True)
+    # print(data_copy3.iloc[52])
+
+    # Median
+    # print(data_copy3.iloc[53], "\n")
+    # median = data_copy3["bore"].median()
+    # data_copy3["bore"].fillna(median, inplace=True)
+    # print(data_copy3.iloc[53])
+
+    # Mode
+    # print(data_copy3.iloc[60], "\n")
+    # mode = data_copy3["bore"].mode()
+    # data_copy3["bore"].fillna(mode, inplace=True)
+    # print(data_copy3.iloc[60])
+
+    # print(data_copy3.dtypes)
+
+    # data_copy3["curb-weight"] = pd.to_numeric(data_copy3["curb-weight"], errors='coerce')
+    # data_copy3["curb-weight"] = data_copy3["curb-weight"].astype("float64")
+    # print(data_copy3.dtypes)
+
+    # Replaces word-written numbers with integers
+    columns = ["num-of-doors", "num-of-cylinders"]
+    for column in columns:
+        # Translate each distinct word once and touch only this column.
+        words_to_numbers = {}
+        for value in data_copy3[column].dropna().unique():
+            try:
+                words_to_numbers[value] = w2n.word_to_num(value)
+            except (TypeError, ValueError):
+                # Not a number word (or not text at all): leave it as is.
+                continue
+        if words_to_numbers:
+            data_copy3[column] = data_copy3[column].replace(words_to_numbers)
+    print(data_copy3[columns])
+
+    # Leaves only columns that contain numbers
+    data_copy4 = data_copy3.copy()
+    for column in data_copy3:
+        # Check every row: the first cell may be blank in a text column.
+        if data_copy3[column].map(lambda cell: isinstance(cell, str)).any():
+            del data_copy4[column]
+    print(data_copy4)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file