diff --git a/february/task_180222/output/excel/combined.xlsx b/february/task_180222/output/excel/combined.xlsx deleted file mode 100644 index 723d2e3a..00000000 Binary files a/february/task_180222/output/excel/combined.xlsx and /dev/null differ diff --git a/february/task_180222/output/excel/output_many_20022022124051.xlsx b/february/task_180222/output/excel/output_many_20022022124051.xlsx new file mode 100644 index 00000000..5dc3479d Binary files /dev/null and b/february/task_180222/output/excel/output_many_20022022124051.xlsx differ diff --git a/february/task_180222/output/graphs/cenu_grafiki.png b/february/task_180222/output/graphs/cenu_grafiki.png index 51d43fef..b53731de 100644 Binary files a/february/task_180222/output/graphs/cenu_grafiki.png and b/february/task_180222/output/graphs/cenu_grafiki.png differ diff --git a/february/task_180222/output/graphs/korelacija.png b/february/task_180222/output/graphs/korelacija.png index 186460ed..25b77170 100644 Binary files a/february/task_180222/output/graphs/korelacija.png and b/february/task_180222/output/graphs/korelacija.png differ diff --git a/february/task_180222/pd_pandas_k_f_cagulis.py b/february/task_180222/pd_pandas_k_f_cagulis.py index 7b5bcfc6..fe9056b4 100644 --- a/february/task_180222/pd_pandas_k_f_cagulis.py +++ b/february/task_180222/pd_pandas_k_f_cagulis.py @@ -3,7 +3,6 @@ # Title - Patstāvīgais darbs - pandas from pathlib import Path -import matplotlib import pandas as pd import seaborn as sns import matplotlib.pyplot as plt @@ -20,30 +19,49 @@ def read(path): all_df.append(df) -def address(): - pass - - def get_data(): files = list(Path(Path(__file__).parent.absolute()).glob("**/*.xlsx")) for file in files: read(file) df_out = pd.concat(all_df).reset_index(drop=True) - df_out.to_excel("output/excel/combined.xlsx", index=False) - return df_out + # df_out.to_excel("output/excel/combined.xlsx", index=False) + + # replaces floor value to intiger + for value in df_out["Stāvs"]: + df_out = df_out.replace(value, int(value[:value.find("/")])) + + # replaces price value to intiger + for value in df_out["Cena"]: + df_out = df_out.replace(value, replace_value(value)) + return df_out.sort_values(by="Cena") + + +def replace_value(value): + new_value = value[:value.find(" ")] + new_value = new_value.replace(",", "") + return int(new_value) def graph_plot(): data = get_data() - # graph_corr(data) + graph_corr(data) graph_price(data) def graph_corr(data): data_corr = data.copy() - sns.set_style("whitegrid") - # plt.figure(figsize=(15, 10)) + + series = [] + for i in data_corr["Sērija"]: + if i not in series: + series.append(i) + j = 0 + for s in series: + data_corr = list(map(lambda x: x.replace(s, j), data_corr)) + j += 1 + + print(data_corr["Sērija"]) sns.heatmap(data_corr.corr()) plt.savefig(f"{output_path}/korelacija.png") @@ -61,25 +79,35 @@ def graph_price(data): plot4 = plt.subplot2grid((3, 2), (1, 1)) plot5 = plt.subplot2grid((3, 2), (2, 0)) - # price to floor + # floor to price plot1.scatter(data["Cena"], data["Stāvs"]) - plot1.set_title("Price to floor") + plot1.set_title("Floor to price") + plot1.set_xlabel("Price") + plot1.set_ylabel("Floor") - # price to room amount + # room amount to price plot2.scatter(data["Cena"], data["Istabu skaits"]) - plot2.set_title("Price to room amount") + plot2.set_title("Room amount to price") + plot2.set_xlabel("Price") + plot2.set_ylabel("Room amount") - # price to quadrature + # quadrature to price plot3.scatter(data["Cena"], data["Kvadratūra"]) - plot3.set_title("Price to quadrature") + plot3.set_title("Quadrature to price") + plot3.set_xlabel("Price") + plot3.set_ylabel("Quadrature") - # price to series + # series to price plot4.scatter(data["Cena"], data["Sērija"]) - plot4.set_title("Price to series") + plot4.set_title("Series to price") + plot4.set_xlabel("Price") + plot4.set_ylabel("Series") - # price to date + # date to price plot5.scatter(data["Cena"], data["Izvietošanas datums"]) - plot5.set_title("Price to floor") + plot5.set_title("Date to price") + plot5.set_xlabel("Price") + plot5.set_ylabel("Date") plt.savefig(f"{output_path}/cenu_grafiki.png") diff --git a/february/task_180222/ss_scraper.py b/february/task_180222/ss_scraper.py index db67d8c9..59d760c0 100644 --- a/february/task_180222/ss_scraper.py +++ b/february/task_180222/ss_scraper.py @@ -5,6 +5,7 @@ from bs4 import BeautifulSoup import requests import pandas as pd +from datetime import datetime HEADERS = { "User-Agent": @@ -76,7 +77,8 @@ class SS: chunked_items_list = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)] # combines each 'chunk_size' elements into array columns = ["Atrašanās vieta", "Istabu skaits", "Kvadratūra", "Stāvs", "Sērija", "Cena", "Pilns sludinājuma teksts", "Izvietošanas datums"] df = pd.DataFrame(chunked_items_list, columns=columns) - df.to_excel(excel_writer=f"output/excel/output_{self.name}.xlsx", index=False) + time = datetime.now().strftime("%d%m%Y%H%M%S") + df.to_excel(excel_writer=f"output/excel/output_{self.name}_{time}.xlsx", index=False) print("Done") @@ -88,11 +90,11 @@ flats_ogre = SS("https://www.ss.com/lv/real-estate/flats/ogre-and-reg/sell/", "o def main(): - flats_aizkraukle.get_data() - flats_tukums.get_data() + # flats_aizkraukle.get_data() + # flats_tukums.get_data() # flats_ogre.get_data() # flats_few.get_data() - # flats_many.get_data() + flats_many.get_data() if __name__ == '__main__': diff --git a/february/task_180222/test.py b/february/task_180222/test.py new file mode 100644 index 00000000..6e645ed9 --- /dev/null +++ b/february/task_180222/test.py @@ -0,0 +1,5 @@ +from datetime import datetime + +time = datetime.now().strftime("%d%m%Y%H%M%S") + +print(time) \ No newline at end of file diff --git a/output_many.xlsx b/output_many.xlsx new file mode 100644 index 00000000..cbdc4ded Binary files /dev/null and b/output_many.xlsx differ