task_180222

2026-02-04 14:52:11 +00:00 · 2022-02-20 13:13:31 +02:00
parent 891e77616d
commit 8f9c22e1d6
8 changed files with 59 additions and 24 deletions
--- a/february/task_180222/output/excel/combined.xlsx
+++ b/february/task_180222/output/excel/combined.xlsx
--- a/february/task_180222/output/excel/output_many_20022022124051.xlsx
+++ b/february/task_180222/output/excel/output_many_20022022124051.xlsx
--- a/february/task_180222/output/graphs/cenu_grafiki.png
+++ b/february/task_180222/output/graphs/cenu_grafiki.png
--- a/february/task_180222/output/graphs/korelacija.png
+++ b/february/task_180222/output/graphs/korelacija.png
--- a/february/task_180222/pd_pandas_k_f_cagulis.py
+++ b/february/task_180222/pd_pandas_k_f_cagulis.py
@@ -3,7 +3,6 @@
 # Title - Patstāvīgais darbs - pandas

 from pathlib import Path
-import matplotlib
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
@@ -20,30 +19,49 @@ def read(path):
 	all_df.append(df)


-def address():
-	pass
-
-
 def get_data():
 	files = list(Path(Path(__file__).parent.absolute()).glob("**/*.xlsx"))

 	for file in files:
 		read(file)
 	df_out = pd.concat(all_df).reset_index(drop=True)
-	df_out.to_excel("output/excel/combined.xlsx", index=False)
-	return df_out
+	# df_out.to_excel("output/excel/combined.xlsx", index=False)
+
+	# replaces each floor value with the integer found before its "/"
+	for value in df_out["Stāvs"]:
+		df_out = df_out.replace(value, int(value[:value.find("/")]))
+
+	# replaces each price value with an integer (text before the first space, commas removed)
+	for value in df_out["Cena"]:
+		df_out = df_out.replace(value, replace_value(value))
+	return df_out.sort_values(by="Cena")
+
+
+def replace_value(value):
+	new_value = value[:value.find(" ")]
+	new_value = new_value.replace(",", "")
+	return int(new_value)


 def graph_plot():
 	data = get_data()
-	# graph_corr(data)
+	graph_corr(data)
 	graph_price(data)


 def graph_corr(data):
 	data_corr = data.copy()
-	sns.set_style("whitegrid")
-	# plt.figure(figsize=(15, 10))
+
+	series = []
+	for i in data_corr["Sērija"]:
+		if i not in series:
+			series.append(i)
+	j = 0
+	for s in series:
+		data_corr = list(map(lambda x: x.replace(s, j), data_corr))
+		j += 1
+
+	print(data_corr["Sērija"])
 	sns.heatmap(data_corr.corr())
 	plt.savefig(f"{output_path}/korelacija.png")

@@ -61,25 +79,35 @@ def graph_price(data):
 	plot4 = plt.subplot2grid((3, 2), (1, 1))
 	plot5 = plt.subplot2grid((3, 2), (2, 0))

-	# price to floor
+	# floor to price
 	plot1.scatter(data["Cena"], data["Stāvs"])
-	plot1.set_title("Price to floor")
+	plot1.set_title("Floor to price")
+	plot1.set_xlabel("Price")
+	plot1.set_ylabel("Floor")

-	# price to room amount
+	# room amount to price
 	plot2.scatter(data["Cena"], data["Istabu skaits"])
-	plot2.set_title("Price to room amount")
+	plot2.set_title("Room amount to price")
+	plot2.set_xlabel("Price")
+	plot2.set_ylabel("Room amount")

-	# price to quadrature
+	# quadrature to price
 	plot3.scatter(data["Cena"], data["Kvadratūra"])
-	plot3.set_title("Price to quadrature")
+	plot3.set_title("Quadrature to price")
+	plot3.set_xlabel("Price")
+	plot3.set_ylabel("Quadrature")

-	# price to series
+	# series to price
 	plot4.scatter(data["Cena"], data["Sērija"])
-	plot4.set_title("Price to series")
+	plot4.set_title("Series to price")
+	plot4.set_xlabel("Price")
+	plot4.set_ylabel("Series")

-	# price to date
+	# date to price
 	plot5.scatter(data["Cena"], data["Izvietošanas datums"])
-	plot5.set_title("Price to floor")
+	plot5.set_title("Date to price")
+	plot5.set_xlabel("Price")
+	plot5.set_ylabel("Date")

 	plt.savefig(f"{output_path}/cenu_grafiki.png")

--- a/february/task_180222/ss_scraper.py
+++ b/february/task_180222/ss_scraper.py
@@ -5,6 +5,7 @@
 from bs4 import BeautifulSoup
 import requests
 import pandas as pd
+from datetime import datetime

 HEADERS = {
    "User-Agent":
@@ -76,7 +77,8 @@ class SS:
 		chunked_items_list = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]  # combines each 'chunk_size' elements into array
 		columns = ["Atrašanās vieta", "Istabu skaits", "Kvadratūra", "Stāvs", "Sērija", "Cena", "Pilns sludinājuma teksts", "Izvietošanas datums"]
 		df = pd.DataFrame(chunked_items_list, columns=columns)
-		df.to_excel(excel_writer=f"output/excel/output_{self.name}.xlsx", index=False)
+		time = datetime.now().strftime("%d%m%Y%H%M%S")
+		df.to_excel(excel_writer=f"output/excel/output_{self.name}_{time}.xlsx", index=False)
 		print("Done")


@@ -88,11 +90,11 @@ flats_ogre = SS("https://www.ss.com/lv/real-estate/flats/ogre-and-reg/sell/", "o


 def main():
-	flats_aizkraukle.get_data()
-	flats_tukums.get_data()
+	# flats_aizkraukle.get_data()
+	# flats_tukums.get_data()
 	# flats_ogre.get_data()
 	# flats_few.get_data()
-	# flats_many.get_data()
+	flats_many.get_data()


 if __name__ == '__main__':
--- a/february/task_180222/test.py
+++ b/february/task_180222/test.py
@@ -0,0 +1,5 @@
+from datetime import datetime
+
+time = datetime.now().strftime("%d%m%Y%H%M%S")
+
+print(time)
--- a/output_many.xlsx
+++ b/output_many.xlsx