mirror of
https://github.com/kristoferssolo/School.git
synced 2025-10-21 20:10:38 +00:00
task_180222
This commit is contained in:
parent
891e77616d
commit
8f9c22e1d6
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 390 KiB After Width: | Height: | Size: 341 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 17 KiB |
@ -3,7 +3,6 @@
|
||||
# Title - Patstāvīgais darbs - pandas
|
||||
|
||||
from pathlib import Path
|
||||
import matplotlib
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
@ -20,30 +19,49 @@ def read(path):
|
||||
all_df.append(df)
|
||||
|
||||
|
||||
def address():
    """Placeholder with no behaviour yet; calling it returns ``None``."""
|
||||
|
||||
|
||||
def get_data():
    """Load every workbook below this script's directory into one sorted frame.

    Each ``*.xlsx`` file found recursively under the directory containing this
    file is handed to ``read``, which appends a frame to the shared ``all_df``
    list.  The collected frames are concatenated, the ``"Stāvs"`` and
    ``"Cena"`` columns are converted to integers, and the rows are ordered by
    price.

    Returns:
        pandas.DataFrame: the combined rows, sorted ascending by ``"Cena"``.

    Raises:
        ValueError: if a floor or price cell does not start with an integer.
    """
    for file in Path(__file__).parent.absolute().glob("**/*.xlsx"):
        read(file)
    df_out = pd.concat(all_df).reset_index(drop=True)

    # NOTE(review): the combined frame is deliberately not exported to
    # "output/excel/combined.xlsx" here.  If that folder sits below this
    # script's directory, the recursive glob above would read the export back
    # in on the next run -- confirm the layout before re-enabling the export.

    # Keep only the part before "/" as an integer (presumably the cell holds
    # "floor/total floors" -- TODO confirm).  Converting the column itself,
    # rather than replacing values across the whole frame, leaves identical
    # text in other columns untouched and avoids one full-frame pass per row.
    df_out["Stāvs"] = df_out["Stāvs"].map(
        lambda floor: int(str(floor).partition("/")[0])
    )

    # Turn the price text into an integer so the rows sort numerically.
    df_out["Cena"] = df_out["Cena"].map(replace_value)

    return df_out.sort_values(by="Cena")
|
||||
|
||||
|
||||
def replace_value(value):
    """Convert price text to an integer.

    Only the text before the first space is used; commas inside it are
    dropped before conversion.  Text without any space is converted whole.

    Args:
        value: price text, e.g. ``"35,000 abc"`` or ``"35,000"``.

    Returns:
        int: the numeric part, e.g. ``35000``.

    Raises:
        ValueError: if the leading part is not an integer once commas are
            removed (for example when ``value`` starts with a space).
    """
    # partition() returns the whole string when there is no space, whereas
    # slicing with find() == -1 would silently cut off the last digit.
    number_text = value.partition(" ")[0]
    return int(number_text.replace(",", ""))
|
||||
|
||||
|
||||
def graph_plot():
    """Load the combined data once and draw both charts from it.

    The frame returned by ``get_data`` is passed first to ``graph_corr`` and
    then to ``graph_price``.
    """
    data = get_data()
    graph_corr(data)
    graph_price(data)
|
||||
|
||||
|
||||
def graph_corr(data):
    """Save a correlation heatmap of *data* as ``korelacija.png``.

    The ``"Sērija"`` labels are replaced by integer codes (0, 1, 2, ... in
    order of first appearance) so that the column can take part in the
    correlation.  The work is done on a copy; *data* itself is not modified.

    Args:
        data: frame containing at least a ``"Sērija"`` column.
    """
    data_corr = data.copy()
    sns.set_style("whitegrid")

    # dict.fromkeys keeps first-seen order and drops duplicates, so each
    # distinct label gets a stable code.
    series_codes = {
        label: code
        for code, label in enumerate(dict.fromkeys(data_corr["Sērija"]))
    }
    data_corr["Sērija"] = data_corr["Sērija"].map(series_codes)

    # Progress/debug output kept from the original implementation.
    print(data_corr["Sērija"])

    # Only numeric columns can be correlated; selecting them explicitly keeps
    # text columns from reaching corr().
    sns.heatmap(data_corr.select_dtypes(include="number").corr())
    plt.savefig(f"{output_path}/korelacija.png")
|
||||
|
||||
@ -61,25 +79,35 @@ def graph_price(data):
|
||||
plot4 = plt.subplot2grid((3, 2), (1, 1))
|
||||
plot5 = plt.subplot2grid((3, 2), (2, 0))
|
||||
|
||||
# price to floor
|
||||
# floor to price
|
||||
plot1.scatter(data["Cena"], data["Stāvs"])
|
||||
plot1.set_title("Price to floor")
|
||||
plot1.set_title("Floor to price")
|
||||
plot1.set_xlabel("Price")
|
||||
plot1.set_ylabel("Floor")
|
||||
|
||||
# price to room amount
|
||||
# room amount to price
|
||||
plot2.scatter(data["Cena"], data["Istabu skaits"])
|
||||
plot2.set_title("Price to room amount")
|
||||
plot2.set_title("Room amount to price")
|
||||
plot2.set_xlabel("Price")
|
||||
plot2.set_ylabel("Room amount")
|
||||
|
||||
# price to quadrature
|
||||
# quadrature to price
|
||||
plot3.scatter(data["Cena"], data["Kvadratūra"])
|
||||
plot3.set_title("Price to quadrature")
|
||||
plot3.set_title("Quadrature to price")
|
||||
plot3.set_xlabel("Price")
|
||||
plot3.set_ylabel("Quadrature")
|
||||
|
||||
# price to series
|
||||
# series to price
|
||||
plot4.scatter(data["Cena"], data["Sērija"])
|
||||
plot4.set_title("Price to series")
|
||||
plot4.set_title("Series to price")
|
||||
plot4.set_xlabel("Price")
|
||||
plot4.set_ylabel("Series")
|
||||
|
||||
# price to date
|
||||
# date to price
|
||||
plot5.scatter(data["Cena"], data["Izvietošanas datums"])
|
||||
plot5.set_title("Price to floor")
|
||||
plot5.set_title("Date to price")
|
||||
plot5.set_xlabel("Price")
|
||||
plot5.set_ylabel("Date")
|
||||
|
||||
plt.savefig(f"{output_path}/cenu_grafiki.png")
|
||||
|
||||
|
||||
@ -5,6 +5,7 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
HEADERS = {
|
||||
"User-Agent":
|
||||
@ -76,7 +77,8 @@ class SS:
|
||||
chunked_items_list = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)] # combines each 'chunk_size' elements into array
|
||||
columns = ["Atrašanās vieta", "Istabu skaits", "Kvadratūra", "Stāvs", "Sērija", "Cena", "Pilns sludinājuma teksts", "Izvietošanas datums"]
|
||||
df = pd.DataFrame(chunked_items_list, columns=columns)
|
||||
df.to_excel(excel_writer=f"output/excel/output_{self.name}.xlsx", index=False)
|
||||
time = datetime.now().strftime("%d%m%Y%H%M%S")
|
||||
df.to_excel(excel_writer=f"output/excel/output_{self.name}_{time}.xlsx", index=False)
|
||||
print("Done")
|
||||
|
||||
|
||||
@ -88,11 +90,11 @@ flats_ogre = SS("https://www.ss.com/lv/real-estate/flats/ogre-and-reg/sell/", "o
|
||||
|
||||
|
||||
def main():
    """Collect the listings for the currently selected target.

    Only ``flats_many`` is run.  The other targets referenced in this module
    (``flats_aizkraukle``, ``flats_tukums``, ``flats_ogre``, ``flats_few``)
    are intentionally not collected here; call their ``get_data()`` as well
    if they are needed.
    """
    flats_many.get_data()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
5
february/task_180222/test.py
Normal file
5
february/task_180222/test.py
Normal file
@ -0,0 +1,5 @@
|
||||
from datetime import datetime
|
||||
|
||||
# Current local time as DDMMYYYYHHMMSS: day, month, year, hour, minute,
# second, with no separators.
time = datetime.now().strftime("%d%m%Y%H%M%S")

print(time)
|
||||
BIN
output_many.xlsx
Normal file
BIN
output_many.xlsx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user