Fix array split into chunks

This commit is contained in:
Kristofers-Solo 2021-12-02 15:13:07 +02:00
parent e10100c1ba
commit a528edea71
2 changed files with 9 additions and 12 deletions

View File

@@ -39,9 +39,7 @@ class SS:
def get_data(self): def get_data(self):
items = [] items = []
test = []
combined_list = [] combined_list = []
combined_list.clear()
# combined_list.clear() # combined_list.clear()
for page_number in range(1, self._get_page_amount() + 1): for page_number in range(1, self._get_page_amount() + 1):
url = self.url + f"/page{page_number}.html" url = self.url + f"/page{page_number}.html"
@@ -50,7 +48,8 @@ class SS:
soup = BeautifulSoup(page.content, 'html.parser') soup = BeautifulSoup(page.content, 'html.parser')
ids = [tag['id'] for tag in soup.select('tr[id]')] # creates list with ids ids = [tag['id'] for tag in soup.select('tr[id]')] # creates list with ids
ids = [x for x in ids if "tr_bnr" not in x] # removes "tr_bnr" from list ids = [x for x in ids if "tr_bnr" not in x] # removes "tr_bnr" from list
ids.pop(0) # removes first "head_line" id ids.remove("head_line") # removes first "head_line" id
# TODO # TODO
# Atrašānās vieta # Atrašānās vieta
# stāvs # stāvs
@@ -63,14 +62,12 @@ class SS:
# getting product name # getting product name
for el in soup.find_all(id=ids): for el in soup.find_all(id=ids):
items.clear()
for elem in el.find_all(class_='msga2-o pp6'): for elem in el.find_all(class_='msga2-o pp6'):
item = elem.get_text() items.append(elem.get_text())
items.append(item)
print(items)
combined_list.append(items)
# print(combined_list)
chunk_size = 6
chunked_items_list = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
# print(chunked_items_list)
columns = [ columns = [
"Atrašanās vieta", "Atrašanās vieta",
"Istabu skaits", "Istabu skaits",
@@ -82,9 +79,9 @@ class SS:
#"Izvietošanas datums" #"Izvietošanas datums"
] ]
# df = pd.DataFrame(combined_list) df = pd.DataFrame(chunked_items_list, columns=columns)
# df.to_excel(excel_writer='test.xlsx', index=False) df.to_excel(excel_writer='test.xlsx', index=False)
# print(df) print(df)
flats = SS("https://www.ss.com/lv/real-estate/flats/riga-region/all/sell") flats = SS("https://www.ss.com/lv/real-estate/flats/riga-region/all/sell")

Binary file not shown.