mirror of
https://github.com/kristoferssolo/School.git
synced 2025-10-21 20:10:38 +00:00
Fix array split into chunks
This commit is contained in:
parent
e10100c1ba
commit
a528edea71
@ -39,9 +39,7 @@ class SS:
|
|||||||
|
|
||||||
def get_data(self):
|
def get_data(self):
|
||||||
items = []
|
items = []
|
||||||
test = []
|
|
||||||
combined_list = []
|
combined_list = []
|
||||||
combined_list.clear()
|
|
||||||
# combined_list.clear()
|
# combined_list.clear()
|
||||||
for page_number in range(1, self._get_page_amount() + 1):
|
for page_number in range(1, self._get_page_amount() + 1):
|
||||||
url = self.url + f"/page{page_number}.html"
|
url = self.url + f"/page{page_number}.html"
|
||||||
@ -50,7 +48,8 @@ class SS:
|
|||||||
soup = BeautifulSoup(page.content, 'html.parser')
|
soup = BeautifulSoup(page.content, 'html.parser')
|
||||||
ids = [tag['id'] for tag in soup.select('tr[id]')] # creates list with ids
|
ids = [tag['id'] for tag in soup.select('tr[id]')] # creates list with ids
|
||||||
ids = [x for x in ids if "tr_bnr" not in x] # removes "tr_bnr" from list
|
ids = [x for x in ids if "tr_bnr" not in x] # removes "tr_bnr" from list
|
||||||
ids.pop(0) # removes first "head_line" id
|
ids.remove("head_line") # removes first "head_line" id
|
||||||
|
|
||||||
# TODO
|
# TODO
|
||||||
# Atrašānās vieta
|
# Atrašānās vieta
|
||||||
# stāvs
|
# stāvs
|
||||||
@ -63,14 +62,12 @@ class SS:
|
|||||||
|
|
||||||
# getting product name
|
# getting product name
|
||||||
for el in soup.find_all(id=ids):
|
for el in soup.find_all(id=ids):
|
||||||
items.clear()
|
|
||||||
for elem in el.find_all(class_='msga2-o pp6'):
|
for elem in el.find_all(class_='msga2-o pp6'):
|
||||||
item = elem.get_text()
|
items.append(elem.get_text())
|
||||||
items.append(item)
|
|
||||||
print(items)
|
|
||||||
combined_list.append(items)
|
|
||||||
# print(combined_list)
|
|
||||||
|
|
||||||
|
chunk_size = 6
|
||||||
|
chunked_items_list = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
|
||||||
|
# print(chunked_items_list)
|
||||||
columns = [
|
columns = [
|
||||||
"Atrašanās vieta",
|
"Atrašanās vieta",
|
||||||
"Istabu skaits",
|
"Istabu skaits",
|
||||||
@ -82,9 +79,9 @@ class SS:
|
|||||||
#"Izvietošanas datums"
|
#"Izvietošanas datums"
|
||||||
]
|
]
|
||||||
|
|
||||||
# df = pd.DataFrame(combined_list)
|
df = pd.DataFrame(chunked_items_list, columns=columns)
|
||||||
# df.to_excel(excel_writer='test.xlsx', index=False)
|
df.to_excel(excel_writer='test.xlsx', index=False)
|
||||||
# print(df)
|
print(df)
|
||||||
|
|
||||||
|
|
||||||
flats = SS("https://www.ss.com/lv/real-estate/flats/riga-region/all/sell")
|
flats = SS("https://www.ss.com/lv/real-estate/flats/riga-region/all/sell")
|
||||||
|
|||||||
BIN
december/task_011221/test.xlsx
Normal file
BIN
december/task_011221/test.xlsx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user