Finished task_071221 SS.com scraper

This commit is contained in:
Kristofers Solo
2021-12-07 23:35:44 +02:00
parent 344f36ecd8
commit 7aeec062ed
9 changed files with 14792 additions and 111 deletions

14579
november/task_061021/book.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,44 @@
# Author - Kristiāns Francis Cagulis
# Date - 06.10.2021
import re
CHAPTERS = 61


def read_array(document, chapters=CHAPTERS, output_path='array_output.txt'):
    """Write the line number of every chapter heading to a report file.

    The book is read as a list of lines. A heading is a line whose entire
    text is exactly ``Chapter <n>``; when it occurs more than once the first
    occurrence is reported.

    Args:
        document: Path of the UTF-8 encoded book.
        chapters: Highest chapter number to look up (chapters 1..chapters).
        output_path: File that receives one ``Line <row> - Chapter <n>`` row
            per chapter. It is overwritten, but only once every heading has
            been found, so a failed run never leaves a truncated report.

    Raises:
        OSError: If ``document`` or ``output_path`` cannot be opened.
        ValueError: If a chapter heading is not a line of the book.
    """
    with open(document, "r", encoding='utf-8') as book:
        lines = [line.strip('\n') for line in book]  # drop the line terminator only

    # Map each distinct line to its first 1-based row in one pass, instead of
    # rescanning the whole book from the top for every chapter.
    first_row = {}
    for row, text in enumerate(lines, start=1):
        first_row.setdefault(text, row)

    report = []
    for i in range(1, chapters + 1):
        heading = f"Chapter {i}"
        if heading not in first_row:
            raise ValueError(f"{heading!r} is not a line of {document!r}")
        report.append(f"Line {first_row[heading]} - {heading}\n")

    with open(output_path, 'w', encoding='utf-8') as output:
        output.writelines(report)
def read_string(document):
    """Write the character offset of every chapter heading to ``str_output.txt``.

    The whole book is loaded as one string and searched once per chapter
    (1..CHAPTERS) for the whole-word pattern ``Chapter <n>``. Each search
    must yield exactly two matches; the offset of the second one is written
    as ``Position <offset> - Chapter <n>``.

    Raises:
        OSError: If ``document`` cannot be opened.
        ValueError: If a chapter pattern does not match exactly twice
            (raised by the two-target unpacking below).
    """
    with open(document, "r", encoding='utf-8') as source:
        text = source.read()

    with open('str_output.txt', 'w') as report:
        for number in range(1, CHAPTERS + 1):
            occurrences = re.finditer(rf"\bChapter {number}\b", text)
            # NOTE(review): presumably the first hit is a table-of-contents
            # entry and the second the real heading - confirm against the book.
            _first, second = occurrences
            report.write(f"Position {second.start()} - Chapter {number}\n")
def read_book(document):
    """Generate both chapter reports for ``document``.

    The line-number report (``read_array``) is produced first, then the
    character-offset report (``read_string``). An exception from either
    step propagates to the caller.
    """
    for build_report in (read_array, read_string):
        build_report(document)
def main():
    """Process the first usable book file, asking the user as a last resort.

    ``book.txt`` and then ``1342-0.txt`` are tried in order. A file counts
    as unusable when it cannot be opened or read (``OSError``) or when its
    contents are not as expected (``ValueError``, which also covers decoding
    errors). If neither works, the user is prompted for a file name, and any
    error from that final attempt propagates.

    Only those two exception families trigger a fallback, so Ctrl+C and
    genuine programming errors are no longer silently swallowed.
    """
    for candidate in ("book.txt", "1342-0.txt"):
        try:
            read_book(candidate)
        except (OSError, ValueError):
            continue  # try the next known file name
        return
    read_book(input("Ievadiet faila nosaukumu: "))


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,44 @@
# Author - Kristiāns Francis Cagulis
# Date - 01.12.2021
# Title - Stundas kopdarbs
from bs4 import BeautifulSoup
import requests
url = "https://www.ikea.lv/"


def _price_of(price_tag):
    """Return the tag's ``data-price`` attribute, else ``data-pricefamily``."""
    try:
        return price_tag.attrs["data-price"]
    except KeyError:  # only a missing attribute should trigger the fallback
        return price_tag.attrs["data-pricefamily"]


def _fact_strings(fact_tags):
    """Flatten fact elements into a list of their strings.

    An element whose ``len()`` is 1 contributes its own ``.string``; any
    other element contributes the ``.string`` of every ``<span>`` returned
    by ``findChildren``.
    """
    strings = []
    for tag in fact_tags:
        if len(tag) == 1:
            strings.append(tag.string)
        else:
            strings.extend(span.string for span in tag.findChildren("span"))
    return strings


def _parse_item(block):
    """Build one ``[name, price, facts]`` record from an ``itemBlock`` element."""
    body = block.findChild("div").findChild(class_="card-body")

    name_tag = body.findChild(class_="itemName").findChild("div").findChild("h6")
    price_tag = body.findChild(class_="itemPrice-wrapper").findChild("p").findChild("span")

    # NOTE(review): the original looped over the empty list it had just
    # created, so no facts were ever collected. The selector for the fact
    # elements is not visible in this file; pass those elements here once it
    # is known. Until then every record carries an empty facts list, exactly
    # as before.
    facts = _fact_strings(())

    return [name_tag.string, _price_of(price_tag), facts]


# A timeout keeps the script from hanging forever on an unresponsive server.
all_page = requests.get(url, timeout=10)

if all_page.status_code == 200:
    page = BeautifulSoup(all_page.content, 'html.parser')
    found = page.find_all(class_="itemBlock")

    # One fresh record per product; the original appended every product to a
    # single list created before the loop, so the records ran together.
    info = [_parse_item(item) for item in found]

    for ieraksts in info:
        print(ieraksts)