mirror of
https://github.com/kristoferssolo/School.git
synced 2026-03-22 00:26:35 +00:00
Finished task_071221 SS.com scraper
This commit is contained in:
14579
november/task_061021/book.txt
Normal file
14579
november/task_061021/book.txt
Normal file
File diff suppressed because it is too large
Load Diff
44
november/task_061021/kcagulis_061021.py
Normal file
44
november/task_061021/kcagulis_061021.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# Author - Kristiāns Francis Cagulis
|
||||
# Date - 06.10.2021
|
||||
import re
|
||||
|
||||
# Number of "Chapter N" headings (N = 1..CHAPTERS) the reader functions look for.
CHAPTERS = 61
|
||||
|
||||
|
||||
# creates file with chapters and row numbers
|
||||
def read_array(document, chapters=None):
    """Write the line number of every chapter heading to ``array_output.txt``.

    A heading is a line whose entire text (newline removed) is
    ``Chapter <n>``.  If the same heading line occurs several times, the
    first occurrence is reported.  Line numbers are 1-based.

    Args:
        document: Path of the UTF-8 text file to scan.
        chapters: How many headings to look for (``Chapter 1`` up to
            ``Chapter <chapters>``).  Defaults to the module constant
            ``CHAPTERS``.

    Raises:
        OSError: If ``document`` cannot be opened.
        ValueError: If one of the headings is not a line of the file.  The
            output file is left untouched in that case.
    """
    if chapters is None:
        chapters = CHAPTERS

    with open(document, "r", encoding='utf-8') as book:
        lines = [line.strip('\n') for line in book]  # drop the line terminators

    # Remember the first line number of every distinct line in a single pass,
    # instead of rescanning the whole book once per chapter.
    first_seen = {}
    for number, text in enumerate(lines, start=1):
        if text not in first_seen:
            first_seen[text] = number

    # Build the complete report before touching the output file, so a missing
    # heading cannot leave a half-written report behind.
    rows = []
    for i in range(1, chapters + 1):
        heading = f"Chapter {i}"
        if heading not in first_seen:
            raise ValueError(f"{heading!r} is not a line of {document!r}")
        rows.append(f"Line {first_seen[heading]} - Chapter {i}\n")

    with open('array_output.txt', 'w', encoding='utf-8') as output:
        output.writelines(rows)
|
||||
|
||||
|
||||
# creates file with chapter positions
|
||||
def read_string(document, chapters=None):
    """Write the character offset of every chapter heading to ``str_output.txt``.

    The whole file is read as one string and searched for ``Chapter <n>`` as
    a whole word.  When the text matches more than once, the second match is
    reported (the first is skipped -- presumably a table-of-contents entry;
    confirm against the book being used).  When it matches only once, that
    single match is reported.

    Args:
        document: Path of the UTF-8 text file to scan.
        chapters: How many headings to look for (``Chapter 1`` up to
            ``Chapter <chapters>``).  Defaults to the module constant
            ``CHAPTERS``.

    Raises:
        OSError: If ``document`` cannot be opened.
        ValueError: If a chapter does not occur in the text at all.  The
            output file is left untouched in that case.
    """
    if chapters is None:
        chapters = CHAPTERS

    with open(document, "r", encoding='utf-8') as book:
        text = book.read()

    rows = []
    for i in range(1, chapters + 1):
        # Unpacking the iterator into exactly two names used to fail with an
        # opaque "values to unpack" error unless there were precisely two
        # matches; collect the matches and choose explicitly instead.
        matches = list(re.finditer(rf"\bChapter {i}\b", text))
        if not matches:
            raise ValueError(f"'Chapter {i}' does not occur in {document!r}")
        position = matches[1] if len(matches) > 1 else matches[0]
        rows.append(f"Position {position.start()} - Chapter {i}\n")

    # Written only after every chapter was found, so a failure cannot leave a
    # truncated report behind.
    with open('str_output.txt', 'w', encoding='utf-8') as output:
        output.writelines(rows)
|
||||
|
||||
|
||||
def read_book(document):
    """Run both chapter reports on ``document``: ``read_array``, then ``read_string``."""
    for report in (read_array, read_string):
        report(document)
|
||||
|
||||
|
||||
def main():
    """Process the first usable book file, asking the user as a last resort.

    ``book.txt`` is tried first, then ``1342-0.txt``.  If neither can be
    processed, the user is prompted for a file name; an error while
    processing that file propagates to the caller.
    """
    for candidate in ("book.txt", "1342-0.txt"):
        try:
            read_book(candidate)
        except (OSError, ValueError):
            # OSError: the file is missing or unreadable.
            # ValueError: it was read but the expected chapter headings were
            # not found (or it is not valid UTF-8).
            # A bare ``except`` would also swallow KeyboardInterrupt and
            # SystemExit, so only these failures trigger the fallback.
            continue
        return

    read_book(input("Ievadiet faila nosaukumu: "))
|
||||
|
||||
|
||||
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
44
november/task_241121/demo_ikea.py
Normal file
44
november/task_241121/demo_ikea.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# Author - Kristiāns Francis Cagulis
|
||||
# Date - 01.12.2021
|
||||
# Title - Stundas kopdarbs
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
|
||||
url = "https://www.ikea.lv/"
all_page = requests.get(url)

# Parse and print only when the page was fetched successfully.
if all_page.status_code == 200:
    page = BeautifulSoup(all_page.content, 'html.parser')
    found = page.find_all(class_="itemBlock")

    info = []  # one [name, price, facts] record per element of ``found``
    for item in found:
        # A fresh list per item: creating it once before the loop made every
        # entry of ``info`` the same, ever-growing list object.
        item_array = []

        item = item.findChild("div").findChild(class_="card-body")

        item_name = item.findChild(class_="itemName")
        item_name = item_name.findChild("div").findChild("h6")
        item_array.append(item_name.string)

        price = item.findChild(class_="itemPrice-wrapper")
        price = price.findChild("p").findChild("span")

        # Use "data-price" when present, otherwise "data-pricefamily".
        # Only a missing attribute should trigger the fallback.
        try:
            item_array.append(price.attrs["data-price"])
        except KeyError:
            item_array.append(price.attrs["data-pricefamily"])

        # NOTE(review): the original looped over the freshly created, empty
        # ``all_facts`` list itself, so this loop has never executed and the
        # facts entry is always ``[]``.  ``fact_nodes`` keeps that behaviour
        # until the lookup for the item's fact elements is filled in.
        fact_nodes = []
        all_facts = []
        for facts in fact_nodes:
            if len(facts) == 1:
                all_facts.append(facts.string)
            else:
                atrasts = facts.findChildren("span")
                for i in atrasts:
                    all_facts.append(i.string)

        item_array.append(all_facts)
        info.append(item_array)

    for ieraksts in info:
        print(ieraksts)
|
||||
Reference in New Issue
Block a user