Minor release update

Added `pyproject.toml` file.
Kristofers Solo 2023-09-09 23:37:16 +03:00
parent cef627e196
commit 611a70ad68
8 changed files with 77 additions and 51 deletions

2
.gitignore vendored

@@ -112,5 +112,3 @@ dmypy.json
# Pyre type checker
.pyre/
config.json

README.md

@@ -1,25 +1,23 @@
# Webscraper
Web scraper of the SS marketplace for GPUs
## Installation
```sh
git clone https://github.com/kristoferssolo/SScom-scraper
cd SScom-scraper
pip install .
```
Or download the [zip](https://github.com/kristoferssolo/SScom-scraper/archive/refs/heads/master.zip) archive.
Create a `config.json` file with the following content:
Add [Telegram bot API token](https://core.telegram.org/bots/tutorial#obtain-your-bot-token) to the `config.json` file.
```json
{
"API_TOKEN": "<TOKEN_FROM_BOT_FATHER>"
}
```
Install required libraries:
Run the bot:
```sh
pip install -r requirements.txt
python main.py
```
## Libraries used
- [Aiogram](https://github.com/aiogram/aiogram)
- [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/)

3
config.json Normal file

@@ -0,0 +1,3 @@
{
"API_TOKEN": ""
}

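The committed `config.json` ships with an empty token. A hedged sketch of how the bot can read it (main.py imports `json` and `Path` and defines `BASE_DIR` in the hunk below, but the actual loading code sits outside the diff, so `CONFIG_PATH` and the exact calls here are assumptions):
```python
import json
from pathlib import Path

# Mirrors the BASE_DIR definition visible in the main.py diff below.
BASE_DIR = Path(__file__).resolve().parent
CONFIG_PATH = BASE_DIR / "config.json"  # illustrative name, not from the diff

config = json.loads(CONFIG_PATH.read_text())
API_TOKEN = config["API_TOKEN"]  # empty in the committed file; paste the BotFather token first
```
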
14
main.py Normal file → Executable file

@@ -1,13 +1,10 @@
"""
Telegram bot for scraper
Author - Kristofers Solo
Licence - MIT
"""
#!/usr/bin/env python3
import json
import logging
from pathlib import Path
from aiogram import Bot, Dispatcher, executor, types
from scraper import gpus
from SScom.scraper import gpus
BASE_DIR = Path(__file__).resolve().parent
@@ -30,8 +27,9 @@ async def gpu_price_message(message: types.Message):
"""Returns all scraped GPUs and their prices to telegram"""
data = gpus.get_data()
message_size = 100
chunked_data = [data[i:i + message_size]
for i in range(0, len(data), message_size)]
chunked_data = [
data[i : i + message_size] for i in range(0, len(data), message_size)
]
for i in chunked_data:
await message.answer("\n".join(i))

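The reflowed comprehension in `gpu_price_message` is purely cosmetic; the batching itself is worth spelling out, since Telegram caps message length and the bot therefore sends at most `message_size` lines per message. A self-contained illustration (the sample data is invented):
```python
# Stand-in for gpus.get_data(); 250 fake listing lines.
data = [f"GPU {i} - 100€" for i in range(250)]
message_size = 100

# Same comprehension as in main.py: slice data into batches of 100.
chunked_data = [
    data[i : i + message_size] for i in range(0, len(data), message_size)
]

print([len(chunk) for chunk in chunked_data])  # [100, 100, 50]
# Each chunk is joined with "\n" and sent as one Telegram message.
```
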
47
pyproject.toml Normal file

@@ -0,0 +1,47 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "SScom-scraper"
version = "0.1.0"
description = "Webscraper of SS marketplace for GPUs "
authors = [{ name = "Kristofers Solo", email = "dev@kristofers.xyz" }]
keywords = ["scraper", "telegram"]
readme = "README.md"
requires-python = ">=3.10"
license = { text = "GPLv3" }
classifiers = [
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3:10",
"Programming Language :: Python :: 3:11",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: Linux",
]
dependencies = ["aiogram~=2.0", "beautifulsoup4~=4.12", "requests~=2.31"]
[project.urls]
"Source" = "https://github.com/kristoferssolo/SScom-scraper"
"Bug Tracker" = "https://github.com/kristoferssolo/SScom-scraper/issues"
[tool.mypy]
check_untyped_defs = true
disallow_any_generics = true
ignore_missing_imports = true
mypy_path = "src"
no_implicit_optional = true
no_implicit_reexport = true
show_error_codes = true
strict_equality = true
warn_redundant_casts = true
warn_return_any = true
warn_unreachable = true
warn_unused_configs = true
[tool.ruff]
line-length = 160
[tool.ruff.flake8-quotes]
docstring-quotes = "double"

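With `pyproject.toml` in place the project installs as a regular package, and the pinned dev tools pick up the `[tool.mypy]` and `[tool.ruff]` tables above. Typical invocations (standard pip/mypy/ruff usage; targeting `src` follows the `mypy_path` setting and is an assumption about the layout):
```sh
pip install .                        # setuptools build per [build-system]
pip install -r requirements_dev.txt  # mypy and ruff, pinned below
mypy src                             # applies the [tool.mypy] settings
ruff .                               # ruff 0.0.* style invocation; applies [tool.ruff]
```
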
requirements.txt

@@ -1,17 +1,3 @@
aiogram==2.22.1
aiohttp==3.8.3
aiosignal==1.2.0
async-timeout==4.0.2
attrs==22.1.0
Babel==2.9.1
beautifulsoup4==4.11.1
certifi==2022.9.24
charset-normalizer==2.1.1
frozenlist==1.3.1
idna==3.4
multidict==6.0.2
pytz==2022.2.1
requests==2.28.1
soupsieve==2.3.2.post1
urllib3==1.26.12
yarl==1.8.1
aiogram==2.0.*
beautifulsoup4==4.12.*
requests==2.31.*

2
requirements_dev.txt Normal file

@@ -0,0 +1,2 @@
mypy==1.5.*
ruff==0.0.*

src/SScom/scraper.py

@@ -1,14 +1,9 @@
"""
Web scraper of the SS marketplace for GPUs
Author - Kristofers Solo
Licence - MIT
"""
from bs4 import BeautifulSoup
import requests
from bs4 import BeautifulSoup
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.97 Safari/537.36 Vivaldi/4.1.2369.21"}
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.97 Safari/537.36 Vivaldi/4.1.2369.21"
}
class SS:
@@ -20,8 +15,7 @@ class SS:
soup = BeautifulSoup(page.content, "html.parser")
last_url = soup.find(class_="td2").findChild("a")["href"]
page_amount = last_url[last_url.find(
"page") + 4:last_url.find(".html")]
page_amount = last_url[last_url.find("page") + 4 : last_url.find(".html")]
print(f"Page amount = {page_amount}")
return int(page_amount)
@@ -35,8 +29,7 @@ class SS:
soup = BeautifulSoup(page.content, "html.parser")
# item ids
ids = [tag["id"]
for tag in soup.select("tr[id]")] # creates list with ids
ids = [tag["id"] for tag in soup.select("tr[id]")] # creates list with ids
# removes "tr_bnr" elements from list
ids = [x for x in ids if "tr_bnr" not in x]
ids.remove("head_line") # removes first "head_line" id
@@ -70,7 +63,7 @@ class SS:
# convert price back to string and add `€`
gpu[1] = str(gpu[1]) + ""
# transform 2D array to 1D
gpus_list[index] = (" - ".join(gpu))
gpus_list[index] = " - ".join(gpu)
return gpus_list
@@ -79,12 +72,13 @@ gpus = SS("https://www.ss.com/lv/electronics/computers/completing-pc/video/sell"
def main():
"""Main funcion to test scraper"""
"""Funcion to test scraper"""
data = gpus.get_data()
message_size = 100
chunked_data = [data[i:i + message_size]
for i in range(0, len(data), message_size)]
chunked_data = [
data[i : i + message_size] for i in range(0, len(data), message_size)
]
for i in chunked_data:
print("\n".join(i))