mirror of
https://github.com/kristoferssolo/SScom-scraper.git
synced 2025-10-21 19:50:33 +00:00
Minor release update
Added `pyproject.toml` file.
This commit is contained in:
parent
cef627e196
commit
611a70ad68
2
.gitignore
vendored
2
.gitignore
vendored
@ -112,5 +112,3 @@ dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
config.json
|
||||
|
||||
14
README.md
14
README.md
@ -1,25 +1,23 @@
|
||||
# Webscraper
|
||||
|
||||
Webscraper of SS marketplace for GPUs
|
||||
|
||||
## Installation
|
||||
|
||||
```sh
|
||||
git clone https://github.com/kristoferssolo/SScom-scraper
|
||||
cd SScom-scraper
|
||||
pip install .
|
||||
```
|
||||
Or download [zip](https://github.com/kristoferssolo/SScom-scraper/archive/refs/heads/master.zip).
|
||||
Create a `config.json` file with the following content:
|
||||
|
||||
Add [Telegram bot API token](https://core.telegram.org/bots/tutorial#obtain-your-bot-token) to the `config.json` file.
|
||||
```json
|
||||
{
|
||||
"API_TOKEN": "<TOKEN_FROM_BOT_FATHER>"
|
||||
}
|
||||
```
|
||||
|
||||
Install required libraries:
|
||||
Run the bot.
|
||||
```sh
|
||||
pip install -r requirements.txt
|
||||
python main.py
|
||||
```
|
||||
|
||||
## Libraries used
|
||||
- [Aiogram](https://github.com/aiogram/aiogram)
|
||||
- [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/)
|
||||
|
||||
3
config.json
Normal file
3
config.json
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
"API_TOKEN": ""
|
||||
}
|
||||
14
main.py
Normal file → Executable file
14
main.py
Normal file → Executable file
@ -1,13 +1,10 @@
|
||||
"""
|
||||
Telegram bot for scraper
|
||||
Author - Kristofers Solo
|
||||
Licence - MIT
|
||||
"""
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from aiogram import Bot, Dispatcher, executor, types
|
||||
from scraper import gpus
|
||||
from SScom.scraper import gpus
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
|
||||
@ -30,8 +27,9 @@ async def gpu_price_message(message: types.Message):
|
||||
"""Returns all scraped GPUs and their prices to telegram"""
|
||||
data = gpus.get_data()
|
||||
message_size = 100
|
||||
chunked_data = [data[i:i + message_size]
|
||||
for i in range(0, len(data), message_size)]
|
||||
chunked_data = [
|
||||
data[i : i + message_size] for i in range(0, len(data), message_size)
|
||||
]
|
||||
|
||||
for i in chunked_data:
|
||||
await message.answer("\n".join(i))
|
||||
|
||||
47
pyproject.toml
Normal file
47
pyproject.toml
Normal file
@ -0,0 +1,47 @@
|
||||
[build-system]
|
||||
requires = ["setuptools>=61.0"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "SScom-scraper"
|
||||
version = "0.1.0"
|
||||
description = "Webscraper of SS marketplace for GPUs"
|
||||
authors = [{ name = "Kristofers Solo", email = "dev@kristofers.xyz" }]
|
||||
keywords = ["scraper", "telegram"]
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
license = { text = "GPLv3" }
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||
"Operating System :: POSIX :: Linux",
|
||||
]
|
||||
dependencies = ["aiogram~=2.0", "beautifulsoup4~=4.12", "requests~=2.31"]
|
||||
|
||||
[project.urls]
|
||||
"Source" = "https://github.com/kristoferssolo/SScom-scraper"
|
||||
"Bug Tracker" = "https://github.com/kristoferssolo/SScom-scraper/issues"
|
||||
|
||||
|
||||
[tool.mypy]
|
||||
check_untyped_defs = true
|
||||
disallow_any_generics = true
|
||||
ignore_missing_imports = true
|
||||
mypy_path = "src"
|
||||
no_implicit_optional = true
|
||||
no_implicit_reexport = true
|
||||
show_error_codes = true
|
||||
strict_equality = true
|
||||
warn_redundant_casts = true
|
||||
warn_return_any = true
|
||||
warn_unreachable = true
|
||||
warn_unused_configs = true
|
||||
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 160
|
||||
|
||||
[tool.ruff.flake8-quotes]
|
||||
docstring-quotes = "double"
|
||||
@ -1,17 +1,3 @@
|
||||
aiogram==2.22.1
|
||||
aiohttp==3.8.3
|
||||
aiosignal==1.2.0
|
||||
async-timeout==4.0.2
|
||||
attrs==22.1.0
|
||||
Babel==2.9.1
|
||||
beautifulsoup4==4.11.1
|
||||
certifi==2022.9.24
|
||||
charset-normalizer==2.1.1
|
||||
frozenlist==1.3.1
|
||||
idna==3.4
|
||||
multidict==6.0.2
|
||||
pytz==2022.2.1
|
||||
requests==2.28.1
|
||||
soupsieve==2.3.2.post1
|
||||
urllib3==1.26.12
|
||||
yarl==1.8.1
|
||||
aiogram==2.0.*
|
||||
beautifulsoup4==4.12.*
|
||||
requests==2.31.*
|
||||
|
||||
2
requirements_dev.txt
Normal file
2
requirements_dev.txt
Normal file
@ -0,0 +1,2 @@
|
||||
mypy==1.5.*
|
||||
ruff==0.0.*
|
||||
@ -1,14 +1,9 @@
|
||||
"""
|
||||
Webscraper of SS marketplace for GPUs
|
||||
Author - Kristofers Solo
|
||||
Licence - MIT
|
||||
"""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.97 Safari/537.36 Vivaldi/4.1.2369.21"}
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.97 Safari/537.36 Vivaldi/4.1.2369.21"
|
||||
}
|
||||
|
||||
|
||||
class SS:
|
||||
@ -20,8 +15,7 @@ class SS:
|
||||
soup = BeautifulSoup(page.content, "html.parser")
|
||||
|
||||
last_url = soup.find(class_="td2").findChild("a")["href"]
|
||||
page_amount = last_url[last_url.find(
|
||||
"page") + 4:last_url.find(".html")]
|
||||
page_amount = last_url[last_url.find("page") + 4 : last_url.find(".html")]
|
||||
print(f"Page amount = {page_amount}")
|
||||
|
||||
return int(page_amount)
|
||||
@ -35,8 +29,7 @@ class SS:
|
||||
soup = BeautifulSoup(page.content, "html.parser")
|
||||
|
||||
# item ids
|
||||
ids = [tag["id"]
|
||||
for tag in soup.select("tr[id]")] # creates list with ids
|
||||
ids = [tag["id"] for tag in soup.select("tr[id]")] # creates list with ids
|
||||
# removes "tr_bnr" elements from list
|
||||
ids = [x for x in ids if "tr_bnr" not in x]
|
||||
ids.remove("head_line") # removes first "head_line" id
|
||||
@ -70,7 +63,7 @@ class SS:
|
||||
# convert price back to string and add `€`
|
||||
gpu[1] = str(gpu[1]) + " €"
|
||||
# transform 2D array to 1D
|
||||
gpus_list[index] = (" - ".join(gpu))
|
||||
gpus_list[index] = " - ".join(gpu)
|
||||
|
||||
return gpus_list
|
||||
|
||||
@ -79,12 +72,13 @@ gpus = SS("https://www.ss.com/lv/electronics/computers/completing-pc/video/sell"
|
||||
|
||||
|
||||
def main():
|
||||
"""Main function to test scraper"""
|
||||
"""Function to test scraper"""
|
||||
data = gpus.get_data()
|
||||
|
||||
message_size = 100
|
||||
chunked_data = [data[i:i + message_size]
|
||||
for i in range(0, len(data), message_size)]
|
||||
chunked_data = [
|
||||
data[i : i + message_size] for i in range(0, len(data), message_size)
|
||||
]
|
||||
for i in chunked_data:
|
||||
print("\n".join(i))
|
||||
|
||||
Loading…
Reference in New Issue
Block a user