From a7e5d43423869929563ec5ced099eeebe2ab5982 Mon Sep 17 00:00:00 2001 From: Kristofers Solo Date: Fri, 19 Sep 2025 18:26:36 +0300 Subject: [PATCH] refactor(instagram): organize files --- Cargo.lock | 13 +++ Cargo.toml | 9 +- src/error.rs | 27 ++++++ src/handlers/instagram.rs | 127 +++++++++++++++++++++++++ src/handlers/mod.rs | 28 ++++++ src/lib.rs | 3 + src/main.rs | 195 +++++++------------------------------- src/utils.rs | 114 ++++++++++++++++++++++ 8 files changed, 354 insertions(+), 162 deletions(-) create mode 100644 src/error.rs create mode 100644 src/handlers/instagram.rs create mode 100644 src/handlers/mod.rs create mode 100644 src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index a2b8145..e7bcd5a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,6 +62,17 @@ dependencies = [ "syn", ] +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -1689,8 +1700,10 @@ dependencies = [ name = "tg-relay-rs" version = "0.1.0" dependencies = [ + "async-trait", "color-eyre", "dotenv", + "futures", "infer", "once_cell", "regex", diff --git a/Cargo.toml b/Cargo.toml index 2626e0a..96ffe1f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,15 +6,22 @@ license = "MIT" edition = "2024" [dependencies] +async-trait = "0.1" color-eyre = "0.6" dotenv = "0.15" +futures = "0.3.31" infer = "0.19" once_cell = "1.21.3" regex = "1.11" teloxide = { version = "0.17", features = ["macros"] } tempfile = "3" thiserror = "2.0" -tokio = { version = "1", features = ["macros", "rt-multi-thread", "process"] } +tokio = { version = "1", features = [ + "macros", + "rt-multi-thread", + "process", + "fs", +] } tracing = "0.1" tracing-appender = "0.2" tracing-bunyan-formatter = { version = "0.3", default-features = false } diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..5ceecb8 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,27 @@ +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum Error { + #[error("io error: {0}")] + Io(#[from] std::io::Error), + + #[error("instaloader failed: {0}")] + InstaloaderFaileled(String), + + #[error("no media found")] + NoMediaFound, + + #[error("unknown media kind")] + UnknownMediaKind, + + #[error("teloxide error: {0}")] + Teloxide(#[from] teloxide::RequestError), + + #[error("join error: {0}")] + Join(#[from] tokio::task::JoinError), + + #[error("other: {0}")] + Other(String), +} + +pub type Result = std::result::Result; diff --git a/src/handlers/instagram.rs b/src/handlers/instagram.rs new file mode 100644 index 0000000..5250dca --- /dev/null +++ b/src/handlers/instagram.rs @@ -0,0 +1,127 @@ +use crate::error::{Error, Result}; +use crate::handlers::SocialHandler; +use crate::utils::{MediaKind, detect_media_kind_async, send_media_from_path}; +use futures::{StreamExt, stream}; +use regex::Regex; +use std::path::PathBuf; +use std::{process::Stdio, sync::OnceLock}; +use teloxide::{Bot, types::ChatId}; +use tempfile::tempdir; +use tokio::fs::read_dir; +use tokio::process::Command; +use tracing::{error, info}; + +static SHORTCODE_RE: OnceLock = OnceLock::new(); + +fn shortcode_regex() -> &'static Regex { + SHORTCODE_RE.get_or_init(|| { + Regex::new( + r"https?://(?:www\.)?(?:instagram\.com|instagr\.am)/(?:p|reel|tv)/([A-Za-z0-9_-]+)", + ) + .expect("filed to compile regex") + }) +} + +/// Handler for Instagram posts / reels / tv +#[derive(Clone, Default)] +pub struct InstagramHandler; + +impl InstagramHandler { + #[inline] + #[must_use] + pub const fn new() -> Self { + Self + } +} + +#[async_trait::async_trait] +impl SocialHandler for InstagramHandler { + fn name(&self) -> &'static str { + "instagram" + } + + fn try_extract(&self, text: &str) -> Option { + shortcode_regex() + .captures(text) + .and_then(|c| c.get(1).map(|m| m.as_str().to_owned())) + } + + async fn handle(&self, bot: &Bot, chat_id: ChatId, shortcode: String) -> Result<()> { + info!(handler = %self.name(), shortcode = %shortcode, "handling instagram code"); + let tmp = tempdir().map_err(Error::from)?; + let cwd = tmp.path().to_path_buf(); + let target = format!("-{shortcode}"); + + let status = Command::new("instaloader") + .current_dir(&cwd) + .args([ + "--dirname-pattern=.", + "--no-metadata-json", + "--no-compress-json", + "--quiet", + "--", + &target, + ]) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await + .map_err(Error::from)?; + + if !status.success() { + return Err(Error::InstaloaderFaileled(status.to_string())); + } + + let mut dir = read_dir(&cwd).await?; + let mut paths = Vec::new(); + + while let Some(entry) = dir.next_entry().await? { + if entry.file_type().await?.is_file() { + paths.push(entry.path()); + } + } + + let concurrency = 8; + let results = stream::iter(paths) + .map(|path| async move { + let kind = detect_media_kind_async(&path).await; + match kind { + MediaKind::Unknown => None, + k => Some((path, k)), + } + }) + .buffer_unordered(concurrency) + .collect::>>() + .await; + + let mut media = results + .into_iter() + .flatten() + .collect::>(); + + if media.is_empty() { + error!("no media found in tmp dir after instaloader"); + return Err(Error::NoMediaFound); + } + + // deterministic ordering + media.sort_by_key(|(p, _)| p.clone()); + + // prefer video over image + if let Some((path, MediaKind::Video)) = media.iter().find(|(_, k)| *k == MediaKind::Video) { + return send_media_from_path(bot, chat_id, path.clone(), Some(MediaKind::Video)).await; + } + + if let Some((path, MediaKind::Image)) = media.iter().find(|(_, k)| *k == MediaKind::Image) { + return send_media_from_path(bot, chat_id, path.clone(), Some(MediaKind::Image)).await; + } + + error!("no supported media kind found after scanning"); + Err(Error::NoMediaFound) + } + + fn box_clone(&self) -> Box { + Box::new(self.clone()) + } +} diff --git a/src/handlers/mod.rs b/src/handlers/mod.rs new file mode 100644 index 0000000..9a47156 --- /dev/null +++ b/src/handlers/mod.rs @@ -0,0 +1,28 @@ +mod instagram; + +use crate::error::Result; +use teloxide::{Bot, types::ChatId}; + +#[async_trait::async_trait] +pub trait SocialHandler: Send + Sync { + /// Short name used for logging etc. + fn name(&self) -> &'static str; + + /// Try to extract a platform-specific identifier (shortcode, id, url) + /// from arbitrary text. Return `Some` if the handler should handle this message. + fn try_extract(&self, text: &str) -> Option; + + /// Do the heavy-lifting: fetch media and send to `chat_id`. + async fn handle(&self, bot: &Bot, chat_id: ChatId, id: String) -> Result<()>; + + /// Clone a boxed handler. + fn box_clone(&self) -> Box; +} + +impl Clone for Box { + fn clone(&self) -> Self { + self.box_clone() + } +} + +pub use instagram::InstagramHandler; diff --git a/src/lib.rs b/src/lib.rs index 304af1e..6e43309 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,4 @@ +pub mod error; +pub mod handlers; pub mod telemetry; +pub mod utils; diff --git a/src/main.rs b/src/main.rs index e41933c..cfacce7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,179 +1,52 @@ -mod telemetry; - -use crate::telemetry::setup_logger; -use color_eyre::{ - Result, - eyre::{Context, eyre}, -}; use dotenv::dotenv; -use regex::Regex; -use std::{ - fs::{File, read_dir}, - io::Read, - path::Path, +use std::sync::Arc; +use teloxide::{Bot, prelude::Requester, respond, types::Message}; +use tg_relay_rs::{ + handlers::{InstagramHandler, SocialHandler}, + telemetry::setup_logger, }; -use teloxide::{ - Bot, - prelude::Requester, - respond, - types::{ChatId, InputFile, Message}, -}; -use tempfile::tempdir; -use tokio::process::Command; -use tracing::error; - -static VIDEO_EXTS: &[&str] = &["mp4", "webm"]; -static IMAGE_EXTS: &[&str] = &["jpg", "jpeg", "png"]; - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum MediaKind { - Video, - Image, - Unknown, -} +use tracing::{error, info}; #[tokio::main] async fn main() -> color_eyre::Result<()> { dotenv().ok(); - color_eyre::install()?; + color_eyre::install().expect("color-eyre install"); setup_logger(); let bot = Bot::from_env(); + info!("bot starting"); - teloxide::repl(bot, |bot: Bot, msg: Message| async move { - if let Some(text) = msg.text() - && let Some(shortcode) = extract_instagram_shortcode(text) - { - let bot_cloned = bot.clone(); - let chat = msg.chat.id; + let handlers = vec![Arc::new(InstagramHandler)]; - tokio::spawn(async move { - if let Err(e) = fetch_and_send(&bot_cloned, chat, &shortcode).await { - error!("error fetching/sending: {:?}", e); - let _ = bot_cloned - .send_message(chat, "Failed to fetch Instagram media.") - .await; + teloxide::repl(bot.clone(), move |bot: Bot, msg: Message| { + // clone the handlers vector into the closure + let handlers = handlers.clone(); + async move { + if let Some(text) = msg.text() { + for handler in handlers { + if let Some(id) = handler.try_extract(text) { + let handler = handler.clone(); + let bot_for_task = bot.clone(); + let chat = msg.chat.id; + + tokio::spawn(async move { + if let Err(err) = handler.handle(&bot_for_task, chat, id).await { + error!(%err, "handler failed"); + + let _ = bot_for_task + .send_message(chat, "Failed to fetch media.") + .await; + } + }); + // if one handler matcher, stop checking + break; + } } - }); + } + respond(()) } - respond(()) }) .await; Ok(()) } - -fn extract_instagram_shortcode(text: &str) -> Option { - let re = Regex::new( - r"https?://(?:www\.)?(?:instagram\.com|instagr\.am)/(?:p|reel|tv)/([A-Za-z0-9_-]+)", - ) - .unwrap(); - re.captures(text) - .and_then(|cap| cap.get(1).map(|m| m.as_str().to_string())) -} - -async fn fetch_and_send(bot: &Bot, chat_id: ChatId, shortcode: &str) -> Result<()> { - let dir = tempdir().context("create tempdir")?; - let dir_path = dir.path().to_path_buf(); - - dbg!(&dir_path); - let target = format!("-{shortcode}"); - dbg!(&target); - let status = Command::new("instaloader") - .arg("--dirname-pattern") - .arg(dir_path.to_string_lossy().as_ref()) - .arg("--no-metadata-json") - .arg("--no-compress-json") - .arg("--quiet") - .arg("--") - .arg(&target) - .status() - .await - .context("runnning instaloader")?; - - if !status.success() { - error!("instaloader exit: {:?}", status); - return Err(eyre!("instaloader failed")); - } - let mut media_files = Vec::new(); - - for entry in read_dir(&dir_path)? { - let p = entry?.path(); - if p.is_file() { - let ext = p.extension().and_then(|s| s.to_str()).unwrap_or(""); - if matches!(ext, "jpg" | "jpeg" | "mp4" | "webm") { - media_files.push(p); - } - } - } - - if media_files.is_empty() { - return Err(eyre!("no media found")); - } - - dbg!(&media_files); - - if let Some(video_path) = media_files.iter().find(|p| is_video(p)) { - let input = InputFile::file(video_path.clone()); - bot.send_video(chat_id, input).await?; - return Ok(()); - } - - if let Some(image_path) = media_files.iter().find(|p| is_image(p)) { - let input = InputFile::file(image_path.clone()); - bot.send_photo(chat_id, input).await?; - return Ok(()); - } - - bot.send_message(chat_id, "No supported media found") - .await?; - - Ok(()) -} - -fn ext_lower(path: &Path) -> Option { - path.extension() - .and_then(|s| s.to_str()) - .map(str::to_ascii_lowercase) -} - -fn kind_by_magic(path: &Path) -> Option { - let mut f = File::open(path).ok()?; - let mut buf = [0u8; 8192]; - - let n = f.read(&mut buf).ok()?; - if n == 0 { - return None; - } - - if let Some(kind) = infer::get(&buf[..n]) { - let mt = kind.mime_type(); - if mt.starts_with("video/") { - return Some(MediaKind::Video); - } - if mt.starts_with("image/") { - return Some(MediaKind::Image); - } - } - None -} - -fn detect_media_kind(path: &Path) -> MediaKind { - if let Some(ext) = ext_lower(path) { - if VIDEO_EXTS.iter().any(|e| e.eq_ignore_ascii_case(&ext)) { - return MediaKind::Video; - } - if IMAGE_EXTS.iter().any(|e| e.eq_ignore_ascii_case(&ext)) { - return MediaKind::Image; - } - } - kind_by_magic(path).unwrap_or(MediaKind::Unknown) -} - -fn is_video(path: &Path) -> bool { - detect_media_kind(path) == MediaKind::Video -} - -fn is_image(path: &Path) -> bool { - detect_media_kind(path) == MediaKind::Image -} diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..468a420 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,114 @@ +use crate::error::{Error, Result}; +use std::{ + ffi::OsStr, + path::{Path, PathBuf}, +}; +use teloxide::{ + Bot, + prelude::Requester, + types::{ChatId, InputFile}, +}; +use tokio::{fs::File, io::AsyncReadExt}; + +/// Simple media kind enum shared by handlers. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MediaKind { + Video, + Image, + Unknown, +} + +static VIDEO_EXTS: &[&str] = &["mp4", "webm", "mov", "mkv", "avi"]; +static IMAGE_EXTS: &[&str] = &["jpg", "jpeg", "png", "webp"]; + +/// Detect media kind first by extension, then by content/magic (sync). +/// NOTE: `infer::get_from_path` is blocking — use `detect_media_kind_async` in +/// async contexts to avoid blocking the Tokio runtime. +pub fn detect_media_kind(path: &Path) -> MediaKind { + if let Some(ext) = path.extension().and_then(OsStr::to_str) { + if VIDEO_EXTS.iter().any(|e| e.eq_ignore_ascii_case(ext)) { + return MediaKind::Video; + } + if IMAGE_EXTS.iter().any(|e| e.eq_ignore_ascii_case(ext)) { + return MediaKind::Image; + } + } + + if let Ok(Some(kind)) = infer::get_from_path(path) { + let mt = kind.mime_type(); + if mt.starts_with("video/") { + return MediaKind::Video; + } + if mt.starts_with("image/") { + return MediaKind::Image; + } + } + + MediaKind::Unknown +} + +/// Async/non-blocking detection: check extension first, otherwise read a small +/// sample asynchronously and run `infer::get` on the buffer. +pub async fn detect_media_kind_async(path: &Path) -> MediaKind { + if let Some(ext) = path.extension().and_then(OsStr::to_str) { + if VIDEO_EXTS.iter().any(|e| e.eq_ignore_ascii_case(ext)) { + return MediaKind::Video; + } + if IMAGE_EXTS.iter().any(|e| e.eq_ignore_ascii_case(ext)) { + return MediaKind::Image; + } + } + + // Read a small prefix (8 KiB) asynchronously and probe + if let Ok(mut f) = File::open(path).await { + let mut buf = vec![0u8; 8192]; + match f.read(&mut buf).await { + Ok(n) if n > 0 => { + buf.truncate(n); + if let Some(k) = infer::get(&buf) { + let mt = k.mime_type(); + if mt.starts_with("video/") { + return MediaKind::Video; + } + if mt.starts_with("image/") { + return MediaKind::Image; + } + } + } + _ => {} + } + } + + MediaKind::Unknown +} + +/// Given a path, send it to chat as photo or video depending on detected kind. +/// +/// # Errors +/// +/// Returns an error if sending fails or the media kind is unknown. +pub async fn send_media_from_path( + bot: &Bot, + chat_id: ChatId, + path: PathBuf, + kind: Option, +) -> Result<()> { + let kind = kind.unwrap_or_else(|| detect_media_kind(&path)); + match kind { + MediaKind::Video => { + let video = InputFile::file(path); + bot.send_video(chat_id, video).await.map_err(Error::from)?; + } + MediaKind::Image => { + let photo = InputFile::file(path); + bot.send_photo(chat_id, photo).await.map_err(Error::from)?; + } + MediaKind::Unknown => { + bot.send_message(chat_id, "No supported media found") + .await + .map_err(Error::from)?; + return Err(Error::UnknownMediaKind); + } + } + Ok(()) +}