diff --git a/Dockerfile b/Dockerfile index e91880b..3038c76 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ COPY . . RUN apt-get update -y \ && apt-get install -y --no-install-recommends \ - pkg-config libssl-dev ca-certificates \ + pkg-config libssl-dev ca-certificates ffmpeg \ && rm -rf /var/lib/apt/lists/* RUN cargo build --release @@ -23,8 +23,9 @@ RUN useradd --create-home --shell /bin/bash app WORKDIR /home/app USER app -RUN uv tool install instaloader \ - && instaloader --version +RUN uv tool install instaloader yt-dlp \ + && instaloader --version \ + && yt-dlp --version COPY --from=builder /app/target/release/tg-relay-rs /usr/local/bin/tg-relay-rs diff --git a/src/download.rs b/src/download.rs new file mode 100644 index 0000000..ba262de --- /dev/null +++ b/src/download.rs @@ -0,0 +1,148 @@ +use crate::{ + error::{Error, Result}, + utils::{MediaKind, detect_media_kind_async, send_media_from_path}, +}; +use futures::{StreamExt, stream}; +use std::{path::PathBuf, process::Stdio}; +use teloxide::{Bot, types::ChatId}; +use tempfile::{TempDir, tempdir}; +use tokio::{fs::read_dir, process::Command}; + +/// `TempDir` guard + downloaded files. Keep this value alive until you're +/// done sending files so the temporary directory is not deleted. +#[derive(Debug)] +pub struct DownloadResult { + pub tempdir: TempDir, + pub files: Vec, +} + +/// Run a command in a freshly created temporary directory and collect +/// regular files produced there. +/// +/// `cmd` is the command name (e.g. "yt-dlp" or "instaloader"). +/// `args` are the command arguments (owned Strings so callers can build dynamic args). +/// +/// # Errors +/// +/// - `Error::Io` for filesystem / spawn errors (propagated). +/// - `Error::Other` for non-zero exit code (with stderr). +/// - `Error::NoMediaFound` if no files were produced. +#[allow(clippy::similar_names)] +async fn run_command_in_tempdir(cmd: &str, args: &[&str]) -> Result { + let tmp = tempdir().map_err(Error::from)?; + let cwd = tmp.path().to_path_buf(); + + let output = Command::new(cmd) + .current_dir(&cwd) + .args(args) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .output() + .await + .map_err(Error::from)?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string(); + return Err(Error::Other(format!("{cmd} failed: {stderr}"))); + } + + // collect files produced in tempdir (async) + let mut rd = read_dir(&cwd).await?; + let mut files = Vec::new(); + while let Some(entry) = rd.next_entry().await? { + if entry.file_type().await?.is_file() { + files.push(entry.path()); + } + } + + if files.is_empty() { + return Err(Error::NoMediaFound); + } + + Ok(DownloadResult { + tempdir: tmp, + files, + }) +} + +/// Download an Instagram shortcode using instaloader (wrapper). +/// +/// # Errors +/// +/// - Propagates `run_command_in_tempdir` errors. +pub async fn download_instaloader(shortcode: &str) -> Result { + let args = [ + "--no-metadata-json", + "--no-compress-json", + "--quiet", + "--", + &format!("-{shortcode}"), + ]; + run_command_in_tempdir("instaloader", &args).await +} + +/// Download a URL with yt-dlp. `format` can be "best" or a merged selector +/// like "bestvideo[ext=mp4]+bestaudio/best". +/// +/// # Errors +/// +/// - Propagates `run_command_in_tempdir` errors. +pub async fn download_ytdlp(url: &str, format: &str) -> Result { + let args = [ + "--no-playlist", + "-f", + format, + "--restrict-filenames", + "-o", + "%(id)s.%(ext)s", + url, + ]; + run_command_in_tempdir("yt-dlp", &args).await +} + +/// Post-process a `DownloadResult`. +/// +/// Detect media kinds (async), prefer video, then image, then call `send_media_from_path`. +/// Keeps the tempdir alive while sending because `DownloadResult` is passed by value. +/// +/// # Errors +/// +/// - Propagates `send_media_from_path` errors or returns NoMediaFound/UnknownMediaKind. +pub async fn process_download_result(bot: &Bot, chat_id: ChatId, dr: DownloadResult) -> Result<()> { + // detect kinds in parallel + let concurrency = 8; + let results = stream::iter(dr.files.into_iter().map(|path| async move { + let kind = detect_media_kind_async(&path).await; + match kind { + MediaKind::Unknown => None, + k => Some((path, k)), + } + })) + .buffer_unordered(concurrency) + .collect::>>() + .await; + + let mut media = results + .into_iter() + .flatten() + .collect::>(); + + if media.is_empty() { + return Err(Error::NoMediaFound); + } + + // deterministic ordering + media.sort_by_key(|(p, _)| p.clone()); + + // prefer video over image + if let Some((path, MediaKind::Video)) = media.iter().find(|(_, k)| *k == MediaKind::Video) { + return send_media_from_path(bot, chat_id, path.clone(), Some(MediaKind::Video)).await; + } + + if let Some((path, MediaKind::Image)) = media.iter().find(|(_, k)| *k == MediaKind::Image) { + return send_media_from_path(bot, chat_id, path.clone(), Some(MediaKind::Image)).await; + } + + Err(Error::NoMediaFound) +} diff --git a/src/error.rs b/src/error.rs index 3ba9cb1..36fcafe 100644 --- a/src/error.rs +++ b/src/error.rs @@ -6,7 +6,10 @@ pub enum Error { Io(#[from] tokio::io::Error), #[error("instaloader failed: {0}")] - InstaloaderFaileled(String), + InstaloaderFailed(String), + + #[error("yt-dpl failed: {0}")] + YTDLPFailed(String), #[error("no media found")] NoMediaFound, @@ -29,6 +32,16 @@ impl Error { pub fn other(text: impl Into) -> Self { Self::Other(text.into()) } + + #[inline] + pub fn instaloader_failed(text: impl Into) -> Self { + Self::InstaloaderFailed(text.into()) + } + + #[inline] + pub fn ytdlp_failed(text: impl Into) -> Self { + Self::YTDLPFailed(text.into()) + } } pub type Result = std::result::Result; diff --git a/src/handlers/instagram.rs b/src/handlers/instagram.rs index 5250dca..04e863d 100644 --- a/src/handlers/instagram.rs +++ b/src/handlers/instagram.rs @@ -1,15 +1,10 @@ -use crate::error::{Error, Result}; +use crate::download::{download_instaloader, process_download_result}; +use crate::error::Result; use crate::handlers::SocialHandler; -use crate::utils::{MediaKind, detect_media_kind_async, send_media_from_path}; -use futures::{StreamExt, stream}; use regex::Regex; -use std::path::PathBuf; -use std::{process::Stdio, sync::OnceLock}; +use std::sync::OnceLock; use teloxide::{Bot, types::ChatId}; -use tempfile::tempdir; -use tokio::fs::read_dir; -use tokio::process::Command; -use tracing::{error, info}; +use tracing::info; static SHORTCODE_RE: OnceLock = OnceLock::new(); @@ -48,77 +43,8 @@ impl SocialHandler for InstagramHandler { async fn handle(&self, bot: &Bot, chat_id: ChatId, shortcode: String) -> Result<()> { info!(handler = %self.name(), shortcode = %shortcode, "handling instagram code"); - let tmp = tempdir().map_err(Error::from)?; - let cwd = tmp.path().to_path_buf(); - let target = format!("-{shortcode}"); - - let status = Command::new("instaloader") - .current_dir(&cwd) - .args([ - "--dirname-pattern=.", - "--no-metadata-json", - "--no-compress-json", - "--quiet", - "--", - &target, - ]) - .stdin(Stdio::null()) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - .await - .map_err(Error::from)?; - - if !status.success() { - return Err(Error::InstaloaderFaileled(status.to_string())); - } - - let mut dir = read_dir(&cwd).await?; - let mut paths = Vec::new(); - - while let Some(entry) = dir.next_entry().await? { - if entry.file_type().await?.is_file() { - paths.push(entry.path()); - } - } - - let concurrency = 8; - let results = stream::iter(paths) - .map(|path| async move { - let kind = detect_media_kind_async(&path).await; - match kind { - MediaKind::Unknown => None, - k => Some((path, k)), - } - }) - .buffer_unordered(concurrency) - .collect::>>() - .await; - - let mut media = results - .into_iter() - .flatten() - .collect::>(); - - if media.is_empty() { - error!("no media found in tmp dir after instaloader"); - return Err(Error::NoMediaFound); - } - - // deterministic ordering - media.sort_by_key(|(p, _)| p.clone()); - - // prefer video over image - if let Some((path, MediaKind::Video)) = media.iter().find(|(_, k)| *k == MediaKind::Video) { - return send_media_from_path(bot, chat_id, path.clone(), Some(MediaKind::Video)).await; - } - - if let Some((path, MediaKind::Image)) = media.iter().find(|(_, k)| *k == MediaKind::Image) { - return send_media_from_path(bot, chat_id, path.clone(), Some(MediaKind::Image)).await; - } - - error!("no supported media kind found after scanning"); - Err(Error::NoMediaFound) + let dr = download_instaloader(&shortcode).await?; + process_download_result(bot, chat_id, dr).await } fn box_clone(&self) -> Box { diff --git a/src/handlers/mod.rs b/src/handlers/mod.rs index 9a47156..44c8013 100644 --- a/src/handlers/mod.rs +++ b/src/handlers/mod.rs @@ -1,4 +1,5 @@ mod instagram; +mod youtube; use crate::error::Result; use teloxide::{Bot, types::ChatId}; @@ -26,3 +27,4 @@ impl Clone for Box { } pub use instagram::InstagramHandler; +pub use youtube::YouTubeShortsHandler; diff --git a/src/handlers/youtube.rs b/src/handlers/youtube.rs new file mode 100644 index 0000000..fae1ce6 --- /dev/null +++ b/src/handlers/youtube.rs @@ -0,0 +1,55 @@ +use crate::{ + download::{download_ytdlp, process_download_result}, + error::Result, +}; +use regex::Regex; +use std::sync::OnceLock; +use teloxide::{Bot, types::ChatId}; +use tracing::info; + +use crate::handlers::SocialHandler; + +static SHORTCODE_RE: OnceLock = OnceLock::new(); + +fn shortcode_regex() -> &'static Regex { + SHORTCODE_RE.get_or_init(|| { + Regex::new( + r"https?://(?:www\.)?(?:youtube\.com/shorts/[A-Za-z0-9_-]+(?:\?[^\s]*)?|youtu\.be/[A-Za-z0-9_-]+(?:\?[^\s]*)?)", + ) + .expect("filed to compile regex") + }) +} + +/// Handler for `YouTube Shorts` (and short youtu.be links) +#[derive(Clone, Default)] +pub struct YouTubeShortsHandler; + +impl YouTubeShortsHandler { + #[inline] + #[must_use] + pub const fn new() -> Self { + Self + } +} + +#[async_trait::async_trait] +impl SocialHandler for YouTubeShortsHandler { + fn name(&self) -> &'static str { + "youtube" + } + + fn try_extract(&self, text: &str) -> Option { + shortcode_regex().find(text).map(|m| m.as_str().to_owned()) + } + + async fn handle(&self, bot: &Bot, chat_id: ChatId, url: String) -> Result<()> { + info!(handler = %self.name(), url = %url, "handling youtube code"); + let format = "bestvideo[ext=mp4]+bestaudio/best"; + let dr = download_ytdlp(&url, format).await?; + process_download_result(bot, chat_id, dr).await + } + + fn box_clone(&self) -> Box { + Box::new(self.clone()) + } +} diff --git a/src/lib.rs b/src/lib.rs index ecef7e5..e7f3b95 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ pub mod comments; +pub mod download; pub mod error; pub mod handlers; pub mod telemetry; diff --git a/src/main.rs b/src/main.rs index fae8664..31a64fb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use teloxide::{Bot, prelude::Requester, respond, types::Message}; use tg_relay_rs::{ comments::{Comments, init_global_comments}, - handlers::{InstagramHandler, SocialHandler}, + handlers::{InstagramHandler, SocialHandler, YouTubeShortsHandler}, telemetry::setup_logger, }; use tracing::{error, info, warn}; @@ -27,7 +27,8 @@ async fn main() -> color_eyre::Result<()> { let bot = Bot::from_env(); info!("bot starting"); - let handlers = vec![Arc::new(InstagramHandler)]; + let handlers: Vec> = + vec![Arc::new(InstagramHandler), Arc::new(YouTubeShortsHandler)]; teloxide::repl(bot.clone(), move |bot: Bot, msg: Message| { // clone the handlers vector into the closure