From af7e3bc97b8fd3cd8c328b8f92deca49af304478 Mon Sep 17 00:00:00 2001
From: Matthieu Bessat
Date: Sun, 18 Dec 2022 19:25:18 +0100
Subject: [PATCH] WIP init sites

---
 Cargo.lock       |  31 +++
 Cargo.toml       |   6 +-
 src/init_site.rs | 218 ++++++++++++++++++++++++++++++++++++++++++-----
 src/main.rs      | 111 +++++++++++-------------
 src/models.rs    |  85 ++++++++++++++++++
 src/post.rs      |   0
 src/utils.rs     |  57 +++++++++++++
 7 files changed, 424 insertions(+), 84 deletions(-)
 create mode 100644 src/models.rs
 create mode 100644 src/post.rs

diff --git a/Cargo.lock b/Cargo.lock
index 2cb6abc..a5a1446 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -290,6 +290,17 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
 [[package]]
 name = "autocfg"
 version = "1.1.0"
@@ -302,6 +313,7 @@ version = "0.1.0"
 dependencies = [
  "actix-web",
  "chrono",
+ "env_logger",
  "git2",
  "log",
  "sea-orm",
@@ -682,6 +694,19 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "env_logger"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
+dependencies = [
+ "atty",
+ "humantime",
+ "log",
+ "regex",
+ "termcolor",
+]
+
 [[package]]
 name = "event-listener"
 version = "2.5.3"
@@ -987,6 +1012,12 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
 
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
 [[package]]
 name = "iana-time-zone"
 version = "0.1.53"
diff --git a/Cargo.toml b/Cargo.toml
index fdb116c..73d00f5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,7 +11,9 @@ actix-web = "^4.2"
 sea-orm = { version = "^0", features = [ "sqlx-sqlite", "runtime-actix-native-tls", "macros" ] }
 serde = { version = "^1", features = [ "derive" ] }
 toml = { version = "^0.5" }
-chrono = { version = "^0.4", features = [ "unstable-locales" ] }
+chrono = { version = "^0.4", features = [ "unstable-locales", "clock" ] }
 git2 = { version = "0.15" }
-log = { version = "^0.4" }
+
+log = "^0.4"
+env_logger = "^0.9"
diff --git a/src/init_site.rs b/src/init_site.rs
index 1282cf2..9436fca 100644
--- a/src/init_site.rs
+++ b/src/init_site.rs
@@ -1,15 +1,24 @@
-use git2::{Repository, Error as Git2Error, Remote};
+use git2::{Repository, Error as Git2Error, Remote, RepositoryState};
 use git2::string_array::StringArray;
-use super::{SiteConfig, Site};
-use std::path::Path;
+use super::models::{SiteConfig, SiteContentBindingConfig, Site, SiteSection, Posting};
+use std::path::{PathBuf, Path};
+use std::ffi::{OsString, CString};
+use std::fs;
+use std::io;
+use chrono::prelude::{Utc};
 
-use crate::utils::{unwrap_opt_or_return, unwrap_or_return};
+use crate::utils::{
+    unwrap_opt_or_return,
+    unwrap_or_return,
+    os_str_to_str,
+    bytes2path
+};
 use log::error;
 use log::info;
 use log::warn;
 
-const REPOSITORIES_CONTAINER_PATH: &str = "./tmp/repositories";
+const REPOSITORIES_CONTAINER_PATH: &str = "tmp/repositories";
 
 #[derive(Debug)]
 struct DetailledRemote {
@@ -23,7 +32,7 @@ pub enum DetailledRemotesErr {
     CannotGetRemoteUrl
 }
 
-fn get_detailled_remotes(repo_handle: &Repository) -> Result<Vec<DetailledRemote>, DetailledRemotesErr> {
+fn get_repository_detailled_remotes(repo_handle: &Repository) -> Result<Vec<DetailledRemote>, DetailledRemotesErr> {
     let raw_remotes: StringArray = match repo_handle.remotes() {
         Ok(res) => res,
         Err(err) => {
@@ -50,47 +59,173 @@ fn get_detailled_remotes(repo_handle: &Repository) -> Result<Vec<DetailledRemote>, DetailledRemotesErr> {
+#[derive(Debug)]
+struct GitFile {
+    path: PathBuf
+}
+
+#[derive(Debug)]
+pub enum GetRepositoryPathErr {
+    CannotGetIndex(Git2Error),
+    NotFound
+}
+
+fn get_repository_path(repo_handle: &Repository, path: &Path) -> Result<GitFile, GetRepositoryPathErr> {
+    debug!("get_repository_path {:?}", path);
+    let index = unwrap_or_return!(
+        repo_handle.index(),
+        |e| GetRepositoryPathErr::CannotGetIndex(e)
+    );
+    let file = unwrap_opt_or_return!(
+        index.get_path(path, 0),
+        GetRepositoryPathErr::NotFound
+    );
+    let path = bytes2path(&file.path);
+    Ok(GitFile { path: path.to_path_buf() })
+}
+
+#[derive(Debug)]
+pub enum VerifyRepositoryPathErr {
+    CannotGetIndex(Git2Error),
+    NotFound,
+    NotCheckedOut
+}
+
+fn verify_repository_path_and_checked_out(repo_handle: &Repository, path: &Path) -> Result<(), VerifyRepositoryPathErr> {
+    match get_repository_path(repo_handle, path) {
+        Ok(_res) => (),
+        Err(GetRepositoryPathErr::NotFound) => return Err(VerifyRepositoryPathErr::NotFound),
+        Err(GetRepositoryPathErr::CannotGetIndex(e)) => return Err(VerifyRepositoryPathErr::CannotGetIndex(e))
+    };
+    // we expect to have a non-bare repository, safe to unwrap
+    let git_workdir = repo_handle.workdir().unwrap();
+    if !git_workdir.join(path).exists() {
+        return Err(VerifyRepositoryPathErr::NotCheckedOut);
+    }
+    Ok(())
+}
+
+#[derive(Debug)]
+pub enum LsFilesErr {
+    CannotGetIndex(Git2Error)
+}
+
+fn ls_files_repository(repo_handle: &Repository) -> Result<Vec<GitFile>, LsFilesErr> {
+    // check if there is a config.toml
+    let index = unwrap_or_return!(repo_handle.index(), |e| LsFilesErr::CannotGetIndex(e));
+    // dbg!(index);
+    if index.is_empty() {
+        return Ok(vec![]);
+    }
+
+    let mut files: Vec<GitFile> = vec![];
+    index.iter().for_each(|file| {
+        let path = bytes2path(&file.path);
+        files.push(GitFile { path: path.to_path_buf() });
+    });
+
+    Ok(files)
+}
+
+
+fn scan_section_dir(content_dir_path: &Path, section_conf: &SiteContentBindingConfig) -> Result<SiteSection, InitSiteErr> {
+    let section_dir = content_dir_path.join(&section_conf.slug);
+    debug!("Scanning section dir {:?}...", &section_dir);
+    // check that directory exists
+    if !section_dir.exists() {
+        // note: converting PathBuf into String can fail if there is a non-unicode char
+        error!(
+            "Invalid binding: cannot find section directory {}",
+            os_str_to_str!(section_dir, InitSiteErr::NonUnicodePath)
+        );
+        return Err(InitSiteErr::InvalidContentBinding);
+    }
+
+    // scan the section dir for articles
+    // each directory name inside the section dir is considered an article slug
+    // then look inside each dir and check for index.md file
+    // then parse the yaml frontmatter header
+
+    let mut postings: Vec<Posting> = vec![];
+    let entries = unwrap_or_return!(fs::read_dir(section_dir), |e| InitSiteErr::IoError(e));
+    for entry_res in entries {
+        let entry: fs::DirEntry = unwrap_or_return!(entry_res, |e| InitSiteErr::IoError(e));
+
+        let entry_type: fs::FileType = unwrap_or_return!(entry.file_type(), |e| InitSiteErr::IoError(e));
+        if !entry_type.is_dir() {
+            continue;
+        }
+        let slug = entry.path();
+
+        postings.push(Posting {
+            kind: section_conf.posting_kind.clone(),
+            slug: os_str_to_str!(
+                slug.file_name().unwrap(),
+                InitSiteErr::NonUnicodePath
+            ),
+            title: "title".to_string(),
+            created_at: Utc::now()
+        })
+    }
+    return Ok(SiteSection {
+        slug: section_conf.slug.clone(),
+        postings
+    })
+}
+
+
 #[derive(Debug)]
 pub enum InitSiteErr {
     RepositoryCloneErr(Git2Error),
     ExistingRepositoryInvalid(Git2Error),
-    ExistingRepositoryCannotGetRemotes(DetailledRemotesErr),
-    ExistingRepositoryInvalidRemoteLayout,
-    ExistingRepositoryInvalidMainRemoteUrl
+    CannotIndexRepository(LsFilesErr),
+    CannotGetRemotes(DetailledRemotesErr),
+    InvalidRemoteLayout,
+    InvalidMainRemoteUrl,
+    InvalidExistingRepository(Git2Error),
+    HugoConfigFileNotFound,
+    EmptyRepository,
+    RepositoryNotClean,
+    InvalidContentBinding,
+    IoError(io::Error),
+    NonUnicodePath
 }
 
-pub fn init_site(site_conf: SiteConfig) -> Result<Site, InitSiteErr> {
+pub fn init_site(site_conf: &SiteConfig) -> Result<Site, InitSiteErr> {
     let remote_url = &site_conf.git_remote_url;
 
     // check if the path exists
     // check if the path contain an actual repository refering to the same remote url
     // clone the repo
     // check if the cloned repo contain an actual hugo website (config.toml must be presents)
-    // try to find the binding in the `content` directory, check that the stru
+    // try to find the binding in the `content` directory provided by site config
+    // scan the existing content for markdown files and call the scan_markdown function
+    // construct the struct that represent the current state of the site
 
     let clone_destination: String = format!("{REPOSITORIES_CONTAINER_PATH}/{0}", site_conf.slug);
 
-    let repo = if !Path::new(&clone_destination).exists() {
-        match Repository::clone(&remote_url, clone_destination) {
+    let repo_path = Path::new(&clone_destination);
+    let repo = if !repo_path.exists() {
+        // do a narrow clone
+        match Repository::clone(&remote_url, &clone_destination) {
             Ok(repo) => repo,
             Err(e) => {
                 return Err(InitSiteErr::RepositoryCloneErr(e))
             }
         }
     } else {
-        match Repository::open(clone_destination) {
+        match Repository::open(&clone_destination) {
             Ok(repo) => {
                 let remotes_details =
-                    unwrap_or_return!(get_detailled_remotes(&repo), |e| InitSiteErr::ExistingRepositoryCannotGetRemotes(e));
+                    unwrap_or_return!(get_repository_detailled_remotes(&repo), |e| InitSiteErr::CannotGetRemotes(e));
 
                 // make sure that the origin remote is present and match the configured remote url
                 match remotes_details.iter().find(|r| r.slug == "origin") {
                     None => {
-                        return Err(InitSiteErr::ExistingRepositoryInvalidRemoteLayout)
+                        return Err(InitSiteErr::InvalidRemoteLayout)
                     },
                     Some(DetailledRemote { url, .. }) if url != remote_url => {
-                        dbg!(url);
-                        dbg!(remote_url);
-                        return Err(InitSiteErr::ExistingRepositoryInvalidMainRemoteUrl);
+                        return Err(InitSiteErr::InvalidMainRemoteUrl);
                     }
                     _ => ()
                 }
@@ -98,13 +233,52 @@ pub fn init_site(site_conf: SiteConfig) -> Result<Site, InitSiteErr> {
                 repo
             },
             Err(e) => {
-                return Err(InitSiteErr::ExistingRepositoryInvalid(e))
+                return Err(InitSiteErr::InvalidExistingRepository(e))
             }
         }
     };
 
-    dbg!(&repo.state());
+    if repo.state() != RepositoryState::Clean {
+        return Err(InitSiteErr::RepositoryNotClean)
+    }
+
+    // check if there is any files
+    let files_list = unwrap_or_return!(
+        ls_files_repository(&repo),
+        |e| InitSiteErr::CannotIndexRepository(e)
+    );
+    if files_list.is_empty() {
+        return Err(InitSiteErr::EmptyRepository);
+    }
+    dbg!(&files_list);
+
+    // if !files_list.iter().any(|f| f.path.ends_with("config.toml")) {
+    //     return Err(InitSiteErr::HugoConfigFileNotFound);
+    // }
+
+    // check hugo config file
+    match verify_repository_path_and_checked_out(&repo, Path::new("config.toml")) {
+        Ok(()) => (),
+        Err(VerifyRepositoryPathErr::NotFound | VerifyRepositoryPathErr::NotCheckedOut) =>
+            return Err(InitSiteErr::HugoConfigFileNotFound),
+        Err(VerifyRepositoryPathErr::CannotGetIndex(e)) => return Err(InitSiteErr::InvalidExistingRepository(e))
+    };
+
+    let mut sections: Vec<SiteSection> = vec![];
+    let content_dir = repo_path.join("content");
+    for content_binding in &site_conf.content_bindings {
+        sections.push(scan_section_dir(&content_dir, content_binding)?);
+        // match scan_section_dir(&content_dir, content_binding) {
+        //     Ok(section) => {
+        //         sections.push(section);
+        //     },
+        //     Err(e) => return Err(e)
+        // }
+    }
+
+
     Ok(Site {
-        config: site_conf
+        sections
     })
+
 }
diff --git a/src/main.rs b/src/main.rs
index f2bd2d0..c17a171 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,20 +1,16 @@
+#[macro_use]
+extern crate log;
+
+mod models;
 mod init_site;
+mod post;
 mod utils;
 
 use actix_web::{get, post, web, App, HttpResponse, HttpServer, Responder};
-use chrono::prelude::{DateTime, Utc};
-use serde::Deserialize;
 use toml;
 use std::fs;
-
-#[derive(Debug)]
-struct Post {
-    id: u64,
-    title: String,
-    description: String,
-    content: String,
-    created_at: DateTime<Utc>
-}
+use std::process::ExitCode;
+use models::{Config};
 
 
 #[get("/")]
@@ -36,67 +32,62 @@ async fn echo(req_body: String) -> impl Responder {
 struct AppState {
 }
 
-#[derive(Default, Clone, Deserialize, Debug)]
-struct ServerConfig {
-    host: Option<String>,
-    port: Option<u16>
-}
+fn main() -> ExitCode {
+    env_logger::init();
+    info!("Starting a hugotator instance...");
 
-#[derive(Clone, Deserialize, Debug)]
-enum PostingKind {
-    /// a micro-bloging kind of post (less than 80 words)
-    Micro,
-
-    /// a full article (more than 80 words)
-    Article,
-}
-
-/// A hugo directory under `content`
-#[derive(Clone, Deserialize, Debug)]
-struct SiteContentBindingConfig {
-    slug: String,
-    posting_kind: PostingKind
-}
-
-#[derive(Clone, Deserialize, Debug)]
-pub struct SiteConfig {
-    slug: String, // for example "werobot_blog"
-    git_remote_url: String,
-    content_path: String,
-    content_bindings: Vec<SiteContentBindingConfig>
-}
-
-#[derive(Debug)]
-pub struct Site {
-    config: SiteConfig
-}
-
-#[derive(Clone, Deserialize, Debug)]
-struct Config {
-    server: Option<ServerConfig>,
-    sites: Vec<SiteConfig>,
-}
-
-
-#[actix_web::main]
-async fn main() -> std::io::Result<()> {
-    let config: Config = toml::from_str(
-        &fs::read_to_string("./tmp/config.example.toml")?
-    )?;
+    let config_str = match fs::read_to_string("./tmp/config.example.toml") {
+        Ok(res) => res,
+        Err(e) => {
+            error!("Cannot read the server config file.");
+            return ExitCode::FAILURE;
+        }
+    };
+    let config: Config = match toml::from_str(&config_str) {
+        Ok(res) => res,
+        Err(e) => {
+            error!("Invalid TOML server config file.");
+            error!("{:?}", e);
+            return ExitCode::FAILURE;
+        }
+    };
     dbg!(&config);
 
     // initialize all the sites
     for site_conf in config.sites {
-        println!("Initializing site {}...", site_conf.slug);
-        let site_res = init_site::init_site(site_conf);
-        dbg!(site_res);
+        info!("Initializing site {:?}..", &site_conf.slug);
+        let site_initialized = match init_site::init_site(&site_conf) {
+            Ok(res) => res,
+            Err(e) => {
+                error!("Cannot initialize site");
+                debug!("{:?}", e);
+                return ExitCode::FAILURE
+            }
+        };
+        info!("Site {:?} initialized.", &site_conf.slug);
+        debug!("{:#?}", site_initialized);
     }
 
     let bind_config = (
         config.server.as_ref().and_then(|sc| sc.host.clone()).unwrap_or_else(|| "127.0.0.1".to_string()),
         config.server.as_ref().and_then(|sc| sc.port).unwrap_or(6968),
     );
+
+    match actix_web_main(bind_config) {
+        Ok(_) => (),
+        Err(e) => {
+            error!("Failed to start actix web main: {:?}", e);
+            return ExitCode::FAILURE;
+        }
+    };
+
+    return ExitCode::SUCCESS;
+}
+
+
+#[actix_web::main]
+async fn actix_web_main(bind_config: (String, u16)) -> std::io::Result<()> {
     HttpServer::new(|| {
         App::new()
             .service(hello)
diff --git a/src/models.rs b/src/models.rs
new file mode 100644
index 0000000..f25f20d
--- /dev/null
+++ b/src/models.rs
@@ -0,0 +1,85 @@
+use serde::Deserialize;
+use chrono::prelude::{DateTime, Utc};
+use crate::utils::pub_fields;
+
+pub_fields! {
+    #[derive(Debug)]
+    struct Post {
+        id: u64,
+        title: String,
+        description: String,
+        content: String,
+        created_at: DateTime<Utc>
+    }
+}
+
+
+pub_fields! {
+    #[derive(Default, Clone, Deserialize, Debug)]
+    struct ServerConfig {
+        host: Option<String>,
+        port: Option<u16>
+    }
+}
+
+
+#[derive(Clone, Deserialize, Debug)]
+pub enum PostingKind {
+    /// a micro-bloging kind of post (less than 80 words)
+    Micro,
+
+    /// a full article (more than 80 words)
+    Article,
+}
+
+pub_fields! {
+    /// A hugo directory under `content`
+    #[derive(Clone, Deserialize, Debug)]
+    struct SiteContentBindingConfig {
+        slug: String,
+        posting_kind: PostingKind
+    }
+}
+
+pub_fields! {
+    #[derive(Clone, Deserialize, Debug)]
+    struct SiteConfig {
+        slug: String, // for example "werobot_blog"
+        git_remote_url: String,
+        content_path: String,
+        content_bindings: Vec<SiteContentBindingConfig>
+    }
+}
+
+pub_fields! {
+    #[derive(Debug)]
+    struct Posting {
+        kind: PostingKind,
+        slug: String,
+        title: String,
+        created_at: DateTime<Utc>
+    }
+}
+
+pub_fields! {
+    #[derive(Debug)]
+    struct SiteSection {
+        slug: String,
+        postings: Vec<Posting>
+    }
+}
+
+pub_fields! {
+    #[derive(Debug)]
+    struct Site {
+        sections: Vec<SiteSection>
+    }
+}
+
+pub_fields! {
+    #[derive(Clone, Deserialize, Debug)]
+    struct Config {
+        server: Option<ServerConfig>,
+        sites: Vec<SiteConfig>,
+    }
+}
diff --git a/src/post.rs b/src/post.rs
new file mode 100644
index 0000000..e69de29
diff --git a/src/utils.rs b/src/utils.rs
index 70de2c8..c01d944 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -1,3 +1,22 @@
+use std::path::{Component, Path, PathBuf};
+use std::ffi::{CString, OsStr};
+
+macro_rules! pub_fields {
+    {
+        $(#[doc = $doc:expr])?
+        $(#[derive($($macros:tt)*)])*
+        struct $name:ident {
+            $($field:ident: $t:ty$(,)?)*
+        }
+    } => {
+        $(#[derive($($macros)*)])*
+        pub struct $name {
+            $(pub $field: $t),*
+        }
+    }
+}
+pub(crate) use pub_fields;
+
 macro_rules! unwrap_or_return {
     ( $to_match: expr, $err_func: expr ) => {
         match $to_match {
@@ -22,3 +41,41 @@ macro_rules! unwrap_opt_or_return {
     }
 }
 pub(crate) use unwrap_opt_or_return;
+
+// macro_rules! os_str_to_str {
+//     ( $e: expr, $err: expr ) => {
+//         match $e {
+//             OsStr => $e.to_str().unwrap().to_string(),
+//             PathBuf => match $e.into_os_string().into_string() {
+//                 Ok(res) => res.to_string(),
+//                 Err(_) => {
+//                     return Err($err)
+//                 }
+//             }
+//         }
+//     };
+// }
+macro_rules! os_str_to_str {
+    ( $e: expr, $err: expr ) => {
+        match $e.to_str() {
+            Some(r) => r.to_string(),
+            None => {
+                return Err($err)
+            }
+        }
+    }
+}
+pub(crate) use os_str_to_str;
+
+#[cfg(unix)]
+pub fn bytes2path(b: &[u8]) -> &Path {
+    use std::os::unix::prelude::*;
+    Path::new(OsStr::from_bytes(b))
+}
+
+#[cfg(windows)]
+pub fn bytes2path(b: &[u8]) -> &Path {
+    use std::str;
+    Path::new(str::from_utf8(b).unwrap())
+}
+
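
The new main() reads ./tmp/config.example.toml, but the patch never shows what that file contains. Judging only from the field names of Config, ServerConfig, SiteConfig and SiteContentBindingConfig in src/models.rs, a deserialization test along the following lines could pin the expected shape down; the concrete TOML values below are illustrative guesses, not taken from the repository.

#[cfg(test)]
mod tests {
    use super::Config;

    #[test]
    fn parses_an_example_config() {
        // Hypothetical example content for tmp/config.example.toml;
        // only the key names mirror the structs defined in src/models.rs.
        let raw = r#"
            [server]
            host = "127.0.0.1"
            port = 6968

            [[sites]]
            slug = "werobot_blog"
            git_remote_url = "https://example.org/werobot/blog.git"
            content_path = "content"

            [[sites.content_bindings]]
            slug = "posts"
            posting_kind = "Article"
        "#;

        let config: Config = toml::from_str(raw).expect("example config should deserialize");
        assert_eq!(config.sites.len(), 1);
        assert_eq!(config.sites[0].content_bindings.len(), 1);
    }
}

The host and port used here mirror the fallbacks main() applies when [server] is omitted ("127.0.0.1" and 6968).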
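
scan_section_dir() still fills in title: "title".to_string() and created_at: Utc::now() as placeholders, while the comments above it say the next step is to open each posting's index.md and read its YAML front matter. A rough, std-only sketch of that extraction step, assuming a front matter block delimited by "---" lines containing a "title:" key (nothing in this patch defines the content file layout):

use std::fs;
use std::io;
use std::path::Path;

/// Naive front matter lookup: scan the block delimited by `---` lines at the top
/// of `index.md` and return the value of a `title:` entry, if there is one.
/// A real implementation would probably hand the block to a YAML parser instead.
fn read_front_matter_title(posting_dir: &Path) -> Result<Option<String>, io::Error> {
    let raw = fs::read_to_string(posting_dir.join("index.md"))?;
    let mut in_front_matter = false;
    for line in raw.lines() {
        let trimmed = line.trim();
        if trimmed == "---" {
            if in_front_matter {
                break; // end of the front matter block, no title found
            }
            in_front_matter = true;
            continue;
        }
        if in_front_matter {
            if let Some(value) = trimmed.strip_prefix("title:") {
                return Ok(Some(value.trim().trim_matches('"').to_string()));
            }
        }
    }
    Ok(None)
}

Wired into scan_section_dir(), an Ok(Some(title)) result would replace the hardcoded string, and Ok(None) could fall back to the directory slug.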