WIP init sites

Matthieu Bessat 2022-12-18 19:25:18 +01:00
parent a99dfefeb3
commit af7e3bc97b
7 changed files with 424 additions and 84 deletions

Cargo.lock (generated)

@@ -290,6 +290,17 @@ dependencies = [
 "num-traits",
]

+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
[[package]]
name = "autocfg"
version = "1.1.0"
@@ -302,6 +313,7 @@ version = "0.1.0"
dependencies = [
 "actix-web",
 "chrono",
+ "env_logger",
 "git2",
 "log",
 "sea-orm",
@@ -682,6 +694,19 @@ dependencies = [
 "cfg-if",
]

+[[package]]
+name = "env_logger"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
+dependencies = [
+ "atty",
+ "humantime",
+ "log",
+ "regex",
+ "termcolor",
+]
+
[[package]]
name = "event-listener"
version = "2.5.3"
@@ -987,6 +1012,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"

+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
[[package]]
name = "iana-time-zone"
version = "0.1.53"

Cargo.toml

@@ -11,7 +11,9 @@ actix-web = "^4.2"
sea-orm = { version = "^0", features = [ "sqlx-sqlite", "runtime-actix-native-tls", "macros" ] }
serde = { version = "^1", features = [ "derive" ] }
toml = { version = "^0.5" }
-chrono = { version = "^0.4", features = [ "unstable-locales" ] }
+chrono = { version = "^0.4", features = [ "unstable-locales", "clock" ] }
git2 = { version = "0.15" }
-log = { version = "^0.4" }
+log = "^0.4"
+env_logger = "^0.9"
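A note on the new env_logger dependency: main.rs below calls env_logger::init(), so the log output added in this commit is controlled by the standard RUST_LOG environment variable (for example RUST_LOG=debug to see the debug! traces); with RUST_LOG unset, env_logger only prints errors. This is stock env_logger behaviour, not something configured in this commit.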

src/init_site.rs

@@ -1,15 +1,24 @@
-use git2::{Repository, Error as Git2Error, Remote};
+use git2::{Repository, Error as Git2Error, Remote, RepositoryState};
use git2::string_array::StringArray;
-use super::{SiteConfig, Site};
-use std::path::Path;
-use crate::utils::{unwrap_opt_or_return, unwrap_or_return};
+use super::models::{SiteConfig, SiteContentBindingConfig, Site, SiteSection, Posting};
+use std::path::{PathBuf, Path};
+use std::ffi::{OsString, CString};
+use std::fs;
+use std::io;
+use chrono::prelude::{Utc};
+use crate::utils::{
+    unwrap_opt_or_return,
+    unwrap_or_return,
+    os_str_to_str,
+    bytes2path
+};
use log::error;
use log::info;
use log::warn;

-const REPOSITORIES_CONTAINER_PATH: &str = "./tmp/repositories";
+const REPOSITORIES_CONTAINER_PATH: &str = "tmp/repositories";

#[derive(Debug)]
struct DetailledRemote {
@@ -23,7 +32,7 @@ pub enum DetailledRemotesErr {
    CannotGetRemoteUrl
}

-fn get_detailled_remotes(repo_handle: &Repository) -> Result<Vec<DetailledRemote>, DetailledRemotesErr> {
+fn get_repository_detailled_remotes(repo_handle: &Repository) -> Result<Vec<DetailledRemote>, DetailledRemotesErr> {
    let raw_remotes: StringArray = match repo_handle.remotes() {
        Ok(res) => res,
        Err(err) => {
@@ -50,47 +59,173 @@ fn get_detailled_remotes(repo_handle: &Repository) -> Result<Vec<DetailledRemote
    Ok(detailled_remotes)
}

+#[derive(Debug)]
+pub struct GitFile {
+    path: PathBuf
+}
+
+#[derive(Debug)]
+pub enum GetRepositoryPathErr {
+    CannotGetIndex(Git2Error),
+    NotFound
+}
+
+fn get_repository_path(repo_handle: &Repository, path: &Path) -> Result<GitFile, GetRepositoryPathErr> {
+    debug!("get_repository_path {:?}", path);
+    let index = unwrap_or_return!(
+        repo_handle.index(),
+        |e| GetRepositoryPathErr::CannotGetIndex(e)
+    );
+    let file = unwrap_opt_or_return!(
+        index.get_path(path, 0),
+        GetRepositoryPathErr::NotFound
+    );
+    let path = bytes2path(&file.path);
+    Ok(GitFile { path: path.to_path_buf() })
+}
+
+#[derive(Debug)]
+pub enum VerifyRepositoryPathErr {
+    CannotGetIndex(Git2Error),
+    NotFound,
+    NotCheckedOut
+}
+
+fn verify_repository_path_and_checked_out(repo_handle: &Repository, path: &Path) -> Result<(), VerifyRepositoryPathErr> {
+    match get_repository_path(repo_handle, path) {
+        Ok(_res) => (),
+        Err(GetRepositoryPathErr::NotFound) => return Err(VerifyRepositoryPathErr::NotFound),
+        Err(GetRepositoryPathErr::CannotGetIndex(e)) => return Err(VerifyRepositoryPathErr::CannotGetIndex(e))
+    };
+    // we expect to have a non-bare repository, safe to unwrap
+    let git_workdir = repo_handle.workdir().unwrap();
+    if !git_workdir.join(path).exists() {
+        return Err(VerifyRepositoryPathErr::NotCheckedOut);
+    }
+    Ok(())
+}
+
+#[derive(Debug)]
+pub enum LsFilesErr {
+    CannotGetIndex(Git2Error)
+}
+
+fn ls_files_repository(repo_handle: &Repository) -> Result<Vec<GitFile>, LsFilesErr> {
+    // check if there is a config.toml
+    let index = unwrap_or_return!(repo_handle.index(), |e| LsFilesErr::CannotGetIndex(e));
+    // dbg!(index);
+    if index.is_empty() {
+        return Ok(vec![]);
+    }
+    let mut files: Vec<GitFile> = vec![];
+    index.iter().for_each(|file| {
+        let path = bytes2path(&file.path);
+        files.push(GitFile { path: path.to_path_buf() });
+    });
+    Ok(files)
+}
+
+fn scan_section_dir(content_dir_path: &Path, section_conf: &SiteContentBindingConfig) -> Result<SiteSection, InitSiteErr> {
+    let section_dir = content_dir_path.join(&section_conf.slug);
+    debug!("Scanning section dir {:?}...", &section_dir);
+    // check that directory exists
+    if !section_dir.exists() {
+        // note: converting PathBuf into String can fail if there is a non-unicode char
+        error!(
+            "Invalid binding: cannot find section directory {}",
+            os_str_to_str!(section_dir, InitSiteErr::NonUnicodePath)
+        );
+        return Err(InitSiteErr::InvalidContentBinding);
+    }
+    // scan the section dir for articles
+    // each directory name inside the section dir is considered an article slug
+    // then look inside each dir and check for index.md file
+    // then parse the yaml frontmatter header
+    let mut postings: Vec<Posting> = vec![];
+    let entries = unwrap_or_return!(fs::read_dir(section_dir), |e| InitSiteErr::IoError(e));
+    for entry_res in entries {
+        let entry: fs::DirEntry = unwrap_or_return!(entry_res, |e| InitSiteErr::IoError(e));
+        let entry_type: fs::FileType = unwrap_or_return!(entry.file_type(), |e| InitSiteErr::IoError(e));
+        if !entry_type.is_dir() {
+            continue;
+        }
+        let slug = entry.path();
+        postings.push(Posting {
+            kind: section_conf.posting_kind.clone(),
+            slug: os_str_to_str!(
+                slug.file_name().unwrap(),
+                InitSiteErr::NonUnicodePath
+            ),
+            title: "title".to_string(),
+            created_at: Utc::now()
+        })
+    }
+    return Ok(SiteSection {
+        slug: section_conf.slug.clone(),
+        postings
+    })
+}
+
#[derive(Debug)]
pub enum InitSiteErr {
    RepositoryCloneErr(Git2Error),
    ExistingRepositoryInvalid(Git2Error),
-    ExistingRepositoryCannotGetRemotes(DetailledRemotesErr),
-    ExistingRepositoryInvalidRemoteLayout,
-    ExistingRepositoryInvalidMainRemoteUrl
+    CannotIndexRepository(LsFilesErr),
+    CannotGetRemotes(DetailledRemotesErr),
+    InvalidRemoteLayout,
+    InvalidMainRemoteUrl,
+    InvalidExistingRepository(Git2Error),
+    HugoConfigFileNotFound,
+    EmptyRepository,
+    RepositoryNotClean,
+    InvalidContentBinding,
+    IoError(io::Error),
+    NonUnicodePath
}

-pub fn init_site(site_conf: SiteConfig) -> Result<Site, InitSiteErr> {
+pub fn init_site(site_conf: &SiteConfig) -> Result<Site, InitSiteErr> {
    let remote_url = &site_conf.git_remote_url;
    // check if the path exists
    // check if the path contain an actual repository refering to the same remote url
    // clone the repo
    // check if the cloned repo contain an actual hugo website (config.toml must be presents)
-    // try to find the binding in the `content` directory, check that the stru
+    // try to find the binding in the `content` directory provided by site config
+    // scan the existing content for markdown files and call the scan_markdown function
+    // construct the struct that represent the current state of the site

    let clone_destination: String = format!("{REPOSITORIES_CONTAINER_PATH}/{0}", site_conf.slug);
-    let repo = if !Path::new(&clone_destination).exists() {
-        match Repository::clone(&remote_url, clone_destination) {
+    let repo_path = Path::new(&clone_destination);
+    let repo = if !repo_path.exists() {
+        // do a narrow clone
+        match Repository::clone(&remote_url, &clone_destination) {
            Ok(repo) => repo,
            Err(e) => {
                return Err(InitSiteErr::RepositoryCloneErr(e))
            }
        }
    } else {
-        match Repository::open(clone_destination) {
+        match Repository::open(&clone_destination) {
            Ok(repo) => {
                let remotes_details =
-                    unwrap_or_return!(get_detailled_remotes(&repo), |e| InitSiteErr::ExistingRepositoryCannotGetRemotes(e));
+                    unwrap_or_return!(get_repository_detailled_remotes(&repo), |e| InitSiteErr::CannotGetRemotes(e));
                // make sure that the origin remote is present and match the configured remote url
                match remotes_details.iter().find(|r| r.slug == "origin") {
                    None => {
-                        return Err(InitSiteErr::ExistingRepositoryInvalidRemoteLayout)
+                        return Err(InitSiteErr::InvalidRemoteLayout)
                    },
                    Some(DetailledRemote { url, .. }) if url != remote_url => {
-                        dbg!(url);
-                        dbg!(remote_url);
-                        return Err(InitSiteErr::ExistingRepositoryInvalidMainRemoteUrl);
+                        return Err(InitSiteErr::InvalidMainRemoteUrl);
                    }
                    _ => ()
                }
@@ -98,13 +233,52 @@ pub fn init_site(site_conf: SiteConfig) -> Result<Site, InitSiteErr> {
                repo
            },
            Err(e) => {
-                return Err(InitSiteErr::ExistingRepositoryInvalid(e))
+                return Err(InitSiteErr::InvalidExistingRepository(e))
            }
        }
    };
-    dbg!(&repo.state());
+    if repo.state() != RepositoryState::Clean {
+        return Err(InitSiteErr::RepositoryNotClean)
+    }
+
+    // check if there is any files
+    let files_list = unwrap_or_return!(
+        ls_files_repository(&repo),
+        |e| InitSiteErr::CannotIndexRepository(e)
+    );
+    if files_list.is_empty() {
+        return Err(InitSiteErr::EmptyRepository);
+    }
+    dbg!(&files_list);
+
+    // if !files_list.iter().any(|f| f.path.ends_with("config.toml")) {
+    //     return Err(InitSiteErr::HugoConfigFileNotFound);
+    // }
+
+    // check hugo config file
+    match verify_repository_path_and_checked_out(&repo, Path::new("config.toml")) {
+        Ok(()) => (),
+        Err(VerifyRepositoryPathErr::NotFound | VerifyRepositoryPathErr::NotCheckedOut) =>
+            return Err(InitSiteErr::HugoConfigFileNotFound),
+        Err(VerifyRepositoryPathErr::CannotGetIndex(e)) => return Err(InitSiteErr::InvalidExistingRepository(e))
+    };
+
+    let mut sections: Vec<SiteSection> = vec![];
+    let content_dir = repo_path.join("content");
+    for content_binding in &site_conf.content_bindings {
+        sections.push(scan_section_dir(&content_dir, content_binding)?);
+        // match scan_section_dir(&content_dir, content_binding) {
+        //     Ok(section) => {
+        //         sections.push(section);
+        //     },
+        //     Err(e) => return Err(e)
+        // }
+    }
+
    Ok(Site {
-        config: site_conf
+        sections
    })
}
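For orientation, here is the on-disk layout this init code appears to expect after cloning, inferred from the comments in scan_section_dir and the paths used above; the slug names are placeholders, not real content:

// tmp/repositories/<site_slug>/        <- clone_destination
// ├── config.toml                      <- checked by verify_repository_path_and_checked_out
// └── content/
//     └── <section_slug>/              <- one directory per SiteContentBindingConfig
//         └── <posting_slug>/          <- each sub-directory becomes a Posting
//             └── index.md             <- frontmatter parsing is still a TODO in this commit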

src/main.rs

@@ -1,20 +1,16 @@
+#[macro_use]
+extern crate log;
+
+mod models;
mod init_site;
+mod post;
mod utils;

use actix_web::{get, post, web, App, HttpResponse, HttpServer, Responder};
-use chrono::prelude::{DateTime, Utc};
-use serde::Deserialize;
use toml;
use std::fs;
+use std::process::ExitCode;
+
+use models::{Config};

-#[derive(Debug)]
-struct Post {
-    id: u64,
-    title: String,
-    description: String,
-    content: String,
-    created_at: DateTime<Utc>
-}

#[get("/")]
@@ -36,67 +32,62 @@ async fn echo(req_body: String) -> impl Responder {
struct AppState {
}

-#[derive(Default, Clone, Deserialize, Debug)]
-struct ServerConfig {
-    host: Option<String>,
-    port: Option<u16>
-}
-
-#[derive(Clone, Deserialize, Debug)]
-enum PostingKind {
-    /// a micro-bloging kind of post (less than 80 words)
-    Micro,
-
-    /// a full article (more than 80 words)
-    Article,
-}
-
-/// A hugo directory under `content`
-#[derive(Clone, Deserialize, Debug)]
-struct SiteContentBindingConfig {
-    slug: String,
-    posting_kind: PostingKind
-}
-
-#[derive(Clone, Deserialize, Debug)]
-pub struct SiteConfig {
-    slug: String, // for example "werobot_blog"
-    git_remote_url: String,
-    content_path: String,
-    content_bindings: Vec<SiteContentBindingConfig>
-}
-
-#[derive(Debug)]
-pub struct Site {
-    config: SiteConfig
-}
-
-#[derive(Clone, Deserialize, Debug)]
-struct Config {
-    server: Option<ServerConfig>,
-    sites: Vec<SiteConfig>,
-}
-
-#[actix_web::main]
-async fn main() -> std::io::Result<()> {
-    let config: Config = toml::from_str(
-        &fs::read_to_string("./tmp/config.example.toml")?
-    )?;
+fn main() -> ExitCode {
+    env_logger::init();
+    info!("Starting a hugotator instance...");
+
+    let config_str = match fs::read_to_string("./tmp/config.example.toml") {
+        Ok(res) => res,
+        Err(e) => {
+            error!("Cannot read the server config file.");
+            return ExitCode::FAILURE;
+        }
+    };
+    let config: Config = match toml::from_str(&config_str) {
+        Ok(res) => res,
+        Err(e) => {
+            error!("Invalid TOML server config file.");
+            error!("{:?}", e);
+            return ExitCode::FAILURE;
+        }
+    };
    dbg!(&config);

    // initialize all the sites
    for site_conf in config.sites {
-        println!("Initializing site {}...", site_conf.slug);
-        let site_res = init_site::init_site(site_conf);
-        dbg!(site_res);
+        info!("Initializing site {:?}..", &site_conf.slug);
+        let site_initialized = match init_site::init_site(&site_conf) {
+            Ok(res) => res,
+            Err(e) => {
+                error!("Cannot initialize site");
+                debug!("{:?}", e);
+                return ExitCode::FAILURE
+            }
+        };
+        info!("Site {:?} initialized.", &site_conf.slug);
+        debug!("{:#?}", site_initialized);
    }

    let bind_config = (
        config.server.as_ref().and_then(|sc| sc.host.clone()).unwrap_or_else(|| "127.0.0.1".to_string()),
        config.server.as_ref().and_then(|sc| sc.port).unwrap_or(6968),
    );
+
+    match actix_web_main(bind_config) {
+        Ok(_) => (),
+        Err(e) => {
+            error!("Failed to start actix web main: {:?}", e);
+            return ExitCode::FAILURE;
+        }
+    };
+
+    return ExitCode::SUCCESS;
+}
+
+#[actix_web::main]
+async fn actix_web_main(bind_config: (String, u16)) -> std::io::Result<()> {
    HttpServer::new(|| {
        App::new()
            .service(hello)

src/models.rs (new file)

@@ -0,0 +1,85 @@
+use serde::Deserialize;
+use chrono::prelude::{DateTime, Utc};
+
+use crate::utils::pub_fields;
+
+pub_fields! {
+    #[derive(Debug)]
+    struct Post {
+        id: u64,
+        title: String,
+        description: String,
+        content: String,
+        created_at: DateTime<Utc>
+    }
+}
+
+pub_fields! {
+    #[derive(Default, Clone, Deserialize, Debug)]
+    struct ServerConfig {
+        host: Option<String>,
+        port: Option<u16>
+    }
+}
+
+#[derive(Clone, Deserialize, Debug)]
+pub enum PostingKind {
+    /// a micro-bloging kind of post (less than 80 words)
+    Micro,
+
+    /// a full article (more than 80 words)
+    Article,
+}
+
+pub_fields! {
+    /// A hugo directory under `content`
+    #[derive(Clone, Deserialize, Debug)]
+    struct SiteContentBindingConfig {
+        slug: String,
+        posting_kind: PostingKind
+    }
+}
+
+pub_fields! {
+    #[derive(Clone, Deserialize, Debug)]
+    struct SiteConfig {
+        slug: String, // for example "werobot_blog"
+        git_remote_url: String,
+        content_path: String,
+        content_bindings: Vec<SiteContentBindingConfig>
+    }
+}
+
+pub_fields! {
+    #[derive(Debug)]
+    struct Posting {
+        kind: PostingKind,
+        slug: String,
+        title: String,
+        created_at: DateTime<Utc>
+    }
+}
+
+pub_fields! {
+    #[derive(Debug)]
+    struct SiteSection {
+        slug: String,
+        postings: Vec<Posting>
+    }
+}
+
+pub_fields! {
+    #[derive(Debug)]
+    struct Site {
+        sections: Vec<SiteSection>
+    }
+}
+
+pub_fields! {
+    #[derive(Clone, Deserialize, Debug)]
+    struct Config {
+        server: Option<ServerConfig>,
+        sites: Vec<SiteConfig>,
+    }
+}
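As a sketch of how these Deserialize-derived models fit together (not part of the commit): the ./tmp/config.example.toml file read by main.rs could look roughly like the TOML embedded below. All values here (host, slugs, remote URL) are illustrative assumptions, not taken from the repository.

// Hypothetical sketch only: the shape follows the Config, SiteConfig and
// SiteContentBindingConfig structs above; the values are made up.
fn load_example_config() -> Result<crate::models::Config, toml::de::Error> {
    let raw = r#"
        [server]
        host = "127.0.0.1"
        port = 6968

        [[sites]]
        slug = "werobot_blog"
        git_remote_url = "https://example.org/werobot/blog.git"
        content_path = "content"

        [[sites.content_bindings]]
        slug = "articles"
        posting_kind = "Article"
    "#;
    toml::from_str(raw)
}

main.rs performs essentially this parse, with the error branches returning ExitCode::FAILURE instead of propagating the toml error.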

src/post.rs (new, empty file)

src/utils.rs

@@ -1,3 +1,22 @@
+use std::path::{Component, Path, PathBuf};
+use std::ffi::{CString, OsStr};
+
+macro_rules! pub_fields {
+    {
+        $(#[doc = $doc:expr])?
+        $(#[derive($($macros:tt)*)])*
+        struct $name:ident {
+            $($field:ident: $t:ty$(,)?)*
+        }
+    } => {
+        $(#[derive($($macros)*)])*
+        pub struct $name {
+            $(pub $field: $t),*
+        }
+    }
+}
+pub(crate) use pub_fields;
+
macro_rules! unwrap_or_return {
    ( $to_match: expr, $err_func: expr ) => {
        match $to_match {
@@ -22,3 +41,41 @@ macro_rules! unwrap_opt_or_return {
        }
    }
}
pub(crate) use unwrap_opt_or_return;
+
+// macro_rules! os_str_to_str {
+//     ( $e: expr, $err: expr ) => {
+//         match $e {
+//             OsStr => $e.to_str().unwrap().to_string(),
+//             PathBuf => match $e.into_os_string().into_string() {
+//                 Ok(res) => res.to_string(),
+//                 Err(_) => {
+//                     return Err($err)
+//                 }
+//             }
+//         }
+//     };
+// }
+
+macro_rules! os_str_to_str {
+    ( $e: expr, $err: expr ) => {
+        match $e.to_str() {
+            Some(r) => r.to_string(),
+            None => {
+                return Err($err)
+            }
+        }
+    }
+}
+pub(crate) use os_str_to_str;
+
+#[cfg(unix)]
+pub fn bytes2path(b: &[u8]) -> &Path {
+    use std::os::unix::prelude::*;
+    Path::new(OsStr::from_bytes(b))
+}
+
+#[cfg(windows)]
+pub fn bytes2path(b: &[u8]) -> &Path {
+    use std::str;
+    Path::new(str::from_utf8(b).unwrap())
+}