WIP: feat(indexer): basic interface

Matthieu Bessat 2023-08-01 23:03:20 +02:00
parent 0c2835f12b
commit 9a288ef2f3
7 changed files with 210 additions and 69 deletions

View file

@@ -13,6 +13,8 @@ serde = "1.0"
serde_json = "1.0"
serde_yaml = "0.9"
wikidata = { path = "../rust_wikidata/" }
bincode = { version = "1.3" }
chrono = { version = "0.4.26", features = ["serde"] }
[lib]
name = "popequer"
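
Note: the two new dependencies back the binary database introduced in this commit: bincode provides the on-disk encoding, and chrono needs its "serde" feature so the DateTime<Utc> timestamps on Notebook can be (de)serialized. A minimal round-trip sketch, illustration only and not part of the commit (Stamped and roundtrip are made-up names):

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

// Illustration only: without chrono's "serde" feature this derive would not compile.
#[derive(Serialize, Deserialize)]
struct Stamped {
    created_at: DateTime<Utc>,
}

fn roundtrip() -> bincode::Result<Stamped> {
    // bincode 1.x serializes the timestamp through serde and reads it back
    let bytes = bincode::serialize(&Stamped { created_at: Utc::now() })?;
    bincode::deserialize(&bytes)
}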

View file

@@ -1,47 +0,0 @@
#![allow(dead_code)]
#![allow(unused_import_braces)]
// #![allow(unused)]
use std::env;
use std::path::Path;
use std::process;
use popequer::indexer;
fn usage() {
println!("Usage: ./indexer PATH_TO_INDEX");
process::exit(64);
}
fn main() {
let args: Vec<String> = env::args().collect();
let user_path = match &args[..] {
[_bin, path] => path,
_ => {
usage();
return;
}
};
let path_to_index = Path::new(user_path);
if !path_to_index.exists() {
eprintln!("Err: The path {:?} does not exists", path_to_index);
process::exit(2);
}
if !path_to_index.is_dir() {
eprintln!("Err: The path must be a dir");
process::exit(2);
}
let index_res = indexer::index_dir(path_to_index);
match index_res {
Ok(entries) => {
dbg!(entries);
}
Err(err) => {
eprintln!("Failed indexing directory");
eprintln!("{:?}", err);
process::exit(2);
}
}
}

src/bin/popequer.rs (new file, 80 lines added)
View file

@@ -0,0 +1,80 @@
#![allow(dead_code)]
#![allow(unused_import_braces)]
// #![allow(unused)]
use std::path::Path;
use std::process;
use clap::Parser;
use popequer::indexer;
use clap::{arg, Command};
fn cli() -> Command {
Command::new("popequer")
.about("The popequer notebook manager CLI")
.subcommand_required(true)
.arg_required_else_help(true)
.allow_external_subcommands(true)
.subcommand(
Command::new("index")
.about("Index a directory and save the result to a binary database file")
.arg(
arg!(--source <SOURCE_DIRECTORY>)
),
)
.subcommand(
Command::new("status")
.about("Check the status of the database")
)
.subcommand(
Command::new("get")
.about("Get details of a particular item")
)
}
fn main() {
let matches = cli().get_matches();
match matches.subcommand() {
Some(("index", sub_matches)) => {
println!(
"Indexing..."
);
sub_matches.get_one::<String>("source");
},
_ => todo!()
}
// let source_path = Path::new(&args.source_path);
// if !source_path.exists() {
// eprintln!("Err: The source path {:?} does not exists", source_path);
// process::exit(2);
// }
// if !source_path.is_dir() {
// eprintln!("Err: The source path must be a dir");
// process::exit(2);
// }
// let internal_path = Path::new(&args.internal_path);
// if !internal_path.exists() {
// eprintln!("Err: The internal path {:?} does not exists", internal_path);
// process::exit(2);
// }
// if !internal_path.is_dir() {
// eprintln!("Err: The internal path must be a dir");
// process::exit(2);
// }
// let index_res = indexer::index_and_save(source_path, internal_path);
// match index_res {
// Ok(entries) => {
// dbg!(entries);
// }
// Err(err) => {
// eprintln!("Failed indexing directory");
// eprintln!("{:?}", err);
// process::exit(2);
// }
// }
}
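
Note: the "index" arm above is still a stub; it reads --source and discards it. A hypothetical way to wire it into the indexer (sketch only, not part of this commit: run_index and the ".popequer" internal directory are assumptions) could be:

use clap::ArgMatches;
use popequer::indexer;
use std::path::Path;
use std::process;

// Hypothetical helper the "index" match arm could call as run_index(sub_matches).
fn run_index(sub_matches: &ArgMatches) {
    let source = match sub_matches.get_one::<String>("source") {
        Some(s) => s,
        None => {
            eprintln!("Err: --source is required");
            process::exit(2);
        }
    };
    let source_path = Path::new(source);
    if !source_path.is_dir() {
        eprintln!("Err: the source path must be an existing directory");
        process::exit(2);
    }
    // assumed location for the internal database directory
    let internal_path = Path::new(".popequer");
    if let Err(err) = indexer::index_and_save(source_path, internal_path) {
        eprintln!("Failed indexing directory: {:?}", err);
        process::exit(2);
    }
}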

View file

@@ -1,7 +1,15 @@
use serde::{Serialize, Deserialize};
use std::path::PathBuf;
use chrono::{DateTime, Utc};
use fully_pub::fully_pub;
use std::path::Path;
use std::fs;
use crate::pdel_parser::{Entry, parse_wrapper};
use crate::pdel_parser::markdown::parse_markdown;
use bincode::{serialize, deserialize};
use std::fs::File;
// bring the Write trait into scope so that file.write_all is available
use std::io::Write;
mod reference_resolver;
@@ -12,10 +20,43 @@ mod test_reference_resolver;
#[derive(Debug)]
pub enum IndexingErr {
CannotOpen,
ParseError
IoErr,
ParseErr
}
pub fn index_dir(dir_path: &Path) -> Result<Vec<Entry>, IndexingErr> {
#[fully_pub]
#[derive(Debug, Serialize, Deserialize)]
struct SourceFile {
path: PathBuf
}
#[fully_pub]
#[derive(Debug, Serialize, Deserialize)]
struct EntryContainer {
entry: Entry,
source_file_index: usize
}
#[fully_pub]
#[derive(Debug, Serialize, Deserialize)]
struct Notebook {
name: Option<String>,
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
files: Vec<SourceFile>,
entries: Vec<EntryContainer> // TODO: transform to a hashmap
}
#[derive(Debug)]
struct IndexingResult {
files: Vec<SourceFile>,
entries: Vec<EntryContainer>
}
/// Depth-first recursive indexing of a directory
fn index_dir(dir_path: &Path) -> Result<IndexingResult, IndexingErr> {
let files_to_index = match fs::read_dir(dir_path) {
Err(_err) => {
return Err(IndexingErr::CannotOpen);
@@ -23,16 +64,23 @@ pub fn index_dir(dir_path: &Path) -> Result<Vec<Entry>, IndexingErr> {
Ok(files) => files
};
let mut parsed_entries: Vec<Entry> = vec![];
let mut entries_containers: Vec<EntryContainer> = vec![];
let mut indexed_files: Vec<SourceFile> = vec![];
// TODO: keep a database of checksums so files are only reparsed when they change? benefits uncertain
for file in files_to_index {
for (file_index, file) in files_to_index.enumerate() {
let path = file.unwrap().path();
indexed_files.push(SourceFile {
path: path.clone(),
});
if path.is_dir() {
match index_dir(&path) {
Ok(entries) => {
parsed_entries.extend(entries);
Ok(index_res) => {
indexed_files.extend(index_res.files);
entries_containers.extend(index_res.entries);
},
Err(err) => {
return Err(err);
@@ -46,11 +94,16 @@ pub fn index_dir(dir_path: &Path) -> Result<Vec<Entry>, IndexingErr> {
let res = parse_wrapper(parse_markdown, &contents);
match res {
Ok(entry) => {
parsed_entries.extend(entry.p);
Ok(pout) => {
for entry in pout.p {
entries_containers.push(EntryContainer {
source_file_index: file_index,
entry
})
}
},
Err(_err) => {
return Err(IndexingErr::ParseError)
return Err(IndexingErr::ParseErr)
}
}
}
@@ -64,8 +117,62 @@ pub fn index_dir(dir_path: &Path) -> Result<Vec<Entry>, IndexingErr> {
// if valid, replace them with actual id (or pointer) of the referenced entry
// look for an existing id,
// if not generate an ID, and prepend it to the claim
for entry in &parsed_entries {
}
Ok(parsed_entries)
Ok(IndexingResult {
files: indexed_files,
entries: entries_containers
})
}
fn index_notebook(path: &Path) -> Result<Notebook, IndexingErr>
{
let index_res = match index_dir(path) {
Ok(res) => res,
Err(err) => {
return Err(err)
}
};
Ok(Notebook {
name: Some("Example notebook".to_string()),
created_at: chrono::offset::Utc::now(),
updated_at: chrono::offset::Utc::now(),
entries: index_res.entries,
files: index_res.files
})
}
fn save_to_file(notebook: Notebook, database_path: &Path) -> Result<(), IndexingErr> {
let mut file = match File::create(database_path) {
Ok(res) => res,
Err(e) => {
dbg!(e);
return Err(IndexingErr::IoErr);
}
};
let bytes = match serialize(&notebook) {
Ok(res) => res,
Err(e) => {
dbg!(e);
return Err(IndexingErr::IoErr);
}
};
match file.write_all(&bytes) {
Ok(_) => {},
Err(e) => {
dbg!(e);
return Err(IndexingErr::IoErr);
}
}
Ok(())
}
pub fn index_and_save(source_path: &Path, internal_path: &Path) -> Result<(), IndexingErr> {
// for now the internal directory holds a single database file; it will store multiple files in the future.
let database_path = internal_path.join("db.bin");
let notebook = index_notebook(source_path)?;
save_to_file(notebook, &database_path)?;
Ok(())
}
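
Note: deserialize is imported above but not used yet; the "status" and "get" subcommands will need a read path. A sketch of the counterpart to save_to_file, reusing this module's imports (load_from_file is a made-up name, not part of this commit):

fn load_from_file(internal_path: &Path) -> Result<Notebook, IndexingErr> {
    let database_path = internal_path.join("db.bin");
    // read the whole binary database into memory
    let bytes = match fs::read(&database_path) {
        Ok(b) => b,
        Err(e) => {
            dbg!(e);
            return Err(IndexingErr::IoErr);
        }
    };
    // decode the Notebook; a corrupt file is treated as a parse error (assumption)
    match deserialize::<Notebook>(&bytes) {
        Ok(notebook) => Ok(notebook),
        Err(e) => {
            dbg!(e);
            Err(IndexingErr::ParseErr)
        }
    }
}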

View file

@@ -1,4 +1,5 @@
use fully_pub::fully_pub;
use serde::{Serialize, Deserialize};
mod values;
mod claim;
@@ -25,21 +26,21 @@ enum ParserState {
}
#[fully_pub]
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct Entry {
labels: Option<ParseOutput<Vec<ParseOutput<EntryClaim>>>>,
claims: ParseOutput<Vec<ParseOutput<EntryClaim>>>
}
#[fully_pub]
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
enum FunctionArgument {
Named { name: String, value: EntryValue },
Positional(EntryValue)
}
#[fully_pub]
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct Function {
name: String,
arguments: Vec<ParseOutput<FunctionArgument>>
@@ -49,7 +50,7 @@ struct Function {
/// Still needs to be checked and the soft location resolved
// TODO: parse and verify the syntax of the hard location
#[fully_pub]
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
enum UnresolvedReference {
SoftLocation(String), // inner: a query string
HardLocation(String) // inner: a URI-like object or a wikidata alias
@@ -57,7 +58,7 @@ enum UnresolvedReference {
#[fully_pub]
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
enum EntryValue {
Integer(i64),
Float(f64),
@@ -70,14 +71,14 @@ enum EntryValue {
/// this is a temp struct to contain the value and the qualifiers
/// this type is not meant to be in the final parsed value
#[fully_pub]
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct EntryValueContainer {
value: ParseOutput<EntryValue>,
qualifiers: Vec<ParseOutput<EntryClaim>>
}
#[fully_pub]
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct EntryClaim {
property: ParseOutput<String>,
value_container: ParseOutput<EntryValueContainer>,
@@ -120,7 +121,7 @@ impl Default for ParseError {
}
#[fully_pub]
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct ParseOutput<T> {
p: T,
// the cursor position when the source object start

View file

@@ -2,7 +2,6 @@ use crate::pdel_parser::{ParseError, ParseOutput, WHITESPACES};
pub fn parse_integer(subject: &str, initial_cursor: usize) -> Result<ParseOutput<i64>, ParseError>
{
dbg!("parse int");
#[derive(PartialEq)]
#[derive(Debug)]
#[derive(Clone)]

View file

@@ -34,7 +34,6 @@ pub fn parse_reference(subject: &str, initial_cursor: usize) -> Result<ParseOutp
})
},
Err(hard_ref_err) if hard_ref_err.location_kind == ParseLocationKind::Before => {
dbg!(hard_ref_err);
return Ok(ParseOutput {
p: UnresolvedReference::SoftLocation(soft_ref_po.p),
start_loc: initial_cursor,