WIP: feat(indexer): basic interface

parent 0c2835f12b, commit 9a288ef2f3
7 changed files with 210 additions and 69 deletions
@@ -13,6 +13,8 @@ serde = "1.0"
 serde_json = "1.0"
 serde_yaml = "0.9"
 wikidata = { path = "../rust_wikidata/" }
+bincode = { version = "1.3" }
+chrono = { version = "0.4.26", features = ["serde"] }
 
 [lib]
 name = "popequer"
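The two added dependencies back the binary index the indexer now writes: bincode for the on-disk encoding and chrono (with its "serde" feature) for serializable timestamps. A minimal round-trip sketch, assuming bincode 1.x as pinned above; the Stamp type is invented purely for illustration:

    use chrono::{DateTime, Utc};
    use serde::{Deserialize, Serialize};

    // Hypothetical example, not part of this commit.
    #[derive(Debug, Serialize, Deserialize)]
    struct Stamp {
        created_at: DateTime<Utc>, // (de)serializable only with chrono's "serde" feature
    }

    fn main() {
        let stamp = Stamp { created_at: Utc::now() };
        let bytes = bincode::serialize(&stamp).expect("serialize");       // bincode 1.x API
        let back: Stamp = bincode::deserialize(&bytes).expect("deserialize");
        println!("{} bytes, round-tripped: {:?}", bytes.len(), back.created_at);
    }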
@@ -1,47 +0,0 @@
-#![allow(dead_code)]
-#![allow(unused_import_braces)]
-// #![allow(unused)]
-
-use std::env;
-use std::path::Path;
-use std::process;
-use popequer::indexer;
-
-fn usage() {
-    println!("Usage: ./indexer PATH_TO_INDEX");
-    process::exit(64);
-}
-
-fn main() {
-    let args: Vec<String> = env::args().collect();
-
-    let user_path = match &args[..] {
-        [_bin, path] => path,
-        _ => {
-            usage();
-            return;
-        }
-    };
-    let path_to_index = Path::new(user_path);
-    if !path_to_index.exists() {
-        eprintln!("Err: The path {:?} does not exists", path_to_index);
-        process::exit(2);
-    }
-    if !path_to_index.is_dir() {
-        eprintln!("Err: The path must be a dir");
-        process::exit(2);
-    }
-
-    let index_res = indexer::index_dir(path_to_index);
-    match index_res {
-        Ok(entries) => {
-            dbg!(entries);
-        }
-        Err(err) => {
-            eprintln!("Failed indexing directory");
-            eprintln!("{:?}", err);
-            process::exit(2);
-        }
-    }
-}
-
src/bin/popequer.rs (new file, 80 lines)
@@ -0,0 +1,80 @@
+#![allow(dead_code)]
+#![allow(unused_import_braces)]
+// #![allow(unused)]
+
+use std::path::Path;
+use std::process;
+use clap::Parser;
+use popequer::indexer;
+
+use clap::{arg, Command};
+
+fn cli() -> Command {
+    Command::new("popequer")
+        .about("The popequer notebook manager CLI")
+        .subcommand_required(true)
+        .arg_required_else_help(true)
+        .allow_external_subcommands(true)
+        .subcommand(
+            Command::new("index")
+                .about("Index a directory and save the result to a binary database file")
+                .arg(
+                    arg!(--source <SOURCE_DIRECTORY>)
+                ),
+        )
+        .subcommand(
+            Command::new("status")
+                .about("Check the status of the database")
+        )
+        .subcommand(
+            Command::new("get")
+                .about("Get details of a particular item")
+        )
+}
+
+fn main() {
+    let matches = cli().get_matches();
+
+    match matches.subcommand() {
+        Some(("index", sub_matches)) => {
+            println!(
+                "Indexing..."
+            );
+            sub_matches.get_one::<String>("source");
+        },
+        _ => todo!()
+    }
+
+    // let source_path = Path::new(&args.source_path);
+    // if !source_path.exists() {
+    //     eprintln!("Err: The source path {:?} does not exists", source_path);
+    //     process::exit(2);
+    // }
+    // if !source_path.is_dir() {
+    //     eprintln!("Err: The source path must be a dir");
+    //     process::exit(2);
+    // }
+
+    // let internal_path = Path::new(&args.internal_path);
+    // if !internal_path.exists() {
+    //     eprintln!("Err: The internal path {:?} does not exists", internal_path);
+    //     process::exit(2);
+    // }
+    // if !internal_path.is_dir() {
+    //     eprintln!("Err: The internal path must be a dir");
+    //     process::exit(2);
+    // }
+
+    // let index_res = indexer::index_and_save(source_path, internal_path);
+    // match index_res {
+    //     Ok(entries) => {
+    //         dbg!(entries);
+    //     }
+    //     Err(err) => {
+    //         eprintln!("Failed indexing directory");
+    //         eprintln!("{:?}", err);
+    //         process::exit(2);
+    //     }
+    // }
+}
+
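A quick usage sketch for the CLI defined above, assuming the crate builds this file into a `popequer` binary; only `index` is handled in main so far, while `status` and `get` still fall through to the `todo!()` arm:

    popequer index --source <SOURCE_DIRECTORY>
    popequer status
    popequer get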
@@ -1,7 +1,15 @@
+use serde::{Serialize, Deserialize};
+use std::path::PathBuf;
+use chrono::{DateTime, Utc};
+use fully_pub::fully_pub;
 use std::path::Path;
 use std::fs;
 use crate::pdel_parser::{Entry, parse_wrapper};
 use crate::pdel_parser::markdown::parse_markdown;
+use bincode::{serialize, deserialize};
+use std::fs::File;
+// import the write trait (not directly used)
+use std::io::Write;
 
 mod reference_resolver;
 
@@ -12,10 +20,43 @@ mod test_reference_resolver;
 #[derive(Debug)]
 pub enum IndexingErr {
     CannotOpen,
-    ParseError
+    IoErr,
+    ParseErr
 }
 
-pub fn index_dir(dir_path: &Path) -> Result<Vec<Entry>, IndexingErr> {
+#[fully_pub]
+#[derive(Debug, Serialize, Deserialize)]
+struct SourceFile {
+    path: PathBuf
+}
+
+
+#[fully_pub]
+#[derive(Debug, Serialize, Deserialize)]
+struct EntryContainer {
+    entry: Entry,
+    source_file_index: usize
+}
+
+
+#[fully_pub]
+#[derive(Debug, Serialize, Deserialize)]
+struct Notebook {
+    name: Option<String>,
+    created_at: DateTime<Utc>,
+    updated_at: DateTime<Utc>,
+    files: Vec<SourceFile>,
+    entries: Vec<EntryContainer> // TODO: transform to a hashmap
+}
+
+#[derive(Debug)]
+struct IndexingResult {
+    files: Vec<SourceFile>,
+    entries: Vec<EntryContainer>
+}
+
+/// depth-first search recursive indexing of a directory
+fn index_dir(dir_path: &Path) -> Result<IndexingResult, IndexingErr> {
     let files_to_index = match fs::read_dir(dir_path) {
         Err(_err) => {
             return Err(IndexingErr::CannotOpen);
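The new structs split the index into two parallel vectors: Notebook.files lists every SourceFile, and each EntryContainer points back into that list via source_file_index. A minimal lookup sketch using stand-in types (not the ones above), just to show the linkage:

    use std::path::{Path, PathBuf};

    // Stand-ins for the fields the lookup needs; hypothetical, not from this commit.
    struct SourceFile { path: PathBuf }
    struct EntryContainer { source_file_index: usize }

    fn source_of<'a>(files: &'a [SourceFile], container: &EntryContainer) -> &'a Path {
        files[container.source_file_index].path.as_path()
    }

    fn main() {
        let files = vec![SourceFile { path: PathBuf::from("notes/example.md") }];
        let container = EntryContainer { source_file_index: 0 };
        println!("{:?}", source_of(&files, &container));
    }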
|
@@ -23,16 +64,23 @@ pub fn index_dir(dir_path: &Path) -> Result<Vec<Entry>, IndexingErr> {
         Ok(files) => files
     };
 
-    let mut parsed_entries: Vec<Entry> = vec![];
+    let mut entries_containers: Vec<EntryContainer> = vec![];
+    let mut indexed_files: Vec<SourceFile> = vec![];
+
     // TODO: have a database of checksum to only reparse if the file has changed? benefices pas
     // certains
-    for file in files_to_index {
+    for (file_index, file) in files_to_index.enumerate() {
         let path = file.unwrap().path();
 
+        indexed_files.push(SourceFile {
+            path: path.clone(),
+        });
+
         if path.is_dir() {
             match index_dir(&path) {
-                Ok(entries) => {
-                    parsed_entries.extend(entries);
+                Ok(index_res) => {
+                    indexed_files.extend(index_res.files);
+                    entries_containers.extend(index_res.entries);
                 },
                 Err(err) => {
                     return Err(err);
@@ -46,11 +94,16 @@ pub fn index_dir(dir_path: &Path) -> Result<Vec<Entry>, IndexingErr> {
 
             let res = parse_wrapper(parse_markdown, &contents);
             match res {
-                Ok(entry) => {
-                    parsed_entries.extend(entry.p);
+                Ok(pout) => {
+                    for entry in pout.p {
+                        entries_containers.push(EntryContainer {
+                            source_file_index: file_index.clone(),
+                            entry
+                        })
+                    }
                 },
                 Err(_err) => {
-                    return Err(IndexingErr::ParseError)
+                    return Err(IndexingErr::ParseErr)
                 }
             }
         }
@@ -64,8 +117,62 @@ pub fn index_dir(dir_path: &Path) -> Result<Vec<Entry>, IndexingErr> {
     // if valid, replace them with actual id (or pointer) of the referenced entry
     // look for an existing id,
     // if not generate an ID, and prepend it to the claim
     for entry in &parsed_entries {
     }
 
-    Ok(parsed_entries)
+    Ok(IndexingResult {
+        files: indexed_files,
+        entries: entries_containers
+    })
 }
 
+fn index_notebook(path: &Path) -> Result<Notebook, IndexingErr>
+{
+    let index_res = match index_dir(path) {
+        Ok(res) => res,
+        Err(err) => {
+            return Err(err)
+        }
+    };
+
+    Ok(Notebook {
+        name: Some("Example notebook".to_string()),
+        created_at: chrono::offset::Utc::now(),
+        updated_at: chrono::offset::Utc::now(),
+        entries: index_res.entries,
+        files: index_res.files
+    })
+}
+
+fn save_to_file(notebook: Notebook, database_path: &Path) -> Result<(), IndexingErr> {
+    let mut file = match File::create(database_path) {
+        Ok(res) => res,
+        Err(e) => {
+            dbg!(e);
+            return Err(IndexingErr::IoErr);
+        }
+    };
+    let bytes = match serialize(&notebook) {
+        Ok(res) => res,
+        Err(e) => {
+            dbg!(e);
+            return Err(IndexingErr::IoErr);
+        }
+    };
+    match file.write_all(&bytes) {
+        Ok(_) => {},
+        Err(e) => {
+            dbg!(e);
+            return Err(IndexingErr::IoErr);
+        }
+    }
+    Ok(())
+}
+
+pub fn index_and_save(source_path: &Path, internal_path: &Path) -> Result<(), IndexingErr> {
+    // we use a file inside the internals directory which will store multiple files in the future.
+    let database_path = internal_path.join(Path::new("./db.bin"));
+    let notebook = index_notebook(source_path)?;
+    save_to_file(notebook, &database_path)?;
+
+    Ok(())
+}
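save_to_file covers only the write side; the natural counterpart is reading db.bin back with bincode::deserialize. A minimal sketch, assuming the same bincode 1.x format; NotebookStub is a hypothetical stand-in and would have to match the layout actually serialized by save_to_file:

    use serde::{Deserialize, Serialize};
    use std::fs;
    use std::path::Path;

    // Hypothetical stand-in for the Notebook struct written by save_to_file.
    #[derive(Debug, Serialize, Deserialize)]
    struct NotebookStub {
        name: Option<String>,
    }

    fn load_from_file(database_path: &Path) -> Result<NotebookStub, Box<dyn std::error::Error>> {
        let bytes = fs::read(database_path)?;                         // whole file into memory
        let notebook = bincode::deserialize::<NotebookStub>(&bytes)?; // bincode 1.x API
        Ok(notebook)
    }

    fn main() {
        match load_from_file(Path::new("./db.bin")) {
            Ok(notebook) => println!("loaded notebook: {:?}", notebook.name),
            Err(err) => eprintln!("failed to load: {err}"),
        }
    }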
@@ -1,4 +1,5 @@
 use fully_pub::fully_pub;
+use serde::{Serialize, Deserialize};
 
 mod values;
 mod claim;
@@ -25,21 +26,21 @@ enum ParserState {
 }
 
 #[fully_pub]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 struct Entry {
     labels: Option<ParseOutput<Vec<ParseOutput<EntryClaim>>>>,
     claims: ParseOutput<Vec<ParseOutput<EntryClaim>>>
 }
 
 #[fully_pub]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 enum FunctionArgument {
     Named { name: String, value: EntryValue },
     Positional(EntryValue)
 }
 
 #[fully_pub]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 struct Function {
     name: String,
     arguments: Vec<ParseOutput<FunctionArgument>>
@@ -49,7 +50,7 @@ struct Function {
 /// Still need checked and resolve the soft location
 // TODO: parse and verify the syntax of the hard location
 #[fully_pub]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 enum UnresolvedReference {
     SoftLocation(String), // inner: a query string
     HardLocation(String) // inner: a URI-like object or a wikidata alias
@@ -57,7 +58,7 @@ enum UnresolvedReference {
 
 
 #[fully_pub]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 enum EntryValue {
     Integer(i64),
     Float(f64),
@@ -70,14 +71,14 @@ enum EntryValue {
 /// this is a temp struct to contains the value and the qualifiers
 /// this type is not meant to be in the final parsed value
 #[fully_pub]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 struct EntryValueContainer {
     value: ParseOutput<EntryValue>,
     qualifiers: Vec<ParseOutput<EntryClaim>>
 }
 
 #[fully_pub]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 struct EntryClaim {
     property: ParseOutput<String>,
     value_container: ParseOutput<EntryValueContainer>,
@@ -120,7 +121,7 @@ impl Default for ParseError {
 }
 
 #[fully_pub]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 struct ParseOutput<T> {
     p: T,
     // the cursor position when the source object start
@@ -2,7 +2,6 @@ use crate::pdel_parser::{ParseError, ParseOutput, WHITESPACES};
 
 pub fn parse_integer(subject: &str, initial_cursor: usize) -> Result<ParseOutput<i64>, ParseError>
 {
-    dbg!("parse int");
     #[derive(PartialEq)]
     #[derive(Debug)]
     #[derive(Clone)]
@@ -34,7 +34,6 @@ pub fn parse_reference(subject: &str, initial_cursor: usize) -> Result<ParseOutp
             })
         },
         Err(hard_ref_err) if hard_ref_err.location_kind == ParseLocationKind::Before => {
-            dbg!(hard_ref_err);
             return Ok(ParseOutput {
                 p: UnresolvedReference::SoftLocation(soft_ref_po.p),
                 start_loc: initial_cursor,