feat: custom entry intro

Support a custom entry intro after `@` that adds an instance-of (`is`) claim to the entry.
E.g. `@Human` is transformed into `{ is: [Human] }` and `@Event` into `{ is: [Event] }`.
The goal is to provide a shortcut in the PDEL syntax.
This commit is contained in:
Matthieu Bessat 2024-02-24 14:52:37 +01:00
parent 5040b404f5
commit 81ff6dcd46
9 changed files with 351 additions and 109 deletions

View file

@ -3,7 +3,7 @@ use chrono_tz::Europe::Paris;
use anyhow::{Result, anyhow};
use fully_pub::fully_pub;
use crate::pdel_parser::{PEntry, PEntryValue, ParseOutput, PEntryClaim, PFunction, PFunctionArgument};
use crate::pdel_parser::{PEntry, PEntryClaim, PEntryValue, PFunction, PFunctionArgument, ParseOutput, UnresolvedReference};
use crate::database::models::{EntryValue, EntryClaim, Property};
use super::ids::Id;
@ -162,6 +162,7 @@ pub fn get_pure_value(parsed_value: &PEntryValue) -> Result<EntryValue> {
impl Entry {
/// Special entry builder to use with caution
pub fn from_parsed_entry_without_id(parsed_entry: &PEntry) -> Result<Entry> {
Ok(Entry {
id: Id::generate(),
@ -169,10 +170,13 @@ impl Entry {
})
}
/// Build the pure entry from the parsed entry, apply claims transformation
pub fn from_parsed_entry_with_id(parsed_entry: &ParseOutput<PEntry>) -> Result<Entry, ExtractErr> {
let pure_claims = get_pure_claims(&parsed_entry.p.claims.p)
let original_pure_claims = get_pure_claims(&parsed_entry.p.claims.p)
.map_err(|_err| ExtractErr::CannotExtractClaims)?;
let id_claim = filter_claims_by_property(&pure_claims, "id")
// get the id from claims
let id_claim = filter_claims_by_property(&original_pure_claims, "id")
.get(0)
.map(|x| *x)
.ok_or(ExtractErr::ExpectedId)?;
@ -181,20 +185,50 @@ impl Entry {
} else {
return Err(ExtractErr::CannotParseId);
};
Ok(Entry {
id: Id::from_repr(&id_repr)
.map_err(|_err| ExtractErr::CannotParseId)?,
// remove the "id: " claim from the pure entry
claims: pure_claims
.iter()
let id = Id::from_repr(&id_repr)
.map_err(|_err| ExtractErr::CannotParseId)?;
let mut claims: Vec<EntryClaim> = original_pure_claims.clone();
// apply the parsed entry custom intro to the claims
let entry_intro = &parsed_entry.p.intro.p;
if entry_intro != "Entry" {
claims = claims.into_iter()
.filter(|x| x.property != Property::Custom("is".into()))
.collect();
if claims.len() != original_pure_claims.len() {
// this means that a claim with property 'is' was removed and will be overwritten
eprintln!("Warning: `is` property was overwritten by custom Entry intro");
}
// the entry intro string is the reference
let instance_claim = EntryClaim {
property: Property::Custom("is".into()),
value: EntryValue::Reference(
Reference::Unresolved(
UnresolvedReference::SoftLocation(
entry_intro.clone()
)
)
),
qualifiers: vec![]
};
claims.insert(0, instance_claim)
}
// remove the "id: " claim from the pure entry and clone claims
let final_claims = claims
.into_iter()
.filter_map(|x| {
if &x.property != &Property::Custom("id".to_string()) {
Some(x.clone())
if x.property != Property::Custom("id".to_string()) {
Some(x)
} else {
None
}
})
.collect()
.collect();
Ok(Entry {
id,
claims: final_claims
})
}

View file

@ -1,3 +1,5 @@
use std::assert_matches::assert_matches;
use crate::database::parse_extractor::get_pure_value;
use crate::database::models::{filter_claims_by_property, Entry, EntryValue, Property, Reference};
use crate::pdel_parser::{parse_wrapper, parse_entry, UnresolvedReference};
@ -156,3 +158,32 @@ fn test_transform_into_pure_entry_with_list_and_qualifiers() {
.is_some()
);
}
/// An entry introduced with `@Human` must be extracted with exactly one `is`
/// claim whose value is an unresolved soft reference to "Human", while the
/// regular claims (here `first_name`) are kept.
#[test]
fn test_transform_into_pure_entry_with_custom_entry_intro() {
    let source = r#"
@Human {
id: "AAAAAAAAAA",
first_name: "John",
last_name: "Doe"
}
"#.to_string();

    // parsing step
    let parse_result = parse_wrapper(parse_entry, &source);
    assert_matches!(parse_result, Ok(_));
    let parsed_entry = parse_result.unwrap();

    // extraction step
    let extraction_result = Entry::from_parsed_entry_with_id(&parsed_entry);
    assert_matches!(extraction_result, Ok(_));
    let entry = extraction_result.unwrap();

    let mut instance_claims = filter_claims_by_property(&entry.claims, "is");
    assert_eq!(instance_claims.len(), 1);
    assert_eq!(filter_claims_by_property(&entry.claims, "first_name").len(), 1);

    let expected_value = EntryValue::Reference(
        Reference::Unresolved(
            UnresolvedReference::SoftLocation("Human".to_string())
        )
    );
    assert_eq!(instance_claims.pop().unwrap().value, expected_value);
}

View file

@ -44,11 +44,16 @@ pub fn link_claims(labels_index: &LabelEntryIndex, claims: &mut Vec<EntryClaim>)
// resolve
// FIXME: Error or warn if the resolve reference failed for a hard
// location
if let Ok(resolved) = resolve_reference(labels_index, unresolved_ref.clone()) {
match resolve_reference(labels_index, unresolved_ref.clone()) {
Ok(resolved) => {
linked_count += 1;
claim.value = EntryValue::Reference(
Reference::Resolved(resolved)
)
},
Err(e) => {
eprintln!("Warning: failed to link claim: {:?}", e)
}
}
},
_ => ()

View file

@ -102,27 +102,9 @@ fn index_file(base_dir: &Path, file_path: &Path, file_id: Uuid) -> Result<Indexi
.map_err(|e| IndexingErr::CannotWriteFileToAddId(e))?;
return index_file(base_dir, file_path, file_id);
},
Some(id_claim) => {
// extract id from id_claim
let id_repr = if let EntryValue::String(id_repr) = id_claim.value.clone() {
id_repr.to_string()
} else {
return Err(IndexingErr::ExtractErr(anyhow!("cannot parse id, id claim value is not a string")));
};
Entry {
id: Id::from_repr(&id_repr)
.context("Cannot parse id from existing repr")
.map_err(|e| IndexingErr::ExtractErr(e))?,
// remove the "id: " claim from the pure entry
claims: pure_claims
.iter()
.filter_map(|x|
(&x.property != &Property::Custom("id".to_string())).then(|| x.clone())
)
.collect()
}
Some(_id_claim) => {
Entry::from_parsed_entry_with_id(&pentry)
.map_err(|e| IndexingErr::RExtractErr(e))?
}
};

View file

@ -3,6 +3,8 @@ use std::path::PathBuf;
use std::assert_matches::assert_matches;
use crate::database::filter::filter_instance_of;
use crate::database::search::search_entry_by_label;
use crate::indexer;
use crate::pdel_parser::preprocess::preprocess_code;
use crate::{fs_notebook::NotebookContext, indexer::add_id_on_entry_code};
@ -80,13 +82,13 @@ fn test_add_id_on_entry_with_expansions() {
#[test]
fn test_basic_indexing_on_fs() {
let tmp_dir = std::path::Path::new("/tmp").join(format!("{}", uuid::Uuid::new_v4()));
assert_matches!(fs::create_dir(&tmp_dir), Ok(_));
// let tmp_dir = TempDir::new()
// .unwrap();
// let tmp_dir = std::path::Path::new("/tmp").join(format!("{}", uuid::Uuid::new_v4()));
// assert_matches!(fs::create_dir(&tmp_dir), Ok(_));
let tmp_dir = TempDir::new()
.unwrap();
let context = NotebookContext {
base_path:
tmp_dir.join("notebook")
tmp_dir.path().join("notebook")
};
// create a notebook in this dir
assert_matches!(fs::create_dir(&context.base_path), Ok(_));
@ -126,3 +128,39 @@ fn test_basic_indexing_on_fs() {
assert_eq!(a_entry.pure_entry.name(), "River");
assert_eq!(b_entry.pure_entry.name(), "Rivière Romaine");
}
/// Index a notebook on disk whose single source file declares one `@Entry`
/// named "Human" followed by three `@Human` entries, then check that
/// filtering by instance of the "Human" entry yields exactly three entries.
#[test]
fn test_indexing_on_fs_contacts_notebook() {
    let tmp_dir = TempDir::new()
        .expect("cannot create the temporary directory");
    let context = NotebookContext {
        base_path: tmp_dir.path().join("notebook")
    };

    // lay out the notebook skeleton: base dir, sources, internals and manifest
    assert_matches!(fs::create_dir(&context.base_path), Ok(_));
    assert!(fs::create_dir(&context.sources_path()).is_ok());
    assert!(fs::create_dir(&context.internals_path()).is_ok());
    assert!(fs::write(&context.manifest_path(), r#"{"name":"Test notebook", "description": "Incroyable"}"#).is_ok());

    // one class-like entry and three entries using it as their custom intro
    assert!(fs::write(&context.sources_path().join("note.md"), r#"
@Entry {
name: "Human"
}
@Human {
name: "Some woman"
}
@Human {
name: "Some man"
}
@Human {
name: "Some child"
}
"#).is_ok());

    let notebook = indexer::index_and_save(&context)
        .expect("indexing the notebook should succeed");

    let human_entry = search_entry_by_label(&notebook, "Human")
        .get(0)
        .expect("the `Human` entry should be found by its label")
        .1;
    let humans = filter_instance_of(&notebook, &human_entry.pure_entry.id);
    assert_eq!(humans.len(), 3);
}

View file

@ -1,6 +1,6 @@
use fully_pub::fully_pub;
use crate::pdel_parser::{ParseError, ParseOutput, WHITESPACES, PEntry, parse_entry, ENTRY_INTRO};
use crate::pdel_parser::{parse_entry, PEntry, ParseError, ParseLocationKind, ParseOutput, ENTRY_INTRO, WHITESPACES};
use crate::{debug_state_machine, get_symbol_or_final};
use crate::utils::Substring;
@ -32,37 +32,20 @@ pub fn parse_markdown(initial_subject: &str, initial_cursor: usize) -> Result<Pa
#[derive(Debug, PartialEq)]
enum State {
Markdown,
InNewLine,
InEntry,
Final
}
let mut cursor = initial_cursor;
let mut state = State::InNewLine;
let mut state = State::InEntry;
loop {
// debug_state_machine!("parse_markdown", subject, state, cursor);
match state {
State::Markdown => {
let symbol = get_symbol_or_final!(subject, state, cursor);
if symbol == '\n' {
state = State::InNewLine;
}
},
State::InNewLine => {
let symbol = get_symbol_or_final!(subject, state, cursor);
if WHITESPACES.contains(&symbol) {
cursor += 1;
continue;
}
// Look ahead for an Entry
// potentially an expensive operation!
// I have to move from bytes slicing to chars slicing to handle non-ascii chars
let look_ahead: String = subject.chars().take(cursor + ENTRY_INTRO.len()).skip(cursor).collect();
// dbg!(symbol, cursor, subject.chars().nth(cursor).unwrap(), &look_ahead);
if look_ahead == ENTRY_INTRO {
// optimistic parsing
state = State::InEntry;
continue;
}
state = State::Markdown;
},
State::InEntry => {
match parse_entry(&subject, cursor) {
@ -73,6 +56,11 @@ pub fn parse_markdown(initial_subject: &str, initial_cursor: usize) -> Result<Pa
continue;
},
Err(err) => {
if err.location_kind == ParseLocationKind::Before || err.location_kind == ParseLocationKind::Header {
cursor = err.cursor;
state = State::Markdown;
continue;
}
// skip this snippet or abord the whole markdown file?
// for now I will abort the whole md file since I don't known how to detect
// the end of the entry

View file

@ -26,7 +26,7 @@ mod test_preprocess;
use fully_pub::fully_pub;
use serde::{Serialize, Deserialize};
use crate::{debug_state_machine, get_symbol_or_final};
use crate::get_symbol_or_final;
#[derive(Debug)]
enum ParserState {
@ -47,6 +47,9 @@ enum UnresolvedReference {
#[fully_pub]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct PEntry {
/// the name of the instance.
/// eg. Human, Presentation, Event or SportSession
intro: ParseOutput<String>,
/// the list of claims
claims: ParseOutput<Vec<ParseOutput<PEntryClaim>>>,
/// a free text description which can be also used to define sub-claims later
@ -284,12 +287,97 @@ pub fn parse_wrapper<T>(
process_after(&subject, parser(&subject, 0))
}
/// Count how many dict there are
/// until either the next @Entry or until the next blank line
fn count_ahead_entry_dict(initial_subject: &str, initial_cursor: usize) -> Result<u32, ParseError> {
// seek until the next {
return Ok(0);
/// Parse an entry intro: an `@` followed by a name such as `Human` or
/// `SportSession`.
///
/// Whitespace before the `@` is skipped. The character right after the `@`
/// must be uppercase, every following character must be alphabetic, and the
/// name stops at the first whitespace character.
///
/// On success the output holds the name (without the `@`) in `p`, the
/// position of the `@` in `start_loc` and, when the name was terminated by a
/// whitespace, the position of the last character of the name in `end_loc`.
///
/// # Errors
/// - `ParseLocationKind::Before`: a non-whitespace char was met before the `@`.
/// - `ParseLocationKind::Header`: the first char of the name is not
///   uppercase, or no name was collected at all.
/// - `ParseLocationKind::Inside`: a char inside the name is neither alphabetic
///   nor a whitespace.
///
/// NOTE(review): the behavior at the end of the subject is driven by the
/// `get_symbol_or_final!` macro, which is defined elsewhere; it presumably
/// switches to `State::Final` — confirm.
fn parse_entry_intro(subject: &str, initial_cursor: usize) -> Result<ParseOutput<String>, ParseError> {
    #[derive(Debug, PartialEq)]
    enum State {
        Out,
        FirstLetter,
        InIntroName,
        Final
    }

    let mut state: State = State::Out;
    let mut cursor = initial_cursor;
    // position of the `@`, only meaningful once it has been met
    let mut at_sign_pos: usize = 0;
    let mut name = String::new();

    loop {
        // debug_state_machine!("parse_entry_intro", subject, state, cursor);
        match state {
            State::Out => {
                let symbol = get_symbol_or_final!(subject, state, cursor);
                if symbol == '@' {
                    at_sign_pos = cursor;
                    state = State::FirstLetter;
                } else if !WHITESPACES.contains(&symbol) {
                    return Err(ParseError {
                        msg: "Unexpected non-whitespace char before EntryIntro @".into(),
                        location_kind: ParseLocationKind::Before,
                        cursor,
                        ..Default::default()
                    });
                }
            },
            State::FirstLetter => {
                let symbol = get_symbol_or_final!(subject, state, cursor);
                if !symbol.is_uppercase() {
                    return Err(ParseError {
                        msg: "Unexpected non-uppercase char in first char".into(),
                        location_kind: ParseLocationKind::Header,
                        cursor,
                        ..Default::default()
                    });
                }
                name.push(symbol);
                state = State::InIntroName;
            },
            State::InIntroName => {
                let symbol = get_symbol_or_final!(subject, state, cursor);
                if WHITESPACES.contains(&symbol) {
                    // step back so that the reported end is the last char of the name
                    cursor -= 1;
                    state = State::Final;
                    continue;
                }
                if !symbol.is_alphabetic() {
                    return Err(ParseError {
                        msg: "Unexpected non-alphabetic char".into(),
                        location_kind: ParseLocationKind::Inside,
                        cursor,
                        ..Default::default()
                    });
                }
                name.push(symbol);
            },
            State::Final => {
                return if name.is_empty() {
                    Err(ParseError {
                        msg: "No intro name was found".into(),
                        location_kind: ParseLocationKind::Header,
                        cursor,
                        ..Default::default()
                    })
                } else {
                    Ok(ParseOutput {
                        start_loc: at_sign_pos,
                        end_loc: cursor,
                        p: name
                    })
                };
            }
        }
        cursor += 1;
    }
}
fn parse_free_description(subject: &str, initial_cursor: usize) -> Result<ParseOutput<String>, ParseError> {
@ -379,47 +467,21 @@ pub fn parse_entry(subject: &str, initial_cursor: usize) -> Result<ParseOutput<P
}
let mut state = State::Out;
let mut cursor = initial_cursor;
let mut cursor_after_whitespaces = initial_cursor;
let mut claims:
let mut intro_opt: Option<ParseOutput<String>> = None;
let mut claims_opt:
Option<ParseOutput<Vec<ParseOutput<PEntryClaim>>>> = None;
let mut description: Option<ParseOutput<String>> = None;
let mut description_opt: Option<ParseOutput<String>> = None;
loop {
// debug_state_machine!("parse_entry", subject, state, cursor);
match state {
State::Out => {
let look_ahead: String = subject.chars().take(cursor + ENTRY_INTRO.len()).skip(cursor).collect();
if look_ahead == ENTRY_INTRO {
cursor_after_whitespaces = cursor;
// before entering the labels, do a wild check by just checking how many curly
// braces there are and then we can go in labels if there is two pairs or
// directly into the claims mode if there is only one pair
cursor += ENTRY_INTRO.len();
// we will look for Entry Intro which is one uppercase letter + camelcase
let intro = parse_entry_intro(subject, cursor)?;
cursor = intro.end_loc;
state = State::InClaimsDict;
continue;
}
let symbol = match subject.chars().nth(cursor) {
None => {
return Err(ParseError {
msg: format!("Unexpected end of subject while parsing Entry {:?}", state),
cursor,
..Default::default()
});
},
Some(x) => x
};
if !WHITESPACES.contains(&symbol) {
dbg!(symbol);
return Err(ParseError {
msg: "Encountered unexpected char before Entry".into(),
cursor,
..Default::default()
})
}
intro_opt = Some(intro);
},
State::InClaimsDict => {
match collection::parse_dict("Dict", &subject, cursor) {
@ -435,7 +497,7 @@ pub fn parse_entry(subject: &str, initial_cursor: usize) -> Result<ParseOutput<P
},
Ok(pout) => {
cursor = pout.end_loc;
claims = Some(pout);
claims_opt = Some(pout);
state = State::InFreeDescription;
}
}
@ -453,16 +515,27 @@ pub fn parse_entry(subject: &str, initial_cursor: usize) -> Result<ParseOutput<P
},
Ok(d) => {
cursor = d.end_loc;
description = Some(d);
description_opt = Some(d);
}
};
state = State::Final;
continue;
},
State::Final => {
let intro = match intro_opt {
None => {
return Err(ParseError {
msg: "Entry intro was not found".into(),
cursor,
..Default::default()
})
},
Some(i) => i
};
return Ok(ParseOutput {
start_loc: intro.start_loc,
p: PEntry {
claims: match claims {
claims: match claims_opt {
Some(c) => c,
None => {
return Err(ParseError {
@ -472,9 +545,9 @@ pub fn parse_entry(subject: &str, initial_cursor: usize) -> Result<ParseOutput<P
})
}
},
description
intro,
description: description_opt
},
start_loc: cursor_after_whitespaces,
end_loc: cursor
})
}

View file

@ -108,3 +108,18 @@ fn test_parse_multiple_next_by_next() {
assert!(res.is_ok(), "{:?}", res);
assert_eq!(res.unwrap().tree.p.len(), 4);
}
/// A markdown document mixing one `@Entry` and three `@Human` entries must
/// parse into a tree of four items.
#[test]
fn test_parse_multiple_with_custom_entry_intro() {
    let markdown = r#"
@Entry {
name: "Human"
}
@Human {}
@Human {}
@Human {}
"#;

    let outcome = parse_markdown(markdown, 0);
    assert!(outcome.is_ok(), "{:?}", outcome);

    let parsed = outcome.unwrap();
    assert_eq!(parsed.tree.p.len(), 4);
}

View file

@ -9,7 +9,6 @@ use indoc::indoc;
#[test]
fn test_parse_simple_entry() {
let subj = " @Entry {}".to_string();
let res = parse_wrapper(parse_entry, &subj);
assert!(res.is_ok(), "{:?}", res);
@ -83,6 +82,19 @@ fn test_parse_simple_entry() {
assert_eq!(claims[2].p.value_container.p.value.p, PEntryValue::Integer(54));
}
/// The name written after `@` must be exposed as the intro of the parsed entry.
#[test]
fn test_parse_with_custom_intro() {
    let source = r#"
@CustomIntro {
foo: "bar"
}
"#.to_string();

    let outcome = parse_wrapper(parse_entry, &source);
    assert_matches!(outcome, Ok(_));

    let parsed_entry = outcome.unwrap();
    assert_eq!(parsed_entry.p.intro.p, "CustomIntro");
}
#[test]
fn test_parse_with_labels() {
let subj = r#"
@ -166,6 +178,20 @@ fn test_parse_entry_complex1() {
assert_eq!(*get_claim_value(&pout.p, &"name").unwrap(), PEntryValue::String("Douglas Adams".to_string()));
}
/// An entry whose opening braces sit on the line after the intro and are
/// directly followed by content (no space, no newline) must still parse.
#[test]
fn test_parse_entry_abnormal_braces() {
    let source = indoc!{r#"
@Entry
{foo: 1
}
{free description
}
"#}.to_string();

    let outcome = parse_wrapper(parse_entry, &source);
    assert_matches!(outcome, Ok(_));
}
#[test]
fn test_parse_entry_cursor_position() {
// empty entry
@ -253,3 +279,53 @@ fn test_parse_free_description_empty() {
assert_eq!(val.start_loc, 0);
assert_eq!(val.end_loc, 1);
}
/// Nominal case: the intro spans from the `@` (position 0) to the last
/// letter of the name (position 5) and yields the name without the `@`.
#[test]
fn test_parse_entry_intro() {
    let intro = parse_entry_intro("@Entry {", 0).unwrap();

    assert_eq!(intro.start_loc, 0);
    assert_eq!(intro.end_loc, 5);
    assert_eq!(intro.p, "Entry");
}
/// A multi-word CamelCase name is read as a single intro name.
#[test]
fn test_parse_entry_intro_camel_case() {
    let intro = parse_entry_intro("@EntryLongCamelCase {", 0).unwrap();

    assert_eq!(intro.p, "EntryLongCamelCase");
}
/// A name starting with a lowercase letter is rejected with a `Header` error.
#[test]
fn test_parse_entry_intro_reject_invalid_name_first_char() {
    let outcome = parse_entry_intro("@invalid {", 0);
    assert_matches!(outcome, Err(_));

    let error = outcome.unwrap_err();
    assert_eq!(error.location_kind, ParseLocationKind::Header);
    assert!(error.msg.contains("Unexpected non-uppercase char in first char"));
}
/// Snake case is disallowed: an underscore inside the name is rejected with
/// an `Inside` error.
#[test]
fn test_parse_entry_intro_reject_invalid_name() {
    let outcome = parse_entry_intro("@Entry_invalid_name {", 0);
    assert_matches!(outcome, Err(_));

    let error = outcome.unwrap_err();
    assert_eq!(error.location_kind, ParseLocationKind::Inside);
    assert!(error.msg.contains("Unexpected non-alphabetic char"));
}
/// A lone `@` with nothing after it is rejected because no name was found.
#[test]
fn test_parse_entry_intro_reject_no_name() {
    let outcome = parse_entry_intro("@", 0);
    assert_matches!(outcome, Err(_));

    let error = outcome.unwrap_err();
    assert!(error.msg.contains("No intro name was found"));
}