Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f668b99
Unified: Add support for tree-sitter-style corpus tests
asgerf May 7, 2026
49f1909
Yeast: add reachable_node_ids()
asgerf May 7, 2026
a049850
Yeast: add type-checking errors in AST dump
asgerf May 7, 2026
c3a9218
Yeast: Add one-shot phase kind
asgerf May 8, 2026
bb9e996
Shared: Do not emit ReservedWord class when there are no unnamed tokens
asgerf May 7, 2026
5d0cb9e
YEAST: fix one-shot rules for unnamed nodes and self-captures
asgerf May 12, 2026
8a2a48d
Unified extractor: add AST schema, swift translation rules, and corpu…
asgerf May 12, 2026
72b683d
Unified: Add Swift corpus tests
asgerf May 12, 2026
5772ee4
YEAST: add NodeRef type, YeastDisplay trait, and source text storage
asgerf May 12, 2026
9283801
Unified: Add some more AST nodes and rules
asgerf May 11, 2026
2307839
Yeast: Change how patterns with repetition are parsed
asgerf May 11, 2026
6b58482
Yeast: Fix text associated with synthesized nodes
asgerf May 11, 2026
a966dff
Unified: Add more patterns and some fixes to the AST
asgerf May 11, 2026
ccc1dd5
Unified: Add tuple_pattern
asgerf May 11, 2026
3b7a53f
yeast-macros: merge repeated field declarations and support repetitio…
asgerf May 11, 2026
cbe4c81
Unified: add tuple_pattern and sequence_condition; refine if-let/guar…
asgerf May 11, 2026
55194dd
Unified: Support for calls and member access
asgerf May 11, 2026
600a496
Unified: Simplify concatenation of arguments
asgerf May 12, 2026
7fa6c4e
Unified: Update test output after rebasing on grammar changes
asgerf May 13, 2026
b031e5b
Unified: regenerate QL and make tests not crash
asgerf May 13, 2026
554bdf1
Yeast: fix warning about unnecessary mutability
asgerf May 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion shared/tree-sitter-extractor/src/extractor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ pub fn extract(

if let Some(yeast_runner) = yeast_runner {
let ast = yeast_runner
.run_from_tree(&tree)
.run_from_tree(&tree, source)
.unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}"));
traverse_yeast(&ast, &mut visitor);
} else {
Expand Down
13 changes: 12 additions & 1 deletion shared/tree-sitter-extractor/src/generator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,19 @@ pub fn generate(
&node_parent_table_name,
)),
ql::TopLevel::Class(ql_gen::create_token_class(&token_name, &tokeninfo_name)),
ql::TopLevel::Class(ql_gen::create_reserved_word_class(&reserved_word_name)),
];
// Only emit the ReservedWord class when there are actually unnamed token
// types in the schema (i.e., @{prefix}_reserved_word exists in the dbscheme).
// When converting from a YEAST YAML schema that has no unnamed tokens, this
// type is absent and referencing it would cause a QL compilation error.
let has_reserved_words = nodes
.values()
.any(|n| n.dbscheme_name == reserved_word_name);
if has_reserved_words {
body.push(ql::TopLevel::Class(ql_gen::create_reserved_word_class(
&reserved_word_name,
)));
}

// Overlay discard predicates
body.push(ql::TopLevel::Predicate(
Expand Down
122 changes: 104 additions & 18 deletions shared/yeast-macros/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,19 +113,65 @@ fn parse_query_node_inner(tokens: &mut Tokens) -> Result<TokenStream> {
/// appear in any order; bare patterns are accumulated and emitted as a
/// single `("child", ...)` entry.
fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
let mut fields = Vec::new();
// Accumulate per-field elems in declaration order; multiple uses of the
// same field name extend the same list (so e.g. `cond: (foo) cond: (bar)`
// matches a `cond` field whose first child is `foo` and second is `bar`).
let mut field_order: Vec<String> = Vec::new();
let mut field_elems: std::collections::HashMap<String, Vec<TokenStream>> =
std::collections::HashMap::new();
let mut bare_children: Vec<TokenStream> = Vec::new();
// Append `elem` to the list for `name`, recording first-seen order.
// Uses the Entry API so the map is probed exactly once per call
// (the original did a `contains_key` lookup followed by a second
// `insert`/`get_mut` lookup — Clippy `map_entry`).
let push_field_elem = |order: &mut Vec<String>,
                       map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
                       name: String,
                       elem: TokenStream| {
    match map.entry(name) {
        std::collections::hash_map::Entry::Vacant(slot) => {
            // First time we see this field: remember its position so
            // fields are emitted in declaration order later.
            order.push(slot.key().clone());
            slot.insert(vec![elem]);
        }
        std::collections::hash_map::Entry::Occupied(mut slot) => {
            // Repeated field name: extend the existing element list.
            slot.get_mut().push(elem);
        }
    }
};
while tokens.peek().is_some() {
if peek_is_field(tokens) {
let field_name = expect_ident(tokens, "expected field name")?;
let field_str = field_name.to_string();

expect_punct(tokens, ':', "expected `:` after field name")?;

let child = parse_query_node(tokens)?;
fields.push(quote! {
(#field_str, vec![yeast::query::QueryListElem::SingleNode(#child)])
});
// Parse the field's pattern. To support repetition like
// `field: (kind)* @cap`, parse the atom first, then check for
// a quantifier, and lastly handle a trailing `@capture`.
let atom = parse_query_atom(tokens)?;
if peek_is_repetition(tokens) {
let rep = expect_repetition(tokens)?;
let elem = quote! {
yeast::query::QueryListElem::Repeated {
children: vec![yeast::query::QueryListElem::SingleNode(#atom)],
rep: #rep,
}
};
let elem = maybe_wrap_list_capture(tokens, elem)?;
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
} else {
let child = if peek_is_at(tokens) {
tokens.next();
let capture_name =
expect_ident(tokens, "expected capture name after @")?;
let name_str = capture_name.to_string();
quote! {
yeast::query::QueryNode::Capture {
capture: #name_str,
node: Box::new(#atom),
}
}
} else {
atom
};
let elem = quote! {
yeast::query::QueryListElem::SingleNode(#child)
};
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
}
} else {
// Bare patterns — accumulate into the implicit `child` field.
// We don't break here, so we can interleave with named fields.
Expand All @@ -137,6 +183,13 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
bare_children.extend(elems);
}
}
let mut fields: Vec<TokenStream> = Vec::new();
for name in field_order {
let elems = field_elems.remove(&name).unwrap();
fields.push(quote! {
(#name, vec![#(#elems),*])
});
}
if !bare_children.is_empty() {
fields.push(quote! {
("child", vec![#(#bare_children),*])
Expand Down Expand Up @@ -299,7 +352,7 @@ fn parse_direct_node(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStream> {
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => {
let group = expect_group(tokens, Delimiter::Brace)?;
let expr = group.stream();
Ok(quote! { #expr })
Ok(quote! { ::std::convert::Into::<usize>::into(#expr) })
}
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => {
let group = expect_group(tokens, Delimiter::Parenthesis)?;
Expand Down Expand Up @@ -329,12 +382,17 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
return Ok(quote! { #ctx.literal(#kind_str, #lit) });
}

// Check for (kind #{expr}) — computed literal, expr converted via .to_string()
// Check for (kind #{expr}) — computed literal, expr converted via YeastDisplay
if peek_is_hash(tokens) {
tokens.next(); // consume #
let group = expect_group(tokens, Delimiter::Brace)?;
let expr = group.stream();
return Ok(quote! { #ctx.literal(#kind_str, &(#expr).to_string()) });
return Ok(quote! {
{
let __value = yeast::YeastDisplay::yeast_to_string(&(#expr), &*#ctx.ast);
#ctx.literal(#kind_str, &__value)
}
});
}

// Check for (kind $fresh)
Expand Down Expand Up @@ -374,15 +432,19 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
inner.next(); // consume first .
inner.next(); // consume second .
let expr: proc_macro2::TokenStream = inner.collect();
stmts.push(quote! { let #temp: Vec<usize> = #expr; });
stmts.push(quote! {
let #temp: Vec<usize> = (#expr).into_iter()
.map(::std::convert::Into::<usize>::into)
.collect();
});
field_args.push(quote! { (#field_str, #temp) });
continue;
}
}
}

let value = parse_direct_node(tokens, ctx)?;
stmts.push(quote! { let #temp = #value; });
stmts.push(quote! { let #temp: usize = #value; });
field_args.push(quote! { (#field_str, vec![#temp]) });
}

Expand Down Expand Up @@ -427,10 +489,16 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
inner.next(); // consume first .
inner.next(); // consume second .
let expr: TokenStream = inner.collect();
items.push(quote! { __nodes.extend(#expr); });
items.push(quote! {
__nodes.extend(
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
);
});
} else {
let expr = group.stream();
items.push(quote! { __nodes.push(#expr); });
items.push(quote! {
__nodes.push(::std::convert::Into::<usize>::into(#expr));
});
}
continue;
}
Expand Down Expand Up @@ -580,13 +648,24 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
let name_str = &cap.name;
match cap.multiplicity {
CaptureMultiplicity::Repeated => {
quote! { let #name: Vec<usize> = __captures.get_all(#name_str); }
quote! {
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
.into_iter()
.map(yeast::NodeRef)
.collect();
}
}
CaptureMultiplicity::Optional => {
quote! { let #name: Option<usize> = __captures.get_opt(#name_str); }
quote! {
let #name: Option<yeast::NodeRef> =
__captures.get_opt(#name_str).map(yeast::NodeRef);
}
}
CaptureMultiplicity::Single => {
quote! { let #name: usize = __captures.get_var(#name_str).unwrap(); }
quote! {
let #name: yeast::NodeRef =
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
}
}
}
})
Expand All @@ -613,19 +692,26 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
CaptureMultiplicity::Repeated => quote! {
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
__fields.insert(__field_id, #name);
__fields.insert(
__field_id,
#name.into_iter()
.map(::std::convert::Into::<usize>::into)
.collect(),
);
},
CaptureMultiplicity::Optional => quote! {
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
if let Some(__id) = #name {
__fields.entry(__field_id).or_insert_with(Vec::new).push(__id);
__fields.entry(__field_id).or_insert_with(Vec::new)
.push(::std::convert::Into::<usize>::into(__id));
}
},
CaptureMultiplicity::Single => quote! {
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
__fields.entry(__field_id).or_insert_with(Vec::new).push(#name);
__fields.entry(__field_id).or_insert_with(Vec::new)
.push(::std::convert::Into::<usize>::into(#name));
},
}
})
Expand Down
13 changes: 11 additions & 2 deletions shared/yeast/doc/yeast.md
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,8 @@ to enable rewriting:

```rust
let desugar = yeast::DesugaringConfig::new()
.add_phase("cleanup", cleanup_rules())
.add_phase("desugar", desugar_rules())
.add_phase("cleanup", yeast::PhaseKind::Repeating, cleanup_rules())
.add_phase("translate", yeast::PhaseKind::OneShot, translate_rules())
.with_output_node_types_yaml(include_str!("output-node-types.yml"));

let lang = simple::LanguageSpec {
Expand All @@ -365,6 +365,15 @@ let lang = simple::LanguageSpec {
A single-phase config is just `.add_phase(...)` called once. Phase names
appear in error messages so you can tell which phase failed.

There are two kinds of phases:
- **Repeating**:
Each node is re-processed until none of the rules in the phase matches.
When a node no longer matches any rules, its children are recursively processed. In practice this is used to desugar or simplify an AST, while staying mostly within the same schema.
- **One-shot**:
Each node is processed by the first matching rule, and the engine panics if no rule matches.
Rules are then recursively applied to every captured node.
In practice this is used when translating from one AST schema to another, where an exhaustive match is required.

The same YAML node-types file is used for both the runtime yeast `Schema` (so
rules can refer to output-only kinds and fields) and TRAP validation (it
is converted to JSON internally).
Expand Down
15 changes: 15 additions & 0 deletions shared/yeast/src/captures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,21 @@ impl Captures {
}
}
}

/// Apply a fallible function to every captured id (across all keys),
/// rewriting each id in place with the function's result. Short-circuits
/// and propagates the first error encountered.
pub fn try_map_all_captures<E>(
    &mut self,
    mut f: impl FnMut(Id) -> Result<Id, E>,
) -> Result<(), E> {
    // Flatten the per-key id lists into a single mutable traversal;
    // `try_for_each` stops at the first `Err`, exactly like the
    // early-return of a nested loop with `?`.
    self.captures
        .values_mut()
        .flat_map(|ids| ids.iter_mut())
        .try_for_each(|id| {
            *id = f(*id)?;
            Ok(())
        })
}
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
if let Some(from_ids) = self.captures.get(from) {
let new_values = from_ids.iter().copied().map(f).collect();
Expand Down
Loading
Loading