Merge #873

873: add API guide to ra_syntax r=matklad a=matklad Co-authored-by: Aleksey Kladov <aleksey.kladov@gmail.com>
2019-02-21 14:36:08 +00:00 · 2019-02-21 14:36:08 +00:00 · 368bc56ac1
commit 368bc56ac1
parent 5cacdfcb3c 4fe07a2b61
1 changed files with 173 additions and 0 deletions
--- a/crates/ra_syntax/src/lib.rs
+++ b/crates/ra_syntax/src/lib.rs
@ -14,6 +14,8 @@
 //! CST). The actual parser lives in a separate `ra_parser` crate, though the
 //! lexer lives in this crate.
 //!
+//! See `api_walkthrough` test in this file for a quick API tour!
+//!
 //! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
 //! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>

@ -87,3 +89,174 @@ pub fn check_fuzz_invariants(text: &str) {
    validation::validate_block_structure(root);
    let _ = file.errors();
 }
+
+/// This test is a guided tour of the crate's API: its assertions document what
+/// each call returns.
+#[test]
+fn api_walkthrough() {
+    use ast::{ModuleItemOwner, NameOwner};
+
+    let source_code = "
+        fn foo() {
+            1 + 1
+        }
+    ";
+    // `SourceFile` is the main entry point.
+    //
+    // Note how `parse` does not return a `Result`: even completely invalid
+    // source code might be parsed.
+    let file = SourceFile::parse(source_code);
+
+    // Due to the way ownership is set up, owned syntax nodes always live behind
+    // a `TreeArc` smart pointer. `TreeArc` is roughly an `std::sync::Arc` which
+    // points to the whole file instead of an individual node.
+    let file: TreeArc<SourceFile> = file;
+
+    // `SourceFile` is the root of the syntax tree. We can iterate over the file's items:
+    let mut func = None;
+    for item in file.items() {
+        match item.kind() {
+            ast::ModuleItemKind::FnDef(f) => func = Some(f),
+            _ => unreachable!(),
+        }
+    }
+    // The returned items are always references.
+    let func: &ast::FnDef = func.unwrap();
+
+    // All nodes implement `ToOwned` trait, with `Owned = TreeArc<Self>`.
+    // `to_owned` is a cheap operation: atomic increment.
+    let _owned_func: TreeArc<ast::FnDef> = func.to_owned();
+
+    // Each AST node has a bunch of getters for children. All getters return
+    // `Option`s though, to account for incomplete code. Some getters are common
+    // for several kinds of node. In this case, a trait like `ast::NameOwner`
+    // usually exists. By convention, all ast types should be used with `ast::`
+    // qualifier.
+    let name: Option<&ast::Name> = func.name();
+    let name = name.unwrap();
+    assert_eq!(name.text(), "foo");
+
+    // Let's get the `1 + 1` expression!
+    let block: &ast::Block = func.body().unwrap();
+    let expr: &ast::Expr = block.expr().unwrap();
+
+    // "Enum"-like nodes are represented using the "kind" pattern. It allows us
+    // to match exhaustively against all flavors of nodes, while maintaining
+    // internal representation flexibility. The drawback is that one can't write
+    // nested matches as one pattern.
+    let bin_expr: &ast::BinExpr = match expr.kind() {
+        ast::ExprKind::BinExpr(e) => e,
+        _ => unreachable!(),
+    };
+
+    // Besides the "typed" AST API, there's an untyped CST one as well.
+    // To switch from AST to CST, call the `.syntax()` method:
+    let expr_syntax: &SyntaxNode = expr.syntax();
+
+    // Note how `expr` and `bin_expr` are in fact the same node underneath:
+    assert!(std::ptr::eq(expr_syntax, bin_expr.syntax()));
+
+    // To go from CST to AST, the `AstNode::cast` function is used:
+    let expr = match ast::Expr::cast(expr_syntax) {
+        Some(e) => e,
+        None => unreachable!(),
+    };
+
+    // Note how `expr` is also a reference!
+    let expr: &ast::Expr = expr;
+
+    // This is possible because the underlying representation is the same:
+    assert_eq!(
+        expr as *const ast::Expr as *const u8,
+        expr_syntax as *const SyntaxNode as *const u8
+    );
+
+    // Each syntax node has two properties. The first is a `SyntaxKind`:
+    assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);
+
+    // And the second is a text range:
+    assert_eq!(expr_syntax.range(), TextRange::from_to(32.into(), 37.into()));
+
+    // You can get a node's text as a `SyntaxText` object, which will traverse the
+    // tree collecting the tokens' text:
+    let text: SyntaxText<'_> = expr_syntax.text();
+    assert_eq!(text.to_string(), "1 + 1");
+
+    // There's a bunch of traversal methods on `SyntaxNode`:
+    assert_eq!(expr_syntax.parent(), Some(block.syntax()));
+    assert_eq!(block.syntax().first_child().map(|it| it.kind()), Some(SyntaxKind::L_CURLY));
+    assert_eq!(expr_syntax.next_sibling().map(|it| it.kind()), Some(SyntaxKind::WHITESPACE));
+
+    // As well as some iterator helpers:
+    let f = expr_syntax.ancestors().find_map(ast::FnDef::cast);
+    assert_eq!(f, Some(&*func));
+    assert!(expr_syntax.siblings(Direction::Next).any(|it| it.kind() == SyntaxKind::R_CURLY));
+    assert_eq!(
+        expr_syntax.descendants().count(),
+        8, // 5 tokens `1`, ` `, `+`, ` `, `1`
+           // 2 child literal expressions: `1`, `1`
+           // 1 the node itself: `1 + 1`
+    );
+
+    // There's also a `preorder` method with a more fine-grained iteration control:
+    let mut buf = String::new();
+    let mut indent = 0;
+    for event in expr_syntax.preorder() {
+        match event {
+            WalkEvent::Enter(node) => {
+                buf += &format!(
+                    "{:indent$}{:?} {:?}\n",
+                    " ",
+                    node.text(),
+                    node.kind(),
+                    indent = indent
+                );
+                indent += 2;
+            }
+            WalkEvent::Leave(_) => indent -= 2,
+        }
+    }
+    assert_eq!(indent, 0);
+    assert_eq!(
+        buf.trim(),
+        r#"
+"1 + 1" BIN_EXPR
+  "1" LITERAL
+    "1" INT_NUMBER
+  " " WHITESPACE
+  "+" PLUS
+  " " WHITESPACE
+  "1" LITERAL
+    "1" INT_NUMBER
+"#
+        .trim()
+    );
+
+    // To recursively process the tree, there are three approaches:
+    // 1. explicitly call getter methods on AST nodes.
+    // 2. use descendants and `AstNode::cast`.
+    // 3. use descendants and the visitor.
+    //
+    // Here's what the second one looks like:
+    let exprs_cast: Vec<String> = file
+        .syntax()
+        .descendants()
+        .filter_map(ast::Expr::cast)
+        .map(|expr| expr.syntax().text().to_string())
+        .collect();
+
+    // An alternative is to use a visitor. The visitor does not do traversal
+    // automatically (so it's more akin to a generic lambda) and is constructed
+    // from closures. This seems more flexible than a single generated visitor
+    // trait.
+    use algo::visit::{visitor, Visitor};
+    let mut exprs_visit = Vec::new();
+    for node in file.syntax().descendants() {
+        if let Some(result) =
+            visitor().visit::<ast::Expr, _>(|expr| expr.syntax().text().to_string()).accept(node)
+        {
+            exprs_visit.push(result);
+        }
+    }
+    assert_eq!(exprs_cast, exprs_visit);
+}