Data 304: Visualizing Data and Models
{vegabrite} and {altair} convert R code into a Vega-Lite JSON specification for a graphic.
If you read Vega-Lite documentation, you may need to do some translating.
For the most part,
Sometimes we can use
becomes
Note
R vectors require that each element have the same, basic data type; R lists can hold any R objects, even if they have different data types.
You can always test to make sure that your {vegabrite} or {altair} code is correctly creating the JSON you expect by converting to JSON and inspecting.
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"mark": {
"color": "red",
"type": "point"
},
"encoding": {
"x": {
"field": "some_variable",
"axis": {
"title": "X variable"
},
"type": "ordinal"
}
}
}
Debugging tip
You can copy and paste the JSON into the Vega Editor, which often provides better error messages.
We have mostly used encoding to map variables to channels.
field = "region:N"
We can also use value to set the scaled value.
value = "red"
Or we can provide a data value (pre-scale):
datum = "Asia"
Expressions can be used anywhere the Vega-Lite documentation lists ExprRef as an option.
Expressions can be used to
Exercise 1 What are the key elements to making this half-violin plot?
# Half-violin plot: a density curve of IMDB_Rating for each MPAA_Rating,
# drawn as a horizontal area mark in its own narrow column.
vl_chart() |>
  # Density transform; the encodings below use its `value`/`density` fields.
  vl_density("IMDB_Rating", groupby = list("MPAA_Rating")) |>
  vl_encode_y("value:Q", title = "IMDB Rating", axis = list(grid = FALSE)) |>
  vl_encode_x("density:Q", title = NA, axis = FALSE) |>
  vl_encode_color("MPAA_Rating:N", legend = FALSE) |>
  # One column per rating, with no gap between neighbouring columns.
  vl_encode_column("MPAA_Rating:N", spacing = 0) |>
  vl_mark_area(orient = "horizontal") |>
  vl_add_properties(width = 50, height = 300) |>
  # Make the border around each view transparent.
  vl_config_view(stroke = "transparent") |>
  vl_add_data_url(vega_data$movies$url)

We could compute a negative density using vl_calculate() to get the other side, but there is an easier way: vl_stack_x("center")
# Full violin plot: same pipeline as the half-violin, plus a centered stack
# on x so each density area is laid out around the middle of its column.
vl_chart() |>
  vl_density("IMDB_Rating", groupby = list("MPAA_Rating")) |>
  vl_encode_y("value:Q", title = "IMDB Rating", axis = list(grid = FALSE)) |>
  vl_encode_x("density:Q", title = NA, axis = FALSE) |>
  vl_encode_color("MPAA_Rating:N", legend = FALSE) |>
  vl_encode_column("MPAA_Rating:N", spacing = 0) |>
  vl_mark_area(orient = "horizontal") |>
  # The only change from the half-violin: center the stacked x values.
  vl_stack_x("center") |>
  vl_add_properties(width = 50, height = 300) |>
  vl_config_view(stroke = "transparent") |>
  vl_add_data_url(vega_data$movies$url)

Here’s another variation on the theme.
# Point version of the half-violin: each density value is multiplied by
# random(), and the result (`jdensity`) is used as the x position of a point.
vl_chart() |>
  vl_density("IMDB_Rating", groupby = list("MPAA_Rating")) |>
  # jdensity = random fraction of the estimated density at that value.
  vl_calculate("random() * datum.density", as = "jdensity") |>
  vl_encode_y(
    "value:Q",
    title = "IMDB Rating",
    axis = list(grid = FALSE)
  ) |>
  vl_encode_x("jdensity:Q", title = NA, axis = FALSE) |>
  vl_encode_color("MPAA_Rating:N", legend = FALSE) |>
  vl_encode_column("MPAA_Rating:N", spacing = 0) |>
  vl_mark_point() |>
  # vl_stack_x("center") |>
  vl_add_properties(width = 50, height = 300) |>
  vl_config_view(stroke = "transparent") |>
  vl_add_data_url(vega_data$movies$url)

Here’s another variation on the theme.
# --- Layered chart: jittered points on top of centered density areas ------

# Shared pieces: density per MPAA_Rating, a jittered copy of the density,
# and the x / color encodings used by both layers.
base <-
  vl_chart() |>
  # vl_filter("datum.MPAA_Rating == 'G'") |>
  vl_density("IMDB_Rating", groupby = list("MPAA_Rating")) |>
  # jdensity = randomly signed, randomly scaled density, shifted by 0.12.
  vl_calculate(
    "random() * (2 * random() - 1) * datum.density + 0.12",
    as = "jdensity"
  ) |>
  vl_encode_x(
    "value:Q",
    title = "IMDB Rating",
    axis = list(grid = FALSE)
  ) |>
  vl_encode_color("MPAA_Rating:N", legend = FALSE)

# Point layer: y position comes from the jittered density.
points <-
  base |>
  vl_mark_point(size = 10) |>
  vl_encode_y("jdensity:Q", title = NA, axis = FALSE)

# Area layer: y is the density itself, stacked around the center.
violins <-
  base |>
  vl_mark_area(fillOpacity = 0.4, strokeOpacity = 0.8) |>
  vl_encode_y("density:Q", title = NA, axis = FALSE) |>
  vl_stack_y("center")

# Combine the layers, then facet into one row per rating with no spacing.
(points + violins) |>
  vl_add_properties(width = 500, height = 40) |>
  vl_facet_row("MPAA_Rating:N") |>
  vl_config_view(stroke = "transparent") |>
  vl_config_facet(spacing = 0) |>
  vl_add_data_url(vega_data$movies$url)

# --- Ridge-style plot: one density area per row ---------------------------

vl_chart() |>
  vl_density("IMDB_Rating", groupby = list("MPAA_Rating")) |>
  vl_encode_x(
    "value:Q",
    title = "IMDB Rating",
    axis = list(grid = FALSE)
  ) |>
  vl_encode_y("density:Q", title = NA, axis = FALSE) |>
  vl_encode_color("MPAA_Rating:N", legend = FALSE) |>
  # Negative spacing is an attempt to make the rows overlap.
  vl_encode_row("MPAA_Rating:N", spacing = -25) |> # too bad it doesn't work!
  vl_mark_area() |>
  vl_add_properties(width = 500, height = 50) |>
  vl_config_view(stroke = "transparent") |>
  vl_add_data_url(vega_data$movies$url)

Here is the same graphic using an ordinal type for color.
# Ridge-style plot again, but MPAA_Rating is encoded as ordinal (":O")
# rather than nominal for both the color and row channels.
vl_chart() |>
  vl_density("IMDB_Rating", groupby = list("MPAA_Rating")) |>
  vl_encode_x(
    "value:Q",
    title = "IMDB Rating",
    axis = list(grid = FALSE)
  ) |>
  vl_encode_y("density:Q", title = NA, axis = FALSE) |>
  vl_encode_color("MPAA_Rating:O", legend = FALSE) |>
  # Negative spacing is an attempt to make the rows overlap.
  vl_encode_row("MPAA_Rating:O", spacing = -25) |> # too bad it doesn't work!
  vl_mark_area() |>
  vl_add_properties(width = 500, height = 50) |>
  vl_config_view(stroke = "transparent") |>
  vl_add_data_url(vega_data$movies$url)

There is a package in R called {treemap} that makes (ugly) tree maps.
But we can grab just the data and make our own in {vegabrite}
# Tree map drawn from precomputed rectangles in `tm$tm` (created elsewhere;
# presumably the output of {treemap} -- confirm). Each row supplies a corner
# (x0, y0) plus a width/height (w, h).

# Shared spec: derive the opposite corner and map both corners to x/x2, y/y2.
base <-
  vl_chart() |>
  vl_calculate("datum.x0 + datum.w", as = "x1") |>
  vl_calculate("datum.y0 + datum.h", as = "y1") |>
  # vl_encode_fillOpacity("vColor:Q") |>
  vl_encode_fill("continent:N") |>
  vl_encode_x("x0:Q", axis = FALSE) |>
  vl_encode_y("y0:Q", axis = FALSE) |>
  vl_encode_x2("x1:Q") |>
  vl_encode_y2("y1:Q") |>
  vl_config_view(strokeOpacity = 0)

# Level-1 rectangles, drawn with a thick outline.
continents <-
  base |>
  vl_add_data(tm$tm |> filter(level == 1)) |>
  vl_mark_rect(stroke = "navy", opacity = 0.5, strokeWidth = 3) |>
  vl_add_properties(width = 500, height = 500)

# Level-2 rectangles, drawn with a thin outline and a faint fill.
countries <-
  base |>
  vl_add_data(tm$tm |> filter(level == 2)) |>
  vl_mark_rect(stroke = "navy", fillOpacity = 0.1, strokeWidth = 1) |>
  vl_encode_strokeWidth(value = 1) |>
  vl_add_properties(width = 500, height = 500)

# Text labels (iso3) for the level-2 rectangles, sized by vSize.
labels <-
  base |>
  vl_add_data(tm$tm |> filter(level == 2)) |>
  vl_mark_text(
    align = "left", baseline = "bottom",
    dx = 2, dy = -2, size = 5,
    fillOpacity = 1
  ) |>
  # Override the continent fill from `base` so the text is black.
  vl_encode_fill(value = "black") |>
  # NOTE(review): no type suffix on vSize, unlike the other fields --
  # presumably inferred as quantitative from the sqrt scale; confirm.
  vl_encode_size("vSize", legend = FALSE, scale = list(type = "sqrt")) |>
  vl_encode_text("iso3:N") |>
  vl_add_properties(width = 500, height = 500)

(continents + countries + labels)

Goals:
| Date | Completion Rate | Response Rate |
|---|---|---|
| Q1-2017 | 0.91 | 0.023 |
| Q2-2017 | 0.93 | 0.018 |
# Reshape the quarterly `mailing` table (defined elsewhere) and plot the
# response rate and net completion rate over time.

# Turn "Q1-2017"-style labels into real dates and add the net rate.
mailing_wide <-
  mailing |>
  separate(Date, into = c("quarter", "year"), sep = "-") |>
  mutate(
    quarter = parse_number(quarter),
    year = parse_number(year),
    # 3 * quarter - 2 is the first month of the quarter (1, 4, 7, 10).
    date_str = paste0(year, "-", 3 * quarter - 2, "-01"),
    date = ymd(date_str),
    `Net Completion Rate` = `Response Rate` * `Completion Rate`
  )

# One row per (date, rate type); the plain completion rate is dropped.
mailing_long <-
  mailing_wide |>
  pivot_longer(
    matches("Rate"),
    names_to = "rate type",
    values_to = "rate"
  ) |>
  filter(`rate type` != "Completion Rate")

# One line per remaining rate type.
mailing_long |>
  # vl_filter("datum[rate type] != 'Completion Rate'") |>
  vl_chart() |>
  vl_mark_line() |>
  vl_encode_x("date:T") |>
  vl_encode_y("rate:Q") |>
  vl_encode_color("rate type:N") |>
  vl_add_properties(width = 600, height = 200)

Some key features:
# Faceted pie charts of Likert survey responses, one pie per year, with
# percentage labels layered on top of the arcs.

# Share of each response within its year.
pie_data <-
  read.csv("https://calvin-data304.netlify.app/data/likert-survey.csv") |>
  group_by(year) |>
  mutate(percent = count / sum(count)) |>
  # Drop the grouping so it does not linger on the data handed to the chart.
  ungroup()

# Arc layer: one wedge per response, with a fixed color range.
pie_layer <-
  vl_chart() |>
  vl_mark_arc(outerRadius = 80, opacity = 0.7, stroke = "black") |>
  vl_encode_color("response:O", sort = "number") |>
  vl_scale_color(
    range = c(
      "steelblue", "lightsteelblue", "lightyellow",
      "pink", "red", "lightgray"
    )
  )

# Text layer: percentage labels at radius 55, sized by the percentage.
text_layer <-
  vl_chart() |>
  vl_mark_text(radius = 55, color = "black", strokeOpacity = 0) |>
  vl_encode_size(
    "percent:Q",
    legend = FALSE,
    scale = list(range = c(5, 20), type = "sqrt")
  ) |>
  vl_encode_stroke("response:O", sort = "number", legend = FALSE) |>
  vl_encode_text("percent:Q", format = ".0%")

# Both layers share the theta/order encodings; facet into a column per year.
vl_layer(pie_layer, text_layer) |>
  vl_encode_order("number:Q", sort = "descending") |>
  vl_encode_theta("percent:Q", stack = TRUE) |>
  vl_facet_column(
    "year:N",
    title = NA,
    header = list(labelFontSize = 20)
  ) |>
  vl_add_data(pie_data)

{vegabrite} seems not to include the extent transformation. We can add it using the following definition, which mimics how the other transform functions are built.
Extent computes the minimum and maximum values of a variable.
Useful for placing annotation on a graphic.
# Example use of the extent transform: draw a red rule on a bar chart at a
# position taken from the extent of `number`.
# vl_extent() is the helper described in the text; it is defined elsewhere.

# Five letters with reproducible random values.
set.seed(123)
example_data <- tibble(letter = LETTERS[1:5], number = sample(100, 5))

base <-
  vl_chart(width = 300) |>
  vl_add_data(example_data)

# Horizontal bars: number on x, letter on y.
bars <-
  base |>
  vl_mark_bar() |>
  vl_encode(x = "number:Q") |>
  vl_encode(y = "letter:O")

# Store the extent of `number` in the param `number_extent`, then place the
# rule at its first element via an expression.
rule <-
  base |>
  vl_extent(extent = "number", param = "number_extent") |>
  vl_mark_rule(color = "red", size = 4) |>
  vl_encode_x(
    datum = list(expr = "number_extent[0]"),
    type = "quantitative"
  )

bars + rule

You must specify type = "quantitative" when using