Data 304: Visualizing Data and Models
{vegabrite} and {altair} convert R code into a Vega-Lite JSON specification for a graphic.
If you read Vega-Lite documentation, you may need to do some translating.
For the most part,
Sometimes we can use
becomes
Note
R vectors require that every element have the same basic data type; R lists can hold any R objects, even objects of different data types.
You can always check that your {vegabrite} or {altair} code is creating the JSON you expect by converting it to JSON and inspecting the result.
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"mark": {
"color": "red",
"type": "point"
},
"encoding": {
"x": {
"field": "some_variable",
"axis": {
"title": "X variable"
},
"type": "ordinal"
}
}
}
Debugging tip
You can copy and paste the JSON into the Vega Editor, which often provides better error messages.
We have mostly used encoding to map variables to channels.
field = "region:N"
We can also use value to set the scaled value.
value = "red"
Or we can provide a data value (pre-scale):
datum = "Asia"
{vegabrite} seems not to include the extent transformation. We can add it using the following definition, which mimics how the other transform functions are built.
Exercise 1 Copy the JSON that this produces into the Vega Editor and figure out (a) why it doesn’t work as desired and (b) how to fix it.
# Exercise 1 setup. The chart below is the one the exercise asks you to debug,
# so it is intentionally left exactly as posed.

# Reproducible toy data: the first five capital letters, each paired with a
# random draw (without replacement) from 1:100.
set.seed(123)
example_data <- tibble(
  letter = head(LETTERS, 5),
  number = sample.int(100, 5)
)

# Shared starting point for both layers: a 300px-wide chart holding the data.
base <- vl_add_data(vl_chart(width = 300), example_data)

# Layer 1: bars with number on x and letter on y.
bars <- base |>
  vl_mark_bar() |>
  vl_encode(x = "number:Q") |>
  vl_encode(y = "letter:O")

# Layer 2: a red rule. vl_extent() is asked to store the extent of "number" in
# the parameter n_extent; the rule's x position is the expression n_extent[0].
rule <- base |>
  vl_extent(extent = "number", param = "n_extent") |>
  vl_mark_rule(color = "red") |>
  vl_encode_x(datum = list(expr = "n_extent[0]"))

# Combine the two layers into one chart.
bars + rule
I don’t think you can use vl_extent()
to populate a select input, because options
doesn’t seem to accept a parameter.
Expressions can be used anywhere the Vega-Lite documentation lists ExprRef
as an option.
Expressions can be used to
Several of you figured out how to make violin plots; others of you wondered about them.
Exercise 2 What are the key elements to making this half-violin plot?
# Half-violin plot of IMDB ratings, one narrow column per MPAA rating.
# (Exercise 2: work out which of these steps are the essential ones.)
vl_chart() |>
  vl_density(
    "IMDB_Rating",
    groupby = list("MPAA_Rating")
  ) |>
  vl_encode_y(
    "value:Q",
    title = "IMDB Rating",
    axis = list(grid = FALSE)
  ) |>
  vl_encode_x(
    "density:Q",
    title = NA,
    axis = FALSE
  ) |>
  vl_encode_color(
    "MPAA_Rating:N",
    legend = FALSE
  ) |>
  vl_encode_column(
    "MPAA_Rating:N",
    spacing = 0
  ) |>
  vl_mark_area(orient = "horizontal") |>
  vl_add_properties(
    width = 50,
    height = 300
  ) |>
  vl_config_view(stroke = "transparent") |>
  vl_add_data_url(vega_data$movies$url)
We could compute a negative density using vl_calculate()
to get the other side, but there is an easier way: vl_stack_x("center")
# Full violin plot: the half-violin pipeline plus vl_stack_x("center"), which
# supplies the "other side" of each density area without computing a negative
# density by hand.
vl_chart() |>
  # Density of IMDB_Rating, estimated separately for each MPAA rating.
  vl_density(
    "IMDB_Rating",
    groupby = list("MPAA_Rating")
  ) |>
  # Rating values run up the y axis; grid lines are switched off.
  vl_encode_y(
    "value:Q",
    title = "IMDB Rating",
    axis = list(grid = FALSE)
  ) |>
  # The density sets the width of the area; its axis and title are hidden.
  vl_encode_x(
    "density:Q",
    title = NA,
    axis = FALSE
  ) |>
  # One colour and one column per MPAA rating, with no gap between columns.
  vl_encode_color(
    "MPAA_Rating:N",
    legend = FALSE
  ) |>
  vl_encode_column(
    "MPAA_Rating:N",
    spacing = 0
  ) |>
  vl_mark_area(orient = "horizontal") |>
  # The one addition relative to the half-violin version.
  vl_stack_x("center") |>
  vl_add_properties(
    width = 50,
    height = 300
  ) |>
  # Hide the frame around each facet cell.
  vl_config_view(stroke = "transparent") |>
  vl_add_data_url(vega_data$movies$url)
Here’s another variation on the theme.
# Variation: draw points instead of an area. Each point's x position is its
# density multiplied by a fresh random() factor, stored as jdensity.
vl_chart() |>
  vl_density(
    "IMDB_Rating",
    groupby = list("MPAA_Rating")
  ) |>
  vl_calculate(
    "random() * datum.density",
    as = "jdensity"
  ) |>
  vl_encode_y(
    "value:Q",
    title = "IMDB Rating",
    axis = list(grid = FALSE)
  ) |>
  # Use the jittered density, not the raw density, for the x position.
  vl_encode_x(
    "jdensity:Q",
    title = NA,
    axis = FALSE
  ) |>
  vl_encode_color(
    "MPAA_Rating:N",
    legend = FALSE
  ) |>
  vl_encode_column(
    "MPAA_Rating:N",
    spacing = 0
  ) |>
  vl_mark_point() |>
  # Centering step from the violin version, left disabled here:
  # vl_stack_x("center") |>
  vl_add_properties(
    width = 50,
    height = 300
  ) |>
  vl_config_view(stroke = "transparent") |>
  vl_add_data_url(vega_data$movies$url)
Here’s another variation on the theme.
# Variation: jittered points layered with centered violins, one row per MPAA
# rating, with the rating scale running along x.

# Shared pieces: the per-rating density, a jittered copy of it, and the x and
# colour encodings used by both layers.
base <- vl_chart() |>
  # Optional restriction to a single rating, left disabled:
  # vl_filter("datum.MPAA_Rating == 'G'") |>
  vl_density(
    "IMDB_Rating",
    groupby = list("MPAA_Rating")
  ) |>
  # jdensity is the density scaled by a random signed factor and then shifted
  # by 0.12; it supplies the y position of the points.
  # NOTE(review): the 0.12 offset looks hand-tuned to line the points up with
  # the centered violins -- confirm.
  vl_calculate(
    "random() * (2 * random() - 1) * datum.density + 0.12",
    as = "jdensity"
  ) |>
  vl_encode_x(
    "value:Q",
    title = "IMDB Rating",
    axis = list(grid = FALSE)
  ) |>
  vl_encode_color(
    "MPAA_Rating:N",
    legend = FALSE
  )

# Point layer: small points placed at the jittered density.
points <- base |>
  vl_mark_point(size = 10) |>
  vl_encode_y(
    "jdensity:Q",
    title = NA,
    axis = FALSE
  )

# Violin layer: semi-transparent density areas, centered with vl_stack_y().
violins <- base |>
  vl_mark_area(
    fillOpacity = 0.4,
    strokeOpacity = 0.8
  ) |>
  vl_encode_y(
    "density:Q",
    title = NA,
    axis = FALSE
  ) |>
  vl_stack_y("center")

# Layer the two, then size, facet, and attach the data to the combined chart.
(points + violins) |>
  vl_add_properties(
    width = 500,
    height = 40
  ) |>
  vl_facet_row("MPAA_Rating:N") |>
  vl_config_view(stroke = "transparent") |>
  vl_config_facet(spacing = 0) |>
  vl_add_data_url(vega_data$movies$url)
# Ridgeline-style attempt: one density area per MPAA rating, stacked in rows,
# with MPAA_Rating treated as nominal for both colour and row.
vl_chart() |>
  vl_density(
    "IMDB_Rating",
    groupby = list("MPAA_Rating")
  ) |>
  vl_encode_x(
    "value:Q",
    title = "IMDB Rating",
    axis = list(grid = FALSE)
  ) |>
  vl_encode_y(
    "density:Q",
    title = NA,
    axis = FALSE
  ) |>
  vl_encode_color(
    "MPAA_Rating:N",
    legend = FALSE
  ) |>
  # The negative spacing is an attempt to make neighbouring rows overlap;
  # too bad it doesn't work!
  vl_encode_row(
    "MPAA_Rating:N",
    spacing = -25
  ) |>
  vl_mark_area() |>
  vl_add_properties(
    width = 500,
    height = 50
  ) |>
  vl_config_view(stroke = "transparent") |>
  vl_add_data_url(vega_data$movies$url)
Here is the same graphic using an ordinal type for color.
# Same row-per-rating density chart, but with MPAA_Rating declared ordinal
# (":O") for both the colour and the row encodings.
vl_chart() |>
  vl_density(
    "IMDB_Rating",
    groupby = list("MPAA_Rating")
  ) |>
  vl_encode_x(
    "value:Q",
    title = "IMDB Rating",
    axis = list(grid = FALSE)
  ) |>
  vl_encode_y(
    "density:Q",
    title = NA,
    axis = FALSE
  ) |>
  vl_encode_color(
    "MPAA_Rating:O",
    legend = FALSE
  ) |>
  # The negative spacing is an attempt to make neighbouring rows overlap;
  # too bad it doesn't work!
  vl_encode_row(
    "MPAA_Rating:O",
    spacing = -25
  ) |>
  vl_mark_area() |>
  vl_add_properties(
    width = 500,
    height = 50
  ) |>
  vl_config_view(stroke = "transparent") |>
  vl_add_data_url(vega_data$movies$url)
There is a package in R called {treemap} that makes (ugly) tree maps.
But we can grab just the data and make our own in {vegabrite}.
# Treemap assembled from three rectangle/text layers that share one set of
# encodings. tm is created outside this chunk.
# NOTE(review): tm$tm is presumably the layout table produced by {treemap},
# with one row per rectangle (x0, y0, w, h, level, ...) -- confirm.

# Shared encodings: each rectangle spans (x0, y0) to (x1, y1), where the far
# corner is computed from the origin plus width/height; fill follows continent.
base <- vl_chart() |>
  vl_calculate("datum.x0 + datum.w", as = "x1") |>
  vl_calculate("datum.y0 + datum.h", as = "y1") |>
  # Alternative opacity mapping, left disabled:
  # vl_encode_fillOpacity("vColor:Q") |>
  vl_encode_fill("continent:N") |>
  vl_encode_x("x0:Q", axis = FALSE) |>
  vl_encode_y("y0:Q", axis = FALSE) |>
  vl_encode_x2("x1:Q") |>
  vl_encode_y2("y1:Q") |>
  vl_config_view(strokeOpacity = 0)

# Level-1 rows: large, half-transparent rectangles with a thick navy outline.
continents <- base |>
  vl_add_data(filter(tm$tm, level == 1)) |>
  vl_mark_rect(
    stroke = "navy",
    opacity = 0.5,
    strokeWidth = 3
  ) |>
  vl_add_properties(width = 500, height = 500)

# Level-2 rows: faintly filled rectangles with a thin navy outline.
countries <- base |>
  vl_add_data(filter(tm$tm, level == 2)) |>
  vl_mark_rect(
    stroke = "navy",
    fillOpacity = 0.1,
    strokeWidth = 1
  ) |>
  vl_encode_strokeWidth(value = 1) |>
  vl_add_properties(width = 500, height = 500)

# Level-2 labels: black iso3 codes anchored just inside each rectangle's
# corner, with text size driven by vSize on a square-root scale.
labels <- base |>
  vl_add_data(filter(tm$tm, level == 2)) |>
  vl_mark_text(
    align = "left",
    baseline = "bottom",
    dx = 2,
    dy = -2,
    size = 5,
    fillOpacity = 1
  ) |>
  vl_encode_fill(value = "black") |>
  vl_encode_size(
    "vSize",
    legend = FALSE,
    scale = list(type = "sqrt")
  ) |>
  vl_encode_text("iso3:N") |>
  vl_add_properties(width = 500, height = 500)

# Layer order: continents, then countries, then labels.
(continents + countries + labels)
Goals:
# A tibble: 6 × 3
Date `Completion Rate` `Response Rate`
<chr> <dbl> <dbl>
1 Q1-2017 0.91 0.023
2 Q2-2017 0.93 0.018
3 Q3-2017 0.91 0.028
4 Q4-2017 0.89 0.023
5 Q1-2018 0.84 0.034
6 Q2-2018 0.88 0.027
# Turn "Q1-2017"-style labels into real dates and add the net completion rate.
mailing_wide <- mailing |>
  # "Q1-2017" -> quarter = "Q1", year = "2017".
  separate(Date, into = c("quarter", "year"), sep = "-") |>
  # Strip the "Q" and convert both pieces to numbers.
  mutate(
    quarter = parse_number(quarter),
    year = parse_number(year)
  ) |>
  # Quarter q starts in month 3q - 2 (1, 4, 7, 10); use the first of that month.
  mutate(
    date_str = paste0(year, "-", 3 * quarter - 2, "-01"),
    date = ymd(date_str)
  ) |>
  mutate(`Net Completion Rate` = `Response Rate` * `Completion Rate`)

# One row per (date, rate type); the plain completion rate is then dropped so
# that only the response rate and net completion rate remain.
mailing_long <- mailing_wide |>
  pivot_longer(
    matches("Rate"),
    names_to = "rate type",
    values_to = "rate"
  ) |>
  filter(`rate type` != "Completion Rate")
# Line chart of the remaining rates over time, one coloured line per rate type.
# The completion rate was already removed in R when mailing_long was built, so
# the Vega-Lite filter below is left disabled:
# vl_filter("datum[rate type] != 'Completion Rate'") |>
vl_chart(mailing_long) |>
  vl_mark_line() |>
  vl_encode_x("date:T") |>
  vl_encode_y("rate:Q") |>
  vl_encode_color("rate type:N") |>
  vl_add_properties(
    width = 600,
    height = 200
  )
Some key features:
# Likert survey counts, with each response's share of its year's total stored
# in percent (a proportion between 0 and 1).
pie_data <-
  read.csv("https://calvin-data304.netlify.app/data/likert-survey.csv") |>
  group_by(year) |>
  mutate(percent = count / sum(count)) |>
  # Drop the grouping once the per-year shares are computed, so later steps
  # receive a plain, ungrouped data frame rather than one still grouped by year.
  ungroup()
# Faceted pie charts of the Likert responses: one pie per year, with each
# slice labelled by its percentage.

# Arc layer: semi-transparent slices with black outlines, coloured by response
# using a fixed six-colour range.
# NOTE(review): sort = "number" presumably orders responses by the number
# field in the data -- confirm.
pie_layer <- vl_chart() |>
  vl_mark_arc(
    outerRadius = 80,
    opacity = 0.7,
    stroke = "black"
  ) |>
  vl_encode_color("response:O", sort = "number") |>
  vl_scale_color(
    range = c(
      "steelblue", "lightsteelblue", "lightyellow",
      "pink", "red", "lightgray"
    )
  )

# Text layer: percentage labels placed at radius 55, with text size scaled by
# the percentage (square-root scale, 5 to 20).
text_layer <- vl_chart() |>
  vl_mark_text(
    radius = 55,
    color = "black",
    strokeOpacity = 0
  ) |>
  vl_encode_size(
    "percent:Q",
    legend = FALSE,
    scale = list(range = c(5, 20), type = "sqrt")
  ) |>
  vl_encode_stroke(
    "response:O",
    sort = "number",
    legend = FALSE
  ) |>
  vl_encode_text("percent:Q", format = ".0%")

# Combine the layers; the order, theta, and facet encodings are added to the
# layered chart so that both layers share them.
vl_layer(pie_layer, text_layer) |>
  vl_encode_order("number:Q", sort = "descending") |>
  vl_encode_theta("percent:Q", stack = TRUE) |>
  vl_facet_column(
    "year:N",
    title = NA,
    header = list(labelFontSize = 20)
  ) |>
  vl_add_data(pie_data)