flights |>
filter(
carrier == "UA",
dest %in% c("IAH", "HOU"),
sched_dep_time > 0900,
sched_arr_time < 2000
) |>
group_by(flight) |>
summarize(
delay = mean(arr_delay, na.rm = TRUE),
cancelled = sum(is.na(arr_delay)),
n = n()
) |>
filter(n > 10)
If you haven't used R, it has some serious data manipulation legs built into it.