Chapter 4: R
Querying the Assembly API with httr2 and tidyverse
This chapter provides a tidy R workflow for collecting Korean legislative data, with functions that integrate naturally into a dplyr/tidyverse pipeline.
Setup
install.packages(c("httr2", "jsonlite", "tidyverse", "glue"))
Add your key to ~/.Renviron:
ASSEMBLY_API_KEY=your-key-here
Then run readRenviron("~/.Renviron") (or restart R).
Core functions
Save this as assembly_api.R:
library(httr2)
library(jsonlite)
library(tidyverse)
library(glue)
# Root URL of the API; assembly_get() appends "/<endpoint>" to it.
BASE_URL <- "https://open.assembly.go.kr/portal/openapi"
# Lookup from Assembly number (as a string) to the UNIT_CD query value that
# get_members() sends.
UNIT_CD <- c("22"="100022","21"="100021","20"="100020","19"="100019","18"="100018")
#' Query one API endpoint and return its rows as a tibble
#'
#' @param endpoint Endpoint code; appended to `BASE_URL` and used as the key
#'   under which the JSON payload is looked up.
#' @param ... Query parameters sent along with `KEY` (read from the
#'   `ASSEMBLY_API_KEY` environment variable) and `Type = "json"`.
#' @return A tibble with one row per record. An empty tibble is returned when
#'   the result code is `INFO-200` or the payload carries no rows.
#'
#' Stops with "API error <code>: <message>" for any result code other than
#' `INFO-000` / `INFO-200`, and with a descriptive error when no result code
#' can be located in the response.
assembly_get <- function(endpoint, ...) {
  params <- list(KEY = Sys.getenv("ASSEMBLY_API_KEY"), Type = "json", ...)

  resp <- request(glue("{BASE_URL}/{endpoint}")) |>
    req_url_query(!!!params) |>
    # Never raise on HTTP status; success is judged from the result code below.
    req_error(is_error = \(r) FALSE) |>
    req_perform() |>
    resp_body_json()

  body <- resp[[endpoint]]

  # Expected shape: <endpoint>[[1]]$head[[2]]$RESULT. Walk it defensively so a
  # different shape yields a clear error instead of a subscript failure.
  head_part <- if (length(body) >= 1) body[[1]][["head"]] else NULL
  result <- if (length(head_part) >= 2) head_part[[2]][["RESULT"]] else NULL
  # NOTE(review): error / no-data responses appear to come back as a bare
  # top-level RESULT object rather than under the endpoint key -- confirm
  # against the API documentation.
  if (is.null(result)) result <- resp[["RESULT"]]

  code <- result[["CODE"]]
  if (is.null(code)) {
    stop(
      glue("Unexpected response shape from endpoint '{endpoint}': no RESULT code found"),
      call. = FALSE
    )
  }

  if (code == "INFO-200") return(tibble()) # empty result — not an error
  if (code != "INFO-000") {
    msg <- result[["MESSAGE"]]
    if (is.null(msg)) msg <- ""
    stop(glue("API error {code}: {msg}"), call. = FALSE)
  }

  rows <- if (length(body) >= 2) body[[2]][["row"]] else NULL
  if (is.null(rows)) return(tibble())
  # Normalize: a single result arrives as one named list, not a list of lists.
  if (!is.null(names(rows))) rows <- list(rows)

  rows |>
    map(\(row) {
      # JSON null is parsed to NULL, which as_tibble() rejects as a column;
      # substitute NA so records with missing fields still bind.
      as_tibble(map(row, \(value) if (is.null(value)) NA else value))
    }) |>
    bind_rows()
}
# ── Convenience wrappers ───────────────────────────────────────────────────────
#' Query the bill-search endpoint
#'
#' Thin wrapper over `assembly_get()` for the `nzmimeepazxkubdpn` endpoint.
#'
#' @param age Sent as `AGE`.
#' @param bill_name,proposer,proc_result,committee Optional filters (default
#'   `NULL`), sent as `BILL_NAME`, `PROPOSER`, `PROC_RESULT` and `COMMITTEE`.
#' @param page Page index, sent as `pIndex`.
#' @param page_size Rows per page, sent as `pSize`.
#' @return Whatever `assembly_get()` returns for the query.
search_bills <- function(age, bill_name=NULL, proposer=NULL,
                         proc_result=NULL, committee=NULL,
                         page=1, page_size=100) {
  endpoint <- "nzmimeepazxkubdpn"
  assembly_get(
    endpoint,
    AGE = age,
    BILL_NAME = bill_name,
    PROPOSER = proposer,
    PROC_RESULT = proc_result,
    COMMITTEE = committee,
    pIndex = page,
    pSize = page_size
  )
}
#' Query the member endpoint for one Assembly
#'
#' Thin wrapper over `assembly_get()` for the `nwvrqwxyaytdsfvhu` endpoint.
#'
#' @param age Assembly number; must be one of `names(UNIT_CD)`. Numeric input
#'   such as `22` is accepted and treated like `"22"`.
#' @param name,party,district,committee Optional filters (default `NULL`),
#'   sent as `HG_NM`, `POLY_NM`, `ORIG_NM` and `CMIT_NM`.
#' @param page_size Rows requested, sent as `pSize`.
#' @return Whatever `assembly_get()` returns for the query.
#'
#' Stops with a descriptive error when `age` is not a single known Assembly.
get_members <- function(age="22", name=NULL, party=NULL,
                        district=NULL, committee=NULL, page_size=300) {
  # Look up by name, never by position: UNIT_CD[[22]] would be a positional
  # index and fail with "subscript out of bounds".
  age_key <- as.character(age)
  if (length(age_key) != 1 || is.na(age_key) || !(age_key %in% names(UNIT_CD))) {
    stop(
      "Unknown Assembly `age`: ", paste(age_key, collapse = ", "),
      " (expected one of ", paste(names(UNIT_CD), collapse = ", "), ")",
      call. = FALSE
    )
  }

  assembly_get(
    "nwvrqwxyaytdsfvhu",
    UNIT_CD = UNIT_CD[[age_key]],
    HG_NM = name,
    POLY_NM = party,
    ORIG_NM = district,
    CMIT_NM = committee,
    pSize = page_size
  )
}
#' Query the vote endpoint
#'
#' Thin wrapper over `assembly_get()` for the `ncocpgfiaoituanbr` endpoint.
#'
#' @param age Sent as `AGE`.
#' @param bill_name Optional filter (default `NULL`), sent as `BILL_NAME`.
#' @param page_size Rows requested, sent as `pSize`.
#' @return Whatever `assembly_get()` returns for the query.
get_votes <- function(age, bill_name=NULL, page_size=100) {
  endpoint <- "ncocpgfiaoituanbr"
  assembly_get(
    endpoint,
    AGE = age,
    BILL_NAME = bill_name,
    pSize = page_size
  )
}
#' Query the `BILLINFOPPSR` endpoint for one bill
#'
#' @param bill_id Bill identifier, sent as `BILL_ID`.
#' @return Whatever `assembly_get()` returns for the query.
get_proposers <- function(bill_id) {
  endpoint <- "BILLINFOPPSR"
  assembly_get(endpoint, BILL_ID = bill_id)
}
Example 1: Party-level bill sponsorship summary
source("assembly_api.R")

# 22nd Assembly members, reduced to the name (HG_NM), party (POLY_NM),
# committee (CMIT_NM) and district (ORIG_NM) columns
members <- select(
  get_members(age = "22", page_size = 300),
  HG_NM, POLY_NM, CMIT_NM, ORIG_NM
)

# One page (up to 100 rows) of 22nd Assembly bills
bills <- search_bills(age = "22", page_size = 100)

# Attach member columns by matching RST_PROPOSER to the member name, then
# count bills per party and processing result.
# NOTE(review): if member names are not unique, the join duplicates the
# matching bills -- confirm before relying on the counts.
bill_party <- bills |>
  left_join(members, by = c("RST_PROPOSER" = "HG_NM")) |>
  filter(!is.na(POLY_NM)) |>
  count(POLY_NM, PROC_RESULT, name = "n_bills") |>
  arrange(desc(n_bills))
print(bill_party)
Sample output:
# A tibble: 12 × 3
POLY_NM PROC_RESULT n_bills
<chr> <chr> <int>
1 더불어민주당 폐기 312
2 국민의힘 폐기 287
3 더불어민주당 원안가결 104
4 국민의힘 원안가결 89
5 더불어민주당 수정가결 41
Example 2: Vote data for fixest regression
library(fixest)
source("assembly_api.R")

# One request of vote records, with the count columns converted to integers
# and derived outcome / share / date variables added.
votes <- get_votes(age = "22", page_size = 100) |>
  mutate(
    across(c(YES_TCNT, NO_TCNT, BLANK_TCNT, MEMBER_TCNT), as.integer),
    # 1 when the result code contains "가결", 0 otherwise
    passed = as.integer(str_detect(PROC_RESULT_CD, "가결")),
    yes_share = YES_TCNT / MEMBER_TCNT,
    margin = YES_TCNT - NO_TCNT,
    proc_date = as.Date(PROC_DT, format = "%Y%m%d"),
    # Namespace-qualified so this works even when lubridate is not attached
    # (library(tidyverse) only attaches it from tidyverse 2.0.0 onwards).
    year = lubridate::year(proc_date)
  )

# Simple OLS: does higher yes_share predict passage? (year fixed effects)
m <- feols(passed ~ yes_share | year, data = votes)
summary(m)
Example 3: Collect data across multiple pages
source("assembly_api.R")
#' Fetch every page of a bill search and stack the results
#'
#' Calls `search_bills()` page by page, stopping at the first page that is
#' empty or shorter than `page_size`.
#'
#' @param age Passed to `search_bills()` as `age`.
#' @param ... Further arguments forwarded to `search_bills()` (e.g. filters).
#' @param page_size Rows requested per page; also the threshold below which a
#'   page is treated as the last one.
#' @param delay Seconds to pause between consecutive page requests.
#' @return The row-bound result of all fetched pages (via `bind_rows()`).
collect_all_bills <- function(age, ..., page_size = 100, delay = 0.2) {
  pages <- list()
  page <- 1
  repeat {
    rows <- search_bills(age = age, page = page, page_size = page_size, ...)
    if (nrow(rows) == 0) break
    pages[[page]] <- rows
    # A short page means there is nothing left to fetch.
    if (nrow(rows) < page_size) break
    page <- page + 1
    Sys.sleep(delay) # be polite
  }
  bind_rows(pages)
}
# Collect all pages for age "22" and print the number of rows returned
all_bills_22 <- collect_all_bills("22")
cat("Total bills:", nrow(all_bills_22), "\n")
write_csv(all_bills_22, "bills_22nd_assembly.csv")
Example 4: Committee composition and balance-of-party analysis
source("assembly_api.R")

committees <- c("법제사법위원회", "국토교통위원회", "기획재정위원회",
                "과학기술정보방송통신위원회", "보건복지위원회")

# One request per committee; tag every returned row with the committee that
# was queried, then stack the results.
committee_data <- committees |>
  map(\(cmit) {
    get_members(age = "22", committee = cmit, page_size = 100) |>
      mutate(committee = cmit)
  }) |>
  bind_rows()

# Party balance per committee: member count and share of the committee total
party_balance <- committee_data |>
  count(committee, POLY_NM) |>
  group_by(committee) |>
  mutate(share = n / sum(n)) |>
  ungroup()

# Wide table with one row per committee and one share column per party
party_balance |>
  filter(POLY_NM %in% c("더불어민주당", "국민의힘")) |>
  # Keep only the columns being pivoted: if `n` stays in, pivot_wider() treats
  # it as an id column and splits each committee across several rows.
  select(committee, POLY_NM, share) |>
  pivot_wider(names_from = POLY_NM, values_from = share) |>
  arrange(desc(`더불어민주당`))
Using with fixest and did
The get_votes() data integrates well with fixest::feols() for two-way fixed effects, and the search_bills() data (with date variables) can feed did::att_gt() for staggered DiD designs where the treatment is a policy enactment date.