Chapter 4: R

Querying the Assembly API with httr2 and tidyverse

This chapter provides a tidy R workflow for collecting Korean legislative data, with functions that integrate naturally into a dplyr/tidyverse pipeline.


Setup

# One-time setup. httr2 performs the HTTP requests, tidyverse and glue are used
# throughout assembly_api.R, and fixest is required by the regression example.
install.packages(c("httr2", "jsonlite", "tidyverse", "glue", "fixest"))

Add your key to ~/.Renviron:

ASSEMBLY_API_KEY=your-key-here

Then run readRenviron("~/.Renviron") (or restart R).


Core functions

Save this as assembly_api.R:

library(httr2)
library(jsonlite)
library(tidyverse)
library(glue)

# Root URL that every endpoint name is appended to.
BASE_URL <- "https://open.assembly.go.kr/portal/openapi"

# Lookup from Assembly number (as a string) to the code sent as UNIT_CD.
UNIT_CD <- c(
  "22" = "100022",
  "21" = "100021",
  "20" = "100020",
  "19" = "100019",
  "18" = "100018"
)

#' Call one Open Assembly endpoint and return its rows as a tibble.
#'
#' @param endpoint Endpoint identifier appended to `BASE_URL`; it is also the
#'   name of the top-level element looked up in the JSON response.
#' @param ... Query parameters given as `name = value`. `NULL` values are
#'   passed to `req_url_query()`, which omits them from the request.
#' @return A tibble with one row per record. An empty tibble is returned when
#'   the API reports `INFO-200` or the response contains no rows.
#' @details Stops with an error when the response carries no result code or a
#'   code other than `INFO-000` / `INFO-200`. Warns when `ASSEMBLY_API_KEY`
#'   is unset.
assembly_get <- function(endpoint, ...) {
  api_key <- Sys.getenv("ASSEMBLY_API_KEY")
  if (!nzchar(api_key)) {
    warning(
      "ASSEMBLY_API_KEY is not set; the API may reject the request. ",
      "Add the key to ~/.Renviron and restart R.",
      call. = FALSE
    )
  }
  params <- list(KEY = api_key, Type = "json", ...)

  payload <- request(glue("{BASE_URL}/{endpoint}")) |>
    req_url_query(!!!params) |>
    # Never raise on HTTP status: the result code is read from the body below.
    req_error(is_error = \(r) FALSE) |>
    req_perform() |>
    resp_body_json()

  body <- payload[[endpoint]]

  # The result block is normally nested at <endpoint>[[1]]$head[[2]]$RESULT.
  # When the <endpoint> wrapper is missing, fall back to a bare top-level
  # RESULT element so the code/message can still be reported.
  result <- pluck(body, 1, "head", 2, "RESULT")
  if (is.null(result)) result <- payload[["RESULT"]]

  code <- result[["CODE"]]
  if (is.null(code)) {
    stop(
      glue("Unexpected response from '{endpoint}': no RESULT code found"),
      call. = FALSE
    )
  }

  if (identical(code, "INFO-200")) return(tibble())  # empty result, not an error
  if (!identical(code, "INFO-000")) {
    msg <- result[["MESSAGE"]]
    if (is.null(msg)) msg <- ""
    stop(glue("API error {code}: {msg}"), call. = FALSE)
  }

  rows <- pluck(body, 2, "row")
  if (is.null(rows)) return(tibble())

  # Normalize: a single record may arrive as one named list rather than a
  # list of records.
  if (!is.null(names(rows))) rows <- list(rows)

  # JSON null is parsed as NULL, which as_tibble() refuses as a column;
  # replace it with NA so the field survives as a missing value.
  rows |>
    map(\(row) as_tibble(map(row, \(value) if (is.null(value)) NA else value))) |>
    bind_rows()
}

# ── Convenience wrappers ───────────────────────────────────────────────────────

#' Query the bill-search endpoint for one Assembly term.
#'
#' @param age Assembly number, sent as `AGE`.
#' @param bill_name,proposer,proc_result,committee Optional filters sent as
#'   `BILL_NAME`, `PROPOSER`, `PROC_RESULT` and `COMMITTEE` (default `NULL`).
#' @param page Page index, sent as `pIndex`.
#' @param page_size Rows per page, sent as `pSize`.
#' @return The value returned by `assembly_get()` for this endpoint.
search_bills <- function(age, bill_name = NULL, proposer = NULL,
                         proc_result = NULL, committee = NULL,
                         page = 1, page_size = 100) {
  query <- list(
    AGE         = age,
    BILL_NAME   = bill_name,
    PROPOSER    = proposer,
    PROC_RESULT = proc_result,
    COMMITTEE   = committee,
    pIndex      = page,
    pSize       = page_size
  )
  # list() keeps NULL entries, so the call receives the same named arguments
  # as a direct assembly_get(...) invocation would.
  do.call(assembly_get, c(list("nzmimeepazxkubdpn"), query))
}

#' Query the member endpoint, optionally filtered.
#'
#' @param age Assembly number; must match one of the names of `UNIT_CD`.
#'   Numeric input such as `22` is converted to `"22"` before the lookup.
#' @param name,party,district,committee Optional filters sent as `HG_NM`,
#'   `POLY_NM`, `ORIG_NM` and `CMIT_NM` (default `NULL`).
#' @param page_size Rows per page, sent as `pSize`.
#' @param page Page index, sent as `pIndex`.
#' @return The value returned by `assembly_get()` for this endpoint.
get_members <- function(age = "22", name = NULL, party = NULL,
                        district = NULL, committee = NULL, page_size = 300,
                        page = 1) {
  # UNIT_CD is indexed by name; a bare number would be treated as a position.
  age_key <- as.character(age)
  if (length(age_key) != 1 || !age_key %in% names(UNIT_CD)) {
    stop(
      "`age` must be one of: ", paste(names(UNIT_CD), collapse = ", "),
      call. = FALSE
    )
  }

  assembly_get("nwvrqwxyaytdsfvhu",
    UNIT_CD = UNIT_CD[[age_key]], HG_NM = name, POLY_NM = party,
    ORIG_NM = district, CMIT_NM = committee,
    pIndex = page, pSize = page_size)
}

#' Query the vote endpoint for one Assembly term.
#'
#' @param age Assembly number, sent as `AGE`.
#' @param bill_name Optional filter sent as `BILL_NAME` (default `NULL`).
#' @param page_size Rows per page, sent as `pSize`.
#' @param page Page index, sent as `pIndex`; lets callers read past the
#'   first page, matching `search_bills()`.
#' @return The value returned by `assembly_get()` for this endpoint.
get_votes <- function(age, bill_name = NULL, page_size = 100, page = 1) {
  assembly_get("ncocpgfiaoituanbr",
    AGE = age, BILL_NAME = bill_name,
    pIndex = page, pSize = page_size)
}

#' Query the `BILLINFOPPSR` endpoint for one bill.
#'
#' @param bill_id Bill identifier, sent as `BILL_ID`.
#' @return The value returned by `assembly_get()` for this endpoint.
get_proposers <- function(bill_id) {
  endpoint <- "BILLINFOPPSR"
  assembly_get(endpoint, BILL_ID = bill_id)
}

Example 1: Party-level bill sponsorship summary

source("assembly_api.R")

# Get 22nd Assembly members and their party affiliations
members <- get_members(age = "22", page_size = 300) |>
  select(HG_NM, POLY_NM, CMIT_NM, ORIG_NM)

# Name -> party lookup for the join below. Names that map to more than one
# party are dropped: a name-only join cannot tell such members apart and
# would count each of their bills once per matching row.
party_by_name <- members |>
  distinct(HG_NM, POLY_NM) |>
  group_by(HG_NM) |>
  filter(n() == 1) |>
  ungroup()

# One page (up to 100 rows) of bills proposed in the 22nd Assembly.
# A single request is not the full set when more bills exist.
bills <- search_bills(age = "22", page_size = 100)

# Join on proposer name and compute party-level counts
bill_party <- bills |>
  left_join(party_by_name, by = c("RST_PROPOSER" = "HG_NM")) |>
  filter(!is.na(POLY_NM)) |>
  count(POLY_NM, PROC_RESULT, name = "n_bills") |>
  arrange(desc(n_bills))

print(bill_party)

Sample output (illustrative; your counts will differ):

# A tibble: 12 × 3
   POLY_NM      PROC_RESULT   n_bills
   <chr>        <chr>           <int>
 1 더불어민주당  폐기              312
 2 국민의힘      폐기              287
 3 더불어민주당  원안가결          104
 4 국민의힘      원안가결           89
 5 더불어민주당  수정가결           41

Example 2: Vote data for fixest regression

library(fixest)
source("assembly_api.R")

# Fetch one page of vote records, then derive the analysis variables.
votes <- get_votes(age = "22", page_size = 100) |>
  # Tally columns are converted to integer before any arithmetic.
  mutate(
    across(all_of(c("YES_TCNT", "NO_TCNT", "BLANK_TCNT", "MEMBER_TCNT")), as.integer)
  ) |>
  mutate(
    passed    = as.integer(str_detect(PROC_RESULT_CD, "가결")),
    yes_share = YES_TCNT / MEMBER_TCNT,
    margin    = YES_TCNT - NO_TCNT
  ) |>
  mutate(
    proc_date = as.Date(PROC_DT, format = "%Y%m%d"),
    year      = year(proc_date)
  )

# Regress the 0/1 passage indicator on yes_share with year fixed effects.
m <- feols(passed ~ yes_share | year, data = votes)
summary(m)

Example 3: Collect data across multiple pages

source("assembly_api.R")

#' Collect every page of `search_bills()` results for one Assembly term.
#'
#' @param age Assembly number passed to `search_bills()`.
#' @param ... Further arguments forwarded to `search_bills()` (for example
#'   `bill_name` or `proposer`). Do not pass `page`; it is managed here.
#' @param page_size Rows requested per page. A page shorter than this is
#'   taken as the last one.
#'   NOTE(review): if the API caps the page size below the value requested,
#'   every page looks short and collection stops after the first — confirm
#'   the cap before raising this.
#' @param delay Seconds to pause between successive requests.
#' @return The rows of all fetched pages bound into one tibble; an empty
#'   tibble when the first page has no rows.
collect_all_bills <- function(age, ..., page_size = 100, delay = 0.2) {
  stopifnot(
    is.numeric(page_size), length(page_size) == 1, page_size >= 1,
    is.numeric(delay), length(delay) == 1, delay >= 0
  )

  pages <- list()
  page <- 1

  repeat {
    rows <- search_bills(age = age, page = page, page_size = page_size, ...)
    if (nrow(rows) == 0) break
    pages[[page]] <- rows
    # A short page means there is nothing left to fetch.
    if (nrow(rows) < page_size) break
    page <- page + 1
    Sys.sleep(delay)   # be polite
  }

  bind_rows(pages)
}

# Pull every page for the 22nd Assembly and report how many rows came back.
all_bills_22 <- collect_all_bills(age = "22")
cat("Total bills:", nrow(all_bills_22), "\n")

# Persist the combined result for later sessions.
all_bills_22 |>
  write_csv("bills_22nd_assembly.csv")

Example 4: Committee composition and balance-of-party analysis

source("assembly_api.R")

# Committees to compare.
committees <- c(
  "법제사법위원회",
  "국토교통위원회",
  "기획재정위원회",
  "과학기술정보방송통신위원회",
  "보건복지위원회"
)

# One request per committee; each returned row is tagged with the committee
# that was queried, then all results are stacked.
committee_data <- committees |>
  map(\(cmit) {
    get_members(age = "22", committee = cmit, page_size = 100) |>
      mutate(committee = cmit)
  }) |>
  bind_rows()

# Party balance per committee: member count and within-committee share.
party_balance <- committee_data |>
  count(committee, POLY_NM) |>
  group_by(committee) |>
  mutate(share = prop.table(n)) |>
  ungroup()

# Two-party comparison, one row per committee.
# `n` is dropped before reshaping: pivot_wider() treats every column not used
# in names_from/values_from as an id column, so keeping `n` would split each
# committee into separate rows padded with NA.
party_balance |>
  filter(POLY_NM %in% c("더불어민주당", "국민의힘")) |>
  select(committee, POLY_NM, share) |>
  pivot_wider(names_from = POLY_NM, values_from = share) |>
  arrange(desc(`더불어민주당`))

Using with fixest and did

The get_votes() data integrates well with fixest::feols() for two-way fixed effects, and the search_bills() data (with date variables) can feed did::att_gt() for staggered DiD designs where the treatment is a policy enactment date.