Chapter 3: Python

Querying the Assembly API with async Python

This chapter provides a complete async Python client for the 열린국회정보 API, ready to drop into a research pipeline.


Setup

pip install httpx python-dotenv pandas

.env file:

ASSEMBLY_API_KEY=your-key-here

The client

Save this as assembly_client.py in your project:

import os, asyncio
from typing import Optional
import httpx
from dotenv import load_dotenv

load_dotenv()

# Root of the 열린국회정보 (Open National Assembly) OpenAPI.
BASE_URL = "https://open.assembly.go.kr/portal/openapi"
# Assembly term ("18"–"22") → UNIT_CD code expected by the member endpoint.
UNIT_CD  = {"22": "100022", "21": "100021", "20": "100020", "19": "100019", "18": "100018"}


class AssemblyClient:
    """Async client for the 열린국회정보 (Open National Assembly) OpenAPI.

    Reads the API key from the ASSEMBLY_API_KEY environment variable and
    exposes thin convenience wrappers over the raw endpoints. Use as an
    async context manager so the underlying HTTP connection pool is closed:

        async with AssemblyClient() as client:
            bills = await client.search_bills(age="22")
    """

    def __init__(self):
        self.key = os.getenv("ASSEMBLY_API_KEY")
        if not self.key:
            raise ValueError("ASSEMBLY_API_KEY not set")
        self._http = httpx.AsyncClient(timeout=30)

    async def __aenter__(self):
        return self

    async def __aexit__(self, *_):
        await self._http.aclose()

    def _base(self) -> dict:
        """Query parameters common to every request: credentials + JSON output."""
        return {"KEY": self.key, "Type": "json"}

    def _parse(self, data: dict, ep: str) -> list[dict]:
        """Parse a 열린국회 API response into a list of row dicts.

        Returns [] on INFO-200 (the query matched no rows). Raises
        ValueError on any other non-success code — including top-level
        errors (e.g. INFO-300 for an invalid key), where the payload has
        a bare "RESULT" object and no *ep* key at all.
        """
        # Error responses (bad key, bad endpoint, ...) put RESULT at the top
        # level instead of under the endpoint name. Returning [] here (the
        # old behaviour) silently masked authentication failures as "no data".
        top = data.get("RESULT")
        if top:
            raise ValueError(f"API error {top.get('CODE')}: {top.get('MESSAGE')}")
        body = data.get(ep, [])
        if not body:
            return []
        result = body[0]["head"][1]["RESULT"]
        code = result["CODE"]
        if code == "INFO-200":  # "no data found" — a normal empty result, not an error
            return []
        if code != "INFO-000":
            raise ValueError(f"API error {code}: {result['MESSAGE']}")
        rows = body[1].get("row", []) if len(body) > 1 else []
        # A single-record response arrives as a bare dict, not a list — normalize.
        return rows if isinstance(rows, list) else [rows]

    async def get(self, endpoint: str, **params) -> list[dict]:
        """GET *endpoint* with *params* (None values dropped) and return parsed rows."""
        merged = {**self._base(), **{k: v for k, v in params.items() if v is not None}}
        r = await self._http.get(f"{BASE_URL}/{endpoint}", params=merged)
        r.raise_for_status()
        return self._parse(r.json(), endpoint)

    # ── Convenience methods ────────────────────────────────────────────────────

    async def search_bills(self, age: str, bill_name=None, proposer=None,
                           proc_result=None, committee=None, page=1, page_size=100):
        """Search bills of the *age*-th Assembly, optionally filtered by name etc."""
        return await self.get("nzmimeepazxkubdpn",
            AGE=age, BILL_NAME=bill_name, PROPOSER=proposer,
            PROC_RESULT=proc_result, COMMITTEE=committee,
            pIndex=page, pSize=page_size)

    async def get_members(self, age="22", name=None, party=None,
                          district=None, committee=None, page_size=300):
        """List members of the *age*-th Assembly (UNIT_CD is derived from *age*)."""
        return await self.get("nwvrqwxyaytdsfvhu",
            UNIT_CD=UNIT_CD.get(age, f"1000{age}"), HG_NM=name,
            POLY_NM=party, ORIG_NM=district, CMIT_NM=committee, pSize=page_size)

    async def get_votes(self, age: str, bill_name=None, page_size=100):
        """Fetch plenary vote tallies for the *age*-th Assembly."""
        return await self.get("ncocpgfiaoituanbr",
            AGE=age, BILL_NAME=bill_name, pSize=page_size)

    async def get_proposers(self, bill_id: str):
        """Fetch the sponsor/co-sponsor list for a single bill."""
        return await self.get("BILLINFOPPSR", BILL_ID=bill_id)

Example 2: Build a co-sponsorship edge list

Useful for network analysis — who co-sponsors bills with whom?

import asyncio, pandas as pd, itertools
from assembly_client import AssemblyClient

async def build_edge_list(age="22", topic=None):
    """Return a DataFrame of undirected co-sponsorship edges for one Assembly term.

    Each row links two co-sponsors of the same bill; *topic* optionally
    filters bills whose name contains the given string.
    """
    rows = []
    async with AssemblyClient() as client:
        for bill in await client.search_bills(age=age, bill_name=topic, page_size=100):
            identifier = bill.get("BILL_ID")
            title = bill.get("BILL_NAME", "")
            if not identifier:
                continue

            sponsors = await client.get_proposers(identifier)
            sponsor_names = [rec["PPSR_NM"] for rec in sponsors if "PPSR_NM" in rec]

            # Every unordered pair of co-sponsors becomes one edge.
            rows.extend(
                {"source": left, "target": right, "bill": title}
                for left, right in itertools.combinations(sponsor_names, 2)
            )

    return pd.DataFrame(rows)

# Run the pipeline once and export the edge list for external network tools.
edges_df = asyncio.run(build_edge_list(age="22", topic="부동산"))
# utf-8-sig writes a BOM so spreadsheet apps detect the Korean text encoding.
edges_df.to_csv("cosponsor_edges.csv", index=False, encoding="utf-8-sig")
print(f"{len(edges_df)} edges across {edges_df['bill'].nunique()} bills")

The resulting CSV can be loaded directly into networkx, igraph, or Gephi for visualization.


Example 3: Vote data for regression analysis

import asyncio, pandas as pd
from assembly_client import AssemblyClient

async def get_vote_dataset(age="22"):
    async with AssemblyClient() as client:
        votes = await client.get_votes(age=age, page_size=100)

    df = pd.DataFrame(votes)

    # Coerce vote counts to numeric
    for col in ["YES_TCNT", "NO_TCNT", "BLANK_TCNT", "MEMBER_TCNT"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Derived variables
    df["pass"]        = (df["PROC_RESULT_CD"].str.contains("가결", na=False)).astype(int)
    df["yes_share"]   = df["YES_TCNT"] / df["MEMBER_TCNT"]
    df["margin"]      = df["YES_TCNT"] - df["NO_TCNT"]

    return df

# Collect the 22nd-Assembly vote dataset and persist it for later analysis.
df = asyncio.run(get_vote_dataset("22"))
print(df[["BILL_NAME", "YES_TCNT", "NO_TCNT", "yes_share", "pass"]].describe())
# utf-8-sig writes a BOM so spreadsheet apps detect the Korean text encoding.
df.to_csv("votes_22nd.csv", index=False, encoding="utf-8-sig")

Pagination

The API defaults to 10 results per page with a maximum of 100. For large datasets, paginate:

async def collect_all_bills(age: str, **kwargs) -> list[dict]:
    """Collect every bill for one Assembly term, paging past the 100-row API cap."""
    collected: list[dict] = []
    async with AssemblyClient() as client:
        page = 1
        batch = await client.search_bills(age=age, page=page, page_size=100, **kwargs)
        while batch:
            collected.extend(batch)
            # A short page means we just consumed the final one.
            if len(batch) < 100:
                break
            page += 1
            batch = await client.search_bills(age=age, page=page, page_size=100, **kwargs)
    return collected

# Fetch every 22nd-Assembly bill; issues one request per 100-row page.
all_bills = asyncio.run(collect_all_bills("22"))
print(f"Total bills collected: {len(all_bills)}")

Rate limiting

The API does not publish a rate limit, but empirically it handles ~5 requests/second reliably. For large collection jobs, add await asyncio.sleep(0.2) between requests — a 0.2-second pause keeps you at roughly that 5 requests/second ceiling.