Passing the Postgres 65535 parameter limit at 120x speed
15 May 2021
If you are doing bulk inserts into Postgres you may have hit a message like this extended protocol limited to 65535 parameters
, You may be tempted to rewrite your query into chunks and loop over those chunks, but there is a better way! We will use a Golang example that passes the 65535 constraints and reduces the time to insert 25k records from 2.9s to 24ms.
A caveat on speed claims, I only tested this with the Go pgx library - so mileage may vary
The 65535 error refers to the fact that Postgres wire protocol uses an Int16 for binding input parameters, which is also a limit on the backend. There is a trick I learned from @HarlemCavalier in promscale using the unnest
function to pass an array containing many values, with the array counting as a single parameter. The effective change in SQL looks like this:
-- before
INSERT INTO users
(id, name)
VALUES ($1,$2),($3,$4);
-- after
INSERT INTO users
(id, name)
(select * from unnest($1::int[], $2::int[]));
Example in golang
We are going to write up an example that first shows a 100x speed improvement, and second shows our unnest function exceeding our parameter limit.
For starters we need a running Postgres container to work with, in a new terminal window:
docker run --name bulk-postgres -e POSTGRES_PASSWORD=bulk postgres
Next, we switch into go and need a setup function to create our tables:
func setup() *pgx.Conn {
conn, err := pgx.Connect(context.Background(), dbURL)
if err != nil {
panic(err)
}
query := `
CREATE TABLE IF NOT EXISTS users (
id int,
name int
);
TRUNCATE TABLE users;
`
if _, err := conn.Exec(context.Background(), query); err != nil {
panic(err)
}
return conn
}
Our user table has two int fields, so we need a function to generate some test data in our application
// return a 2d array of ints. E.g. [[1,2],[3,4]]
func makeTestUsers(max int) [][]int {
a := make([][]int, max+1)
for i := range a {
a[i] = []int{i, i + 1}
}
return a
}
Regular Insert
// converts a 2d array to query numbers and values
// input: [[1,2],[3,4]]
// output: ($1,$2),($3,$4) and [1,2,3,4]
func valuesToRows(values [][]int) (string, []interface{}) {
rows := []interface{}{}
query := ""
for i, s := range values {
rows = append(rows, s[0], s[1])
numFields := 2
n := i * numFields
query += `(`
for j := 0; j < numFields; j++ {
query += `$` + strconv.Itoa(n+j+1) + `,`
}
query = query[:len(query)-1] + `),`
}
query = query[:len(query)-1]
return query, rows
}
And now our function to do the inserts
func RegularInsert(conn *pgx.Conn, values [][]int) {
start := time.Now()
defer func() { fmt.Println("regularInsert", time.Since(start)) }()
query := `
INSERT INTO users
(id, name)
VALUES %s;`
queryParams, params := valuesToRows(values)
query = fmt.Sprintf(query, queryParams)
if _, err := conn.Exec(context.Background(), query, params...); err != nil {
fmt.Println(err)
}
}
Unnest Insert
The unnest code is simpler because there are just two int arrays:
func UnnestInsert(conn *pgx.Conn, values [][]int) {
start := time.Now()
defer func() { fmt.Println("unnestInsert", time.Since(start)) }()
ids, names := []int{}, []int{}
for _, v := range values {
ids = append(ids, v[0])
names = append(names, v[1])
}
query := `
INSERT INTO users
(id, name)
(
select * from unnest($1::int[], $2::int[])
)`
if _, err := conn.Exec(context.Background(), query, ids, names); err != nil {
fmt.Println(err)
}
}
Results
At 25k rows (50k parameters at 2 parameters per row) our unnest insert is 120x faster. Beyond 25k rows, our regular insert hits its 65535 parameter constraint but our unnest function can still insert
rows (in 1000s) | regular (ms) | unnest (ms) | times faster |
---|---|---|---|
5 | 97.756644 | 6.165317 | 15 |
15 | 1099.48859 | 15.35103 | 71 |
25 | 2988.927136 | 24.724306 | 120 |
35 | 36.393635 | ||
70 | 111.888045 | ||
150 | 160.799487 | ||
500 | 689.425454 | ||
5000 | 6748.465272 |
Full code if you want to give it a try:
package main
import (
"context"
"fmt"
"strconv"
"time"
"github.com/jackc/pgx/v4"
)
// docker run --name bulk-postgres -e POSTGRES_PASSWORD=bulk postgres
const dbURL = "postgres://postgres:bulk@0.0.0.0:5432/postgres?sslmode=disable"
func main() {
conn := setup()
defer conn.Close(context.Background())
values := makeTestUsers(25_000)
RegularInsert(conn, values)
UnnestInsert(conn, values)
}
func setup() *pgx.Conn {
conn, err := pgx.Connect(context.Background(), dbURL)
if err != nil {
panic(err)
}
query := `
CREATE TABLE IF NOT EXISTS users (
id int,
name int
);
TRUNCATE TABLE users;
`
if _, err := conn.Exec(context.Background(), query); err != nil {
panic(err)
}
return conn
}
// return a 2d array of ints. E.g. [[1,2],[3,4]]
func makeTestUsers(max int) [][]int {
a := make([][]int, max+1)
for i := range a {
a[i] = []int{i, i + 1}
}
return a
}
func UnnestInsert(conn *pgx.Conn, values [][]int) {
start := time.Now()
defer func() { fmt.Println("unnestInsert", time.Since(start)) }()
ids, names := []int{}, []int{}
for _, v := range values {
ids = append(ids, v[0])
names = append(names, v[1])
}
query := `
INSERT INTO users
(id, name)
(
select * from unnest($1::int[], $2::int[])
)`
if _, err := conn.Exec(context.Background(), query, ids, names); err != nil {
fmt.Println(err)
}
}
func RegularInsert(conn *pgx.Conn, values [][]int) {
start := time.Now()
defer func() { fmt.Println("regularInsert", time.Since(start)) }()
query := `
INSERT INTO users
(id, name)
VALUES %s;`
queryParams, params := valuesToRows(values)
query = fmt.Sprintf(query, queryParams)
if _, err := conn.Exec(context.Background(), query, params...); err != nil {
fmt.Println(err)
}
}
// converts a 2d array to query numbers and values
// input: [[1,2],[3,4]]
// output: ($1,$2),($3,$4) and [1,2,3,4]
func valuesToRows(values [][]int) (string, []interface{}) {
rows := []interface{}{}
query := ""
for i, s := range values {
rows = append(rows, s[0], s[1])
numFields := 2
n := i * numFields
query += `(`
for j := 0; j < numFields; j++ {
query += `$` + strconv.Itoa(n+j+1) + `,`
}
query = query[:len(query)-1] + `),`
}
query = query[:len(query)-1]
return query, rows
}