Passing the Postgres 65535 parameter limit
15 May 2021
If you are doing bulk inserts into Postgres you may have hit a message like this extended protocol limited to 65535 parameters
, You may be tempted to rewrite your query into chunks and loop over those chunks, but there is a better way! We will use a Golang example that passes the 65535 constraints.
A note, this article previously had an incorrect claim about speed improvements - which were due to one implementation using string concatenation (slow) and another method not doing that
The 65535 error refers to the fact that Postgres wire protocol uses an Int16 for binding input parameters, which is also a limit on the backend. There is a trick I learned from @HarlemCavalier in promscale using the unnest
function to pass an array containing many values, with the array counting as a single parameter. The effective change in SQL looks like this:
-- before
INSERT INTO users
(id, name)
VALUES ($1,$2),($3,$4);
-- after
INSERT INTO users
(id, name)
(select * from unnest($1::int[], $2::int[]));
Example in golang
For starters we need a running Postgres container to work with, in a new terminal window:
docker run --name bulk-postgres -e POSTGRES_PASSWORD=bulk postgres
Next, we switch into go and need a setup function to create our tables:
func setup() *pgx.Conn {
conn, err := pgx.Connect(context.Background(), dbURL)
if err != nil {
panic(err)
}
query := `
CREATE TABLE IF NOT EXISTS users (
id int,
name int
);
TRUNCATE TABLE users;
`
if _, err := conn.Exec(context.Background(), query); err != nil {
panic(err)
}
return conn
}
Our user table has two int fields, so we need a function to generate some test data in our application
// return a 2d array of ints. E.g. [[1,2],[3,4]]
func makeTestUsers(max int) [][]int {
a := make([][]int, max+1)
for i := range a {
a[i] = []int{i, i + 1}
}
return a
}
Regular Insert
// converts a 2d array to query numbers and values
// input: [[1,2],[3,4]]
// output: ($1,$2),($3,$4) and [1,2,3,4]
func valuesToRows(values [][]int) (string, []interface{}) {
rows := []interface{}{}
query := ""
for i, s := range values {
rows = append(rows, s[0], s[1])
numFields := 2
n := i * numFields
query += `(`
for j := 0; j < numFields; j++ {
query += `$` + strconv.Itoa(n+j+1) + `,`
}
query = query[:len(query)-1] + `),`
}
query = query[:len(query)-1]
return query, rows
}
And now our function to do the inserts
func RegularInsert(conn *pgx.Conn, values [][]int) {
start := time.Now()
defer func() { fmt.Println("regularInsert", time.Since(start)) }()
query := `
INSERT INTO users
(id, name)
VALUES %s;`
queryParams, params := valuesToRows(values)
query = fmt.Sprintf(query, queryParams)
if _, err := conn.Exec(context.Background(), query, params...); err != nil {
fmt.Println(err)
}
}
Unnest Insert
The unnest code is simpler because there are just two int arrays:
func UnnestInsert(conn *pgx.Conn, values [][]int) {
start := time.Now()
defer func() { fmt.Println("unnestInsert", time.Since(start)) }()
ids, names := []int{}, []int{}
for _, v := range values {
ids = append(ids, v[0])
names = append(names, v[1])
}
query := `
INSERT INTO users
(id, name)
(
select * from unnest($1::int[], $2::int[])
)`
if _, err := conn.Exec(context.Background(), query, ids, names); err != nil {
fmt.Println(err)
}
}
Results
Beyond 25k rows, our regular insert hits its 65535 parameter constraint but our unnest function can still insert
Full code if you want to give it a try:
package main
import (
"context"
"fmt"
"strconv"
"time"
"github.com/jackc/pgx/v4"
)
// docker run --name bulk-postgres -e POSTGRES_PASSWORD=bulk postgres
const dbURL = "postgres://postgres:bulk@0.0.0.0:5432/postgres?sslmode=disable"
func main() {
conn := setup()
defer conn.Close(context.Background())
values := makeTestUsers(25_000)
RegularInsert(conn, values)
UnnestInsert(conn, values)
}
func setup() *pgx.Conn {
conn, err := pgx.Connect(context.Background(), dbURL)
if err != nil {
panic(err)
}
query := `
CREATE TABLE IF NOT EXISTS users (
id int,
name int
);
TRUNCATE TABLE users;
`
if _, err := conn.Exec(context.Background(), query); err != nil {
panic(err)
}
return conn
}
// return a 2d array of ints. E.g. [[1,2],[3,4]]
func makeTestUsers(max int) [][]int {
a := make([][]int, max+1)
for i := range a {
a[i] = []int{i, i + 1}
}
return a
}
func UnnestInsert(conn *pgx.Conn, values [][]int) {
start := time.Now()
defer func() { fmt.Println("unnestInsert", time.Since(start)) }()
ids, names := []int{}, []int{}
for _, v := range values {
ids = append(ids, v[0])
names = append(names, v[1])
}
query := `
INSERT INTO users
(id, name)
(
select * from unnest($1::int[], $2::int[])
)`
if _, err := conn.Exec(context.Background(), query, ids, names); err != nil {
fmt.Println(err)
}
}
func RegularInsert(conn *pgx.Conn, values [][]int) {
start := time.Now()
defer func() { fmt.Println("regularInsert", time.Since(start)) }()
query := `
INSERT INTO users
(id, name)
VALUES %s;`
queryParams, params := valuesToRows(values)
query = fmt.Sprintf(query, queryParams)
if _, err := conn.Exec(context.Background(), query, params...); err != nil {
fmt.Println(err)
}
}
// converts a 2d array to query numbers and values
// input: [[1,2],[3,4]]
// output: ($1,$2),($3,$4) and [1,2,3,4]
func valuesToRows(values [][]int) (string, []interface{}) {
rows := []interface{}{}
query := ""
for i, s := range values {
rows = append(rows, s[0], s[1])
numFields := 2
n := i * numFields
query += `(`
for j := 0; j < numFields; j++ {
query += `$` + strconv.Itoa(n+j+1) + `,`
}
query = query[:len(query)-1] + `),`
}
query = query[:len(query)-1]
return query, rows
}