cloudquery · kodiakhq · Jun 19, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
@@ -164,3 +164,130 @@ func TestInsertDuplicateSameBatch(t *testing.T) {
 	require.NoError(t, rows.Err())
 	require.Equal(t, int64(1), count)
 }
+
+func TestListPrimaryKey(t *testing.T) { // Writes three rows, two of them identical, to a table with a list column in its primary key and expects two rows back.
+	ctx := context.Background()
+	p := plugin.NewPlugin("duckdb", "development", New)
+	tempDB := path.Join(t.TempDir(), "test_list_primary_key.duckdb") + "?threads=1" // database file lives in the per-test temp dir
+
+	spec := Spec{
+		ConnectionString: tempDB,
+		Debug:            true,
+	}
+	specBytes, err := json.Marshal(spec)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	testingLog := &testingLog{TB: t, Buf: bytes.Buffer{}}
+	testWriter := zerolog.TestWriter{T: testingLog}
+	p.SetLogger(zerolog.New(testWriter).Level(zerolog.DebugLevel)) // debug-level logger backed by testingLog, whose Buf is inspected after the write
+
+	if err := p.Init(ctx, specBytes, plugin.NewClientOptions{}); err != nil {
+		t.Fatal(err)
+	}
+
+	t.Cleanup(func() {
+		if err := p.Close(ctx); err != nil {
+			t.Logf("failed to close plugin: %v", err)
+		}
+	})
+
+	table := &schema.Table{
+		Name: "test_list_primary_key",
+		Columns: []schema.Column{
+			{Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true},
+			{Name: "locations", Type: arrow.ListOf(arrow.BinaryTypes.String), PrimaryKey: true}, // list column that is also part of the primary key
+		},
+	}
+	res := make(chan message.WriteMessage, 10) // buffered, so both messages below can be queued without blocking
+	var writeErr error
+	wg := sync.WaitGroup{}
+	wg.Go(func() { // run p.Write concurrently; writeErr is read only after wg.Wait
+		writeErr = p.Write(ctx, res)
+	})
+
+	res <- &message.WriteMigrateTable{
+		Table: table,
+	}
+
+	bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema())
+	appendRow := func(name, location string) {
+		bldr.Field(0).(*array.StringBuilder).Append(name)
+		lb := bldr.Field(1).(*array.ListBuilder)
+		lb.Append(true) // opens a new non-null list; the value appended next becomes its single element
+		lb.ValueBuilder().(*array.StringBuilder).Append(location)
+	}
+	appendRow("standard", "eastus")
+	appendRow("standard", "westus")
+	appendRow("standard", "eastus") // same values as the first row
+
+	record := bldr.NewRecordBatch()
+
+	res <- &message.WriteInsert{
+		Record: record,
+	}
+	close(res) // nothing more to send
+
+	wg.Wait()
+	require.NoError(t, writeErr)
+
+	require.NotContains(t, testingLog.Buf.String(), "Invalid type for index key")
+	connector, err := duckdb.NewConnector(tempDB, nil) // open the same database directly to inspect what was written
+	require.NoError(t, err)
+	defer connector.Close()
+	db := sql.OpenDB(connector)
+	defer db.Close()
+
+	var colType string
+	require.NoError(t, db.QueryRowContext(ctx,
+		"SELECT data_type FROM information_schema.columns WHERE table_name = 'test_list_primary_key' AND column_name = 'locations'").
+		Scan(&colType))
+	require.Equal(t, "VARCHAR", colType) // the list key column is expected to be reported as VARCHAR
+
+	rows, err := db.QueryContext(ctx, "SELECT count(*) FROM test_list_primary_key")
+	require.NoError(t, err)
+	defer rows.Close()
+	var count int64
+	for rows.Next() {
+		require.NoError(t, rows.Scan(&count))
+	}
+	require.NoError(t, rows.Err())
+	require.Equal(t, int64(2), count) // three rows were sent, but only two distinct (name, locations) pairs
+}
+
+func TestKeyListColumnDetection(t *testing.T) { // Checks the list and key-list predicates, and the DuckDB type names they lead to, for list and map columns.
+	listType := arrow.ListOf(arrow.BinaryTypes.String)
+	mapType := arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.String)
+
+	require.True(t, duckDBListColumn(schema.Column{Type: listType}))
+	require.False(t, duckDBListColumn(schema.Column{Type: mapType})) // maps are not treated as list columns
+
+	require.True(t, keyListColumn(schema.Column{Type: listType, PrimaryKey: true}))
+	require.False(t, keyListColumn(schema.Column{Type: mapType, PrimaryKey: true}))
+	require.False(t, keyListColumn(schema.Column{Type: listType})) // a list column that is not a key
+
+	require.Equal(t, "varchar", duckDBType(schema.Column{Type: listType, PrimaryKey: true})) // key list columns are typed as varchar
+	require.Equal(t, "json", duckDBType(schema.Column{Type: mapType, PrimaryKey: true}))
+	require.Equal(t, "varchar[]", duckDBType(schema.Column{Type: listType})) // non-key list columns keep the array type
+}
+
+func TestListColumnStringRoundTrip(t *testing.T) { // Converts a one-entry list array to a string array and back, then checks both values are still there.
+	listType := arrow.ListOf(arrow.BinaryTypes.String)
+	lb := array.NewListBuilder(memory.DefaultAllocator, arrow.BinaryTypes.String)
+	defer lb.Release()
+	lb.Append(true) // a single list entry; the two values appended below are its elements
+	lb.ValueBuilder().(*array.StringBuilder).Append("eastus")
+	lb.ValueBuilder().(*array.StringBuilder).Append("westus")
+	listArr := lb.NewArray()
+	defer listArr.Release()
+
+	strArr, ok := transformToStringArray(listArr).(*array.String) // the conversion must produce a string array
+	require.True(t, ok)
+
+	back, ok := reverseTransformFromString(listType, strArr).(*array.List) // and the reverse conversion a list array
+	require.True(t, ok)
+	vals := back.ListValues().(*array.String)
+	require.Equal(t, "eastus", vals.Value(0))
+	require.Equal(t, "westus", vals.Value(1))
+}
@@ -34,9 +34,12 @@ func (*Client) normalizeColumns(tables schema.Tables) schema.Tables {
 		normalizedTable := *table
 		normalizedTable.Columns = make(schema.ColumnList, len(table.Columns))
 		for i := range table.Columns {
+			normalizedColumn := table.Columns[i]
+			if keyListColumn(normalizedColumn) {
+				normalizedColumn.Type = duckDBToArrow("varchar")
+			}
 			// In DuckDB, a PK column must be NOT NULL, so we need to make sure that the schema we're comparing to has the same
 			// constraint.
-			normalizedColumn := table.Columns[i]
 			if normalizedColumn.PrimaryKey {
 				normalizedColumn.NotNull = true
 			}
@@ -185,7 +188,7 @@ func (c *Client) createTableIfNotExist(ctx context.Context, tableName string, ta
 
 	var pks []string
 	for i, col := range table.Columns {
-		sqlType := arrowToDuckDB(col.Type)
+		sqlType := duckDBType(col)
 		fieldDef := sanitizeID(col.Name) + ` ` + sqlType
 		if col.PrimaryKey && !skipConstraints {
 			pks = append(pks, col.Name)

@@ -137,6 +137,10 @@ func reverseTransformArray(dt arrow.DataType, arr arrow.Array) arrow.Array {
 			0, // we use 0 as offset for struct arrays, as the child arrays would already be sliced properly
 		))
 	case arrow.ListLikeType: // also handles maps
+		// Key list columns are stored as varchar, so they come back as strings.
+		if sarr, ok := arr.(*array.String); ok {
+			return reverseTransformFromString(dt, sarr)
+		}
 		if mapdt, ok := dt.(*arrow.MapType); ok {
 			if sarr, ok := arr.(*array.Binary); ok {
 				return reverseTransformMap(mapdt, sarr)

@@ -10,7 +10,12 @@ import (
 func transformRecord(sc *arrow.Schema, rec arrow.RecordBatch) arrow.RecordBatch {
 	cols := make([]arrow.Array, rec.NumCols())
 	for i := 0; i < int(rec.NumCols()); i++ {
-		cols[i] = transformArray(rec.Column(i))
+		col := rec.Column(i)
+		if _, isList := col.DataType().(arrow.ListLikeType); isList && sc.Field(i).Type.ID() == arrow.STRING { // list-like data whose field in sc is a string: convert it to strings instead
+			cols[i] = transformToStringArray(col)
+			continue
+		}
+		cols[i] = transformArray(col) // every other column takes the regular transform
 	}
 	return array.NewRecordBatch(sc, cols, rec.NumRows())
 }

@@ -5,12 +5,20 @@ import (
 	"strings"
 
 	"github.com/apache/arrow-go/v18/arrow"
+	"github.com/cloudquery/plugin-sdk/v4/schema"
 	"github.com/cloudquery/plugin-sdk/v4/types"
 )
 
-func transformSchemaForWriting(sc *arrow.Schema) *arrow.Schema {
+func transformSchemaForWriting(table *schema.Table) *arrow.Schema { // Builds the Arrow schema used for writing table, with key list columns typed as string.
+	sc := table.ToArrowSchema()
 	md := arrow.MetadataFrom(sc.Metadata().ToMap())
-	return arrow.NewSchema(transformFieldsForWriting(sc.Fields()), &md)
+	fields := transformFieldsForWriting(sc.Fields())
+	for i := range table.Columns { // indexes fields by column position, so fields[i] must correspond to table.Columns[i]
+		if keyListColumn(table.Columns[i]) {
+			fields[i].Type = arrow.BinaryTypes.String
+		}
+	}
+	return arrow.NewSchema(fields, &md)
 }
 
 func transformFieldsForWriting(fields []arrow.Field) []arrow.Field {

@@ -32,20 +32,31 @@ func nonPkIndices(sc *schema.Table) []int {
 // but this is unavoidable until support is added to duckdb itself.
 // See https://github.com/duckdb/duckdb/blob/c5d9afb97bbf0be12216f3b89ae3131afbbc3156/src/storage/table/list_column_data.cpp#L243-L251
 func containsList(sc *schema.Table) bool {
-	return slices.ContainsFunc(sc.Columns, func(c schema.Column) bool { return dtContainsList(c.Type) })
+	return slices.ContainsFunc(sc.Columns, func(c schema.Column) bool {
+		return duckDBListColumn(c) && !keyListColumn(c) // key list columns don't count: duckDBType gives them varchar rather than a list type
+	})
 }
 
-func dtContainsList(dt arrow.DataType) bool {
-	switch dt := dt.(type) {
-	case *arrow.StructType:
-		return slices.ContainsFunc(dt.Fields(), func(f arrow.Field) bool { return dtContainsList(f.Type) })
-	case *arrow.MapType:
-		return dtContainsList(dt.KeyType()) || dtContainsList(dt.ItemType())
-	case arrow.ListLikeType:
-		return true
-	default:
+func duckDBListColumn(c schema.Column) bool { // Reports whether c's own type is list-like, with maps excluded.
+	// Maps also implement arrow.ListLikeType but map to DuckDB json, not a LIST, so rule them out first.
+	if _, isMap := c.Type.(*arrow.MapType); isMap {
 		return false
 	}
+	_, ok := c.Type.(arrow.ListLikeType) // only the column's top-level type is checked; nested field or element types are not inspected
+	return ok
+}
+
+// keyListColumn reports whether c is a key column (PrimaryKey or Unique set) whose type
+// maps to a DuckDB LIST (as decided by duckDBListColumn); LIST columns can't be indexed.
+func keyListColumn(c schema.Column) bool {
+	return (c.PrimaryKey || c.Unique) && duckDBListColumn(c)
+}
+
+func duckDBType(c schema.Column) string { // Returns the DuckDB type name for c: "varchar" for key list columns, arrowToDuckDB's result otherwise.
+	if keyListColumn(c) {
+		return "varchar"
+	}
+	return arrowToDuckDB(c.Type)
 }
 
 func (c *Client) upsert(ctx context.Context, tmpTableName string, table *schema.Table) error {
@@ -175,7 +186,7 @@ func (c *Client) WriteTableBatch(ctx context.Context, name string, msgs message.
 }
 
 func writeTMPFile(table *schema.Table, msgs []*message.WriteInsert) (fileName string, err error) {
-	sc := transformSchemaForWriting(table.ToArrowSchema())
+	sc := transformSchemaForWriting(table)
 
 	// create temp file
 	f, err := os.CreateTemp("", fmt.Sprintf("%s-*.parquet", table.Name))