From c706c5bb2ad92f72ba9dd290bfddca71722185b5 Mon Sep 17 00:00:00 2001 From: John-W-Lewis Date: Mon, 11 May 2026 10:03:14 +0100 Subject: [PATCH 1/2] GH-1142: Fix buffer ownership in VectorSchemaRoot addVector/removeVector Use TransferPair to properly transfer buffer ownership when creating a new VectorSchemaRoot, preventing use-after-free when the original root is closed. Co-authored-by: Cursor --- .../apache/arrow/vector/VectorSchemaRoot.java | 37 +++++-- .../arrow/vector/TestVectorSchemaRoot.java | 101 ++++++++++++++++-- 2 files changed, 119 insertions(+), 19 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java b/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java index 4c1fbf761..ee7f1b796 100644 --- a/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java +++ b/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java @@ -193,6 +193,10 @@ public FieldVector getVector(int index) { /** * Add vector to the record batch, producing a new VectorSchemaRoot. * + *

Buffer ownership is transferred to the returned root via {@link TransferPair}. After this + * operation, the vectors in this root and the added vector are left in a transferred (empty) + * state. This root can be reused by calling {@link #allocateNew()}. + * * @param index field index * @param vector vector to be added. * @return out VectorSchemaRoot with vector added @@ -201,16 +205,21 @@ public VectorSchemaRoot addVector(int index, FieldVector vector) { Preconditions.checkNotNull(vector); Preconditions.checkArgument(index >= 0 && index <= fieldVectors.size()); List newVectors = new ArrayList<>(); - if (index == fieldVectors.size()) { - newVectors.addAll(fieldVectors); - newVectors.add(vector); - } else { - for (int i = 0; i < fieldVectors.size(); i++) { - if (i == index) { - newVectors.add(vector); - } - newVectors.add(fieldVectors.get(i)); + for (int i = 0; i < fieldVectors.size(); i++) { + if (i == index) { + TransferPair addPair = vector.getTransferPair(vector.getAllocator()); + addPair.transfer(); + newVectors.add((FieldVector) addPair.getTo()); } + FieldVector v = fieldVectors.get(i); + TransferPair transferPair = v.getTransferPair(v.getAllocator()); + transferPair.transfer(); + newVectors.add((FieldVector) transferPair.getTo()); + } + if (index == fieldVectors.size()) { + TransferPair addPair = vector.getTransferPair(vector.getAllocator()); + addPair.transfer(); + newVectors.add((FieldVector) addPair.getTo()); } return new VectorSchemaRoot(newVectors); } @@ -218,6 +227,11 @@ public VectorSchemaRoot addVector(int index, FieldVector vector) { /** * Remove vector from the record batch, producing a new VectorSchemaRoot. * + *

Buffer ownership is transferred to the returned root via {@link TransferPair}. After this + * operation, the vectors in this root are left in a transferred (empty) state. The removed + * vector's data is not transferred and is released. This root can be reused by calling {@link + * #allocateNew()}. + * * @param index field index * @return out VectorSchemaRoot with vector removed */ @@ -226,7 +240,10 @@ public VectorSchemaRoot removeVector(int index) { List newVectors = new ArrayList<>(); for (int i = 0; i < fieldVectors.size(); i++) { if (i != index) { - newVectors.add(fieldVectors.get(i)); + FieldVector v = fieldVectors.get(i); + TransferPair transferPair = v.getTransferPair(v.getAllocator()); + transferPair.transfer(); + newVectors.add((FieldVector) transferPair.getTo()); } } return new VectorSchemaRoot(newVectors); diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java index bd3113f8b..493f29d05 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java @@ -157,14 +157,14 @@ private VectorSchemaRoot createBatch() { public void testAddVector() { try (final IntVector intVector1 = new IntVector("intVector1", allocator); final IntVector intVector2 = new IntVector("intVector2", allocator); - final IntVector intVector3 = new IntVector("intVector3", allocator); ) { + final IntVector intVector3 = new IntVector("intVector3", allocator)) { VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector1, intVector2)); assertEquals(2, original.getFieldVectors().size()); VectorSchemaRoot newRecordBatch = original.addVector(1, intVector3); assertEquals(3, newRecordBatch.getFieldVectors().size()); - assertEquals(intVector3, newRecordBatch.getFieldVectors().get(1)); + assertEquals(intVector3.getField(), newRecordBatch.getFieldVectors().get(1).getField()); original.close(); newRecordBatch.close(); @@ -175,16 +175,16 @@ public void testAddVector() { public void testAddVectorAtEnd() { try (final IntVector intVector1 = new IntVector("intVector1", allocator); final IntVector intVector2 = new IntVector("intVector2", allocator); - final IntVector intVector3 = new IntVector("intVector3", allocator); ) { + final IntVector intVector3 = new IntVector("intVector3", allocator)) { VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector1, intVector2)); assertEquals(2, original.getFieldVectors().size()); VectorSchemaRoot newRecordBatch = original.addVector(2, intVector3); assertEquals(3, newRecordBatch.getFieldVectors().size()); - assertEquals(intVector1, newRecordBatch.getFieldVectors().get(0)); - assertEquals(intVector2, newRecordBatch.getFieldVectors().get(1)); - assertEquals(intVector3, newRecordBatch.getFieldVectors().get(2)); + assertEquals(intVector1.getField(), newRecordBatch.getFieldVectors().get(0).getField()); + assertEquals(intVector2.getField(), newRecordBatch.getFieldVectors().get(1).getField()); + assertEquals(intVector3.getField(), newRecordBatch.getFieldVectors().get(2).getField()); original.close(); newRecordBatch.close(); @@ -195,7 +195,7 @@ public void testAddVectorAtEnd() { public void testRemoveVector() { try (final IntVector intVector1 = new IntVector("intVector1", allocator); final IntVector intVector2 = new IntVector("intVector2", allocator); - final IntVector intVector3 = new IntVector("intVector3", allocator); ) { + final IntVector intVector3 = new IntVector("intVector3", allocator)) { VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector1, intVector2, intVector3)); @@ -203,8 +203,8 @@ public void testRemoveVector() { VectorSchemaRoot newRecordBatch = original.removeVector(0); assertEquals(2, newRecordBatch.getFieldVectors().size()); - assertEquals(intVector2, newRecordBatch.getFieldVectors().get(0)); - assertEquals(intVector3, newRecordBatch.getFieldVectors().get(1)); + assertEquals(intVector2.getField(), newRecordBatch.getFieldVectors().get(0).getField()); + assertEquals(intVector3.getField(), newRecordBatch.getFieldVectors().get(1).getField()); original.close(); newRecordBatch.close(); @@ -344,4 +344,87 @@ public void testSchemaSync() { assertFalse(schemaRoot.syncSchema()); } } + + @Test + public void testAddVectorOwnership() { + try (final IntVector intVector1 = new IntVector("intVector1", allocator); + final IntVector intVector2 = new IntVector("intVector2", allocator); + final IntVector intVector3 = new IntVector("intVector3", allocator)) { + + intVector1.allocateNew(); + intVector2.allocateNew(); + intVector3.allocateNew(); + for (int i = 0; i < 5; i++) { + intVector1.setSafe(i, i * 10); + intVector2.setSafe(i, i * 20); + intVector3.setSafe(i, i * 30); + } + intVector1.setValueCount(5); + intVector2.setValueCount(5); + intVector3.setValueCount(5); + + VectorSchemaRoot original = + new VectorSchemaRoot(Arrays.asList(intVector1, intVector2)); + original.setRowCount(5); + + VectorSchemaRoot result = original.addVector(1, intVector3); + + // Close the original root and the added vector -- the result should still have valid data + original.close(); + intVector3.close(); + + assertEquals(3, result.getFieldVectors().size()); + assertEquals(5, result.getRowCount()); + IntVector resultVec0 = (IntVector) result.getVector(0); + IntVector resultVec1 = (IntVector) result.getVector(1); + IntVector resultVec2 = (IntVector) result.getVector(2); + for (int i = 0; i < 5; i++) { + assertEquals(i * 10, resultVec0.get(i)); + assertEquals(i * 30, resultVec1.get(i)); + assertEquals(i * 20, resultVec2.get(i)); + } + + result.close(); + } + } + + @Test + public void testRemoveVectorOwnership() { + try (final IntVector intVector1 = new IntVector("intVector1", allocator); + final IntVector intVector2 = new IntVector("intVector2", allocator); + final IntVector intVector3 = new IntVector("intVector3", allocator)) { + + intVector1.allocateNew(); + intVector2.allocateNew(); + intVector3.allocateNew(); + for (int i = 0; i < 5; i++) { + intVector1.setSafe(i, i * 10); + intVector2.setSafe(i, i * 20); + intVector3.setSafe(i, i * 30); + } + intVector1.setValueCount(5); + intVector2.setValueCount(5); + intVector3.setValueCount(5); + + VectorSchemaRoot original = + new VectorSchemaRoot(Arrays.asList(intVector1, intVector2, intVector3)); + original.setRowCount(5); + + VectorSchemaRoot result = original.removeVector(1); + + // Close the original root -- the result should still have valid data + original.close(); + + assertEquals(2, result.getFieldVectors().size()); + assertEquals(5, result.getRowCount()); + IntVector resultVec0 = (IntVector) result.getVector(0); + IntVector resultVec1 = (IntVector) result.getVector(1); + for (int i = 0; i < 5; i++) { + assertEquals(i * 10, resultVec0.get(i)); + assertEquals(i * 30, resultVec1.get(i)); + } + + result.close(); + } + } } From f0128b3b2b9bc4261679fe7aa0d4b8304f0945c1 Mon Sep 17 00:00:00 2001 From: John-W-Lewis Date: Sun, 17 May 2026 18:37:29 +0100 Subject: [PATCH 2/2] Address review feedback: extract helper, fix formatting, clear removed vector - Extract transferVector() helper to reduce duplication in addVector/removeVector - Fix Spotless formatting violation in TestVectorSchemaRoot.java - Clear the removed vector's buffers in removeVector() so the original root is consistently empty after the operation, matching the documented behaviour Co-authored-by: Cursor --- .../apache/arrow/vector/VectorSchemaRoot.java | 25 ++++++++----------- .../arrow/vector/TestVectorSchemaRoot.java | 3 +-- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java b/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java index ee7f1b796..0a91e2ee2 100644 --- a/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java +++ b/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java @@ -207,19 +207,12 @@ public VectorSchemaRoot addVector(int index, FieldVector vector) { List newVectors = new ArrayList<>(); for (int i = 0; i < fieldVectors.size(); i++) { if (i == index) { - TransferPair addPair = vector.getTransferPair(vector.getAllocator()); - addPair.transfer(); - newVectors.add((FieldVector) addPair.getTo()); + newVectors.add(transferVector(vector)); } - FieldVector v = fieldVectors.get(i); - TransferPair transferPair = v.getTransferPair(v.getAllocator()); - transferPair.transfer(); - newVectors.add((FieldVector) transferPair.getTo()); + newVectors.add(transferVector(fieldVectors.get(i))); } if (index == fieldVectors.size()) { - TransferPair addPair = vector.getTransferPair(vector.getAllocator()); - addPair.transfer(); - newVectors.add((FieldVector) addPair.getTo()); + newVectors.add(transferVector(vector)); } return new VectorSchemaRoot(newVectors); } @@ -240,15 +233,19 @@ public VectorSchemaRoot removeVector(int index) { List newVectors = new ArrayList<>(); for (int i = 0; i < fieldVectors.size(); i++) { if (i != index) { - FieldVector v = fieldVectors.get(i); - TransferPair transferPair = v.getTransferPair(v.getAllocator()); - transferPair.transfer(); - newVectors.add((FieldVector) transferPair.getTo()); + newVectors.add(transferVector(fieldVectors.get(i))); } } + fieldVectors.get(index).clear(); return new VectorSchemaRoot(newVectors); } + private static FieldVector transferVector(FieldVector vector) { + TransferPair transferPair = vector.getTransferPair(vector.getAllocator()); + transferPair.transfer(); + return (FieldVector) transferPair.getTo(); + } + public Schema getSchema() { return schema; } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java index 493f29d05..93d4ee646 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java @@ -363,8 +363,7 @@ public void testAddVectorOwnership() { intVector2.setValueCount(5); intVector3.setValueCount(5); - VectorSchemaRoot original = - new VectorSchemaRoot(Arrays.asList(intVector1, intVector2)); + VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector1, intVector2)); original.setRowCount(5); VectorSchemaRoot result = original.addVector(1, intVector3);