Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ public ArrowVectorAccessor<DecimalT, Utf8StringT, ArrayT, ChildVectorT> getVecto
switch (primitive.getPrimitiveTypeName()) {
case FIXED_LEN_BYTE_ARRAY:
case BINARY:
return new DictionaryBinaryAccessor<>((IntVector) vector, dictionary);
return new DictionaryBinaryAccessor<>(
(IntVector) vector, dictionary, stringFactorySupplier.get());
case FLOAT:
return new DictionaryFloatAccessor<>((IntVector) vector, dictionary);
case INT64:
Expand Down Expand Up @@ -452,17 +453,27 @@ private static class DictionaryBinaryAccessor<
extends ArrowVectorAccessor<DecimalT, Utf8StringT, ArrayT, ChildVectorT> {
private final IntVector offsetVector;
private final Dictionary dictionary;
private final StringFactory<Utf8StringT> stringFactory;

DictionaryBinaryAccessor(IntVector vector, Dictionary dictionary) {
/**
 * Creates an accessor over a dictionary-encoded binary column.
 *
 * @param vector the vector of dictionary ids (one id per row)
 * @param dictionary the Parquet dictionary used to decode ids back to binary values
 * @param stringFactory optional factory for building UTF8 strings directly from the
 *     dictionary; may be null, in which case the superclass conversion is used
 */
DictionaryBinaryAccessor(
    IntVector vector, Dictionary dictionary, StringFactory<Utf8StringT> stringFactory) {
  super(vector);
  this.offsetVector = vector;
  this.dictionary = dictionary;
  this.stringFactory = stringFactory;
}

@Override
public final byte[] getBinary(int rowId) {
  // Resolve the row's dictionary id first, then materialize the decoded bytes.
  int dictionaryId = offsetVector.get(rowId);
  return dictionary.decodeToBinary(dictionaryId).getBytes();
}

@Override
public Utf8StringT getUTF8String(int rowId) {
  // Without a custom string factory, defer to the superclass conversion path.
  if (stringFactory == null) {
    return super.getUTF8String(rowId);
  }
  // Let the factory build the string straight from the dictionary-encoded value.
  return stringFactory.ofRow(offsetVector, dictionary, rowId);
}
}

private static class DictionaryTimestampInt96Accessor<
Expand Down Expand Up @@ -815,6 +826,13 @@ default Utf8StringT ofRow(FixedSizeBinaryVector vector, int rowId) {
getGenericClass().getSimpleName()));
}

/** Create a UTF8 String from the row value in the Dictionary. */
default Utf8StringT ofRow(IntVector offsetVector, Dictionary dictionary, int rowId) {
  // Default implementation: implementors that support dictionary decoding override this.
  String message =
      String.format(
          "Creating %s from a Dictionary is not supported", getGenericClass().getSimpleName());
  throw new UnsupportedOperationException(message);
}

/** Create a UTF8 String from the byte array. */
Utf8StringT ofBytes(byte[] bytes);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@
import java.nio.ByteBuffer;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.vector.FixedSizeBinaryVector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.iceberg.arrow.vectorized.GenericArrowVectorAccessorFactory;
import org.apache.iceberg.util.UUIDUtil;
import org.apache.parquet.column.Dictionary;
import org.apache.spark.sql.types.Decimal;
import org.apache.spark.sql.vectorized.ArrowColumnVector;
import org.apache.spark.sql.vectorized.ColumnarArray;
Expand Down Expand Up @@ -81,6 +83,12 @@ public UTF8String ofRow(FixedSizeBinaryVector vector, int rowId) {
return UTF8String.fromString(UUIDUtil.convert(vector.get(rowId)).toString());
}

@Override
public UTF8String ofRow(IntVector offsetVector, Dictionary dictionary, int rowId) {
  // Decode the dictionary-encoded UUID bytes, then render the canonical string form.
  int dictionaryId = offsetVector.get(rowId);
  byte[] uuidBytes = dictionary.decodeToBinary(dictionaryId).getBytes();
  return UTF8String.fromString(UUIDUtil.convert(uuidBytes).toString());
}

@Override
public UTF8String ofBytes(byte[] bytes) {
return UTF8String.fromBytes(bytes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -400,4 +400,19 @@ public void testUnsupportedReadsForParquetV2() throws Exception {
.hasMessageStartingWith("Cannot support vectorized reads for column")
.hasMessageEndingWith("Disable vectorized reads to read this table/file");
}

@Test
public void testUuidReads() throws Exception {
// Just one row to maintain dictionary encoding
int numRows = 1;
Schema schema = new Schema(optional(100, "uuid", Types.UUIDType.get()));

File dataFile = File.createTempFile("junit", null, temp.toFile());
assertThat(dataFile.delete()).as("Delete should succeed").isTrue();
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we do this? The test looks fine; I'm just wondering why we create and then delete the file.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are multiple tests that use the same directory. Removing this line causes:

> Task :iceberg-spark:iceberg-spark-3.5_2.12:test FAILED
TestParquetVectorizedReads > testUuidReads() FAILED
    org.apache.iceberg.exceptions.AlreadyExistsException: File already exists: /var/folders/q_/gj3w0fts15n9l1dz58630jg40000gp/T/junit-17282529623949094801/junit6226812078387717274.tmp
        at app//org.apache.iceberg.Files$LocalOutputFile.create(Files.java:56)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've created an issue to fix this for all the tests: #13506

Copy link
Copy Markdown
Contributor

@liamzwbao liamzwbao Jul 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Took a look at the issue. @TempDir makes sure each test gets a unique directory, so multiple tests won't affect each other. However, the reason these tests need to manually delete the directory is that @TempDir always creates the directory before the test starts, and getParquetWriter throws an exception if the target folder already exists.

What we actually need here is just a randomly generated path without creating anything at that location. Unfortunately, @TempDir doesn’t currently support that level of control.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not a JUnit expert @liamzwbao, but this blog suggests that you can also recreate the directory after each test

Iterable<Record> data = generateData(schema, numRows, 0L, 0, IDENTITY);
try (FileAppender<Record> writer = getParquetV2Writer(schema, dataFile)) {
writer.addAll(data);
}
assertRecordsMatch(schema, numRows, data, dataFile, false, BATCH_SIZE);
}
}