 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.data.StringData;
 import org.apache.flink.table.data.TimestampData;
+
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
 
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
  * Test cases for {@link AppendWriteFunctionWithBufferSort}.
@@ -175,6 +177,117 @@ public void testSortedResult() throws Exception {
     assertArrayEquals(expected.toArray(), filteredResult.toArray());
   }
 
+  @Test
+  public void testMultipleCheckpoints() throws Exception {
+    List<RowData> batch1 = Arrays.asList(
+        createRowData("uuid1", "Charlie", 35, "1970-01-01 00:00:01.123", "p1"),
+        createRowData("uuid2", "Alice", 25, "1970-01-01 00:00:01.124", "p1")
+    );
+
+    List<RowData> batch2 = Arrays.asList(
+        createRowData("uuid3", "Bob", 30, "1970-01-01 00:00:01.125", "p1"),
+        createRowData("uuid4", "Diana", 28, "1970-01-01 00:00:01.126", "p1")
+    );
+
+    TestHarness testHarness = TestWriteBase.TestHarness.instance()
+        .preparePipeline(tempFile, conf);
+
+    testHarness.consume(batch1).checkpoint(1);
+    testHarness.consume(batch2).checkpoint(2);
+    testHarness.endInput();
+
+    List<GenericRecord> actualData = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1);
+    assertEquals(4, actualData.size());
+  }
+
+  @Test
+  public void testLargeDatasetWithMultipleFlushes() throws Exception {
+    this.conf.set(FlinkOptions.WRITE_BUFFER_SIZE, 50L);
+
+    List<RowData> inputData = new ArrayList<>();
+    for (int i = 0; i < 500; i++) {
+      inputData.add(createRowData("uuid" + i, "Name" + (i % 10), i % 100, "1970-01-01 00:00:01.123", "p" + (i % 3)));
+    }
+
+    TestWriteBase.TestHarness.instance()
+        .preparePipeline(tempFile, conf)
+        .consume(inputData)
+        .endInput();
+
+    List<GenericRecord> actualData = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 3);
+    assertEquals(500, actualData.size());
+  }
+
+  @Test
+  public void testSortStabilityWithDuplicateKeys() throws Exception {
+    List<RowData> inputData = Arrays.asList(
+        createRowData("uuid1", "Alice", 25, "1970-01-01 00:00:01.123", "p1"),
+        createRowData("uuid2", "Alice", 25, "1970-01-01 00:00:01.124", "p1"),
+        createRowData("uuid3", "Alice", 25, "1970-01-01 00:00:01.125", "p1"),
+        createRowData("uuid4", "Bob", 30, "1970-01-01 00:00:01.126", "p1")
+    );
+
+    TestWriteBase.TestHarness.instance()
+        .preparePipeline(tempFile, conf)
+        .consume(inputData)
+        .endInput();
+
+    List<GenericRecord> actualData = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1);
+    assertEquals(4, actualData.size());
+
+    List<String> filteredResult = actualData.stream()
+        .map(TestData::filterOutVariablesWithoutHudiMetadata)
+        .collect(Collectors.toList());
+
+    assertTrue(filteredResult.get(0).contains("Alice"));
+    assertTrue(filteredResult.get(1).contains("Alice"));
+    assertTrue(filteredResult.get(2).contains("Alice"));
+    assertTrue(filteredResult.get(3).contains("Bob"));
+  }
+
+  @Test
+  public void testDifferentPartitions() throws Exception {
+    List<RowData> inputData = Arrays.asList(
+        createRowData("uuid1", "Alice", 25, "1970-01-01 00:00:01.123", "p1"),
+        createRowData("uuid2", "Bob", 30, "1970-01-01 00:00:01.124", "p2"),
+        createRowData("uuid3", "Charlie", 35, "1970-01-01 00:00:01.125", "p3"),
+        createRowData("uuid4", "Diana", 28, "1970-01-01 00:00:01.126", "p1")
+    );
+
+    TestWriteBase.TestHarness.instance()
+        .preparePipeline(tempFile, conf)
+        .consume(inputData)
+        .endInput();
+
+    List<GenericRecord> actualData = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 3);
+    assertEquals(4, actualData.size());
+  }
+
+  @Test
+  public void testConcurrentWriteScenario() throws Exception {
+    this.conf.set(FlinkOptions.WRITE_BUFFER_SIZE, 20L);
+
+    List<RowData> inputData = new ArrayList<>();
+    for (int i = 0; i < 200; i++) {
+      inputData.add(createRowData("uuid" + i, "Name" + (i % 5), i % 50, "1970-01-01 00:00:01.123", "p1"));
+    }
+
+    TestHarness testHarness = TestWriteBase.TestHarness.instance()
+        .preparePipeline(tempFile, conf);
+
+    for (int i = 0; i < inputData.size(); i += 10) {
+      List<RowData> batch = inputData.subList(i, Math.min(i + 10, inputData.size()));
+      testHarness.consume(batch);
+      if (i % 50 == 0) {
+        testHarness.checkpoint(i / 50 + 1);
+      }
+    }
+    testHarness.endInput();
+
+    List<GenericRecord> actualData = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1);
+    assertEquals(200, actualData.size());
+  }
+
   private GenericRowData createRowData(String uuid, String name, int age, String timestamp, String partition) {
     return GenericRowData.of(StringData.fromString(uuid), StringData.fromString(name),
         age, TimestampData.fromTimestamp(Timestamp.valueOf(timestamp)), StringData.fromString(partition));