Skip to content

Commit 3fe1663

Browse files
committed
feat: use external header file in admin import demo
1 parent 26ee63c commit 3fe1663

File tree

2 files changed

+45
-48
lines changed

2 files changed

+45
-48
lines changed

examples/neo4j-admin/src/test/java/org/neo4j/importer/Neo4jAdminExampleIT.java

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
import com.fasterxml.jackson.databind.node.ObjectNode;
2222
import java.io.*;
2323
import java.net.URL;
24+
import java.nio.file.Files;
2425
import java.sql.DriverManager;
25-
import java.sql.SQLException;
2626
import java.util.*;
2727
import java.util.function.Function;
2828
import java.util.stream.Collectors;
@@ -61,17 +61,13 @@ public class Neo4jAdminExampleIT {
6161

6262
private static final String SHARED_FOLDER = "/admin-import/";
6363

64-
// neo4j-admin database import from Parquet files is an unreleased feature, and we are using a custom docker image
65-
// for that reason, until the feature is publicly available. Set this image name using
66-
// `NEO4J_PRE_RELEASE_DOCKER_IMAGE` environment variable.
6764
@Container
68-
private static final GenericContainer<?> NEO4J = new Neo4jContainer<>(DockerImageName.parse(
69-
Optional.ofNullable(System.getenv("NEO4J_PRE_RELEASE_DOCKER_IMAGE"))
70-
.orElseThrow(
71-
() -> new IllegalArgumentException(
72-
"Docker image name is not set through NEO4J_PRE_RELEASE_DOCKER_IMAGE environment variable!")))
73-
.asCompatibleSubstituteFor("neo4j"))
65+
private static final GenericContainer<?> NEO4J = new Neo4jContainer<>(
66+
DockerImageName.parse("neo4j:2025.03-enterprise"))
7467
.withEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes")
68+
.withNeo4jConfig("dbms.integrations.cloud_storage.gs.project_id", "connectors-public")
69+
.withNeo4jConfig("server.config.strict_validation.enabled", "false")
70+
.withNeo4jConfig("internal.dbms.cloud.storage.gs.host", "https://storage.googleapis.com")
7571
.withAdminPassword("letmein!")
7672
.withCreateContainerCmdModifier(cmd -> cmd.withUser("neo4j"))
7773
.withFileSystemBind(
@@ -106,7 +102,7 @@ void runs_an_offline_import_of_dvd_rental_data_set() throws Exception {
106102
var specification = ImportSpecificationDeserializer.deserialize(reader);
107103
var sharedFolder = pathFor(SHARED_FOLDER);
108104
var neo4jAdmin = new Neo4jAdmin(sharedFolder, driver, TARGET_DATABASE);
109-
neo4jAdmin.copyFiles(specification);
105+
neo4jAdmin.createHeaderFiles(specification);
110106
neo4jAdmin.executeImport(specification, NEO4J);
111107
}
112108
}
@@ -338,35 +334,35 @@ private void migrateTemporalProperties() {
338334
}
339335
}
340336

341-
public void copyFiles(ImportSpecification specification) throws Exception {
337+
public void createHeaderFiles(ImportSpecification specification) throws Exception {
342338
Map<String, Source> indexedSources =
343339
specification.getSources().stream().collect(Collectors.toMap(Source::getName, Function.identity()));
344340
Map<String, NodeTarget> indexedNodes = specification.getTargets().getNodes().stream()
345341
.collect(Collectors.toMap(Target::getName, Function.identity()));
346342
for (Target target : specification.getTargets().getAll()) {
347343
switch (target) {
348-
case NodeTarget nodeTarget -> copyFile(indexedSources, nodeTarget);
344+
case NodeTarget nodeTarget -> createHeaderFile(indexedSources, nodeTarget);
349345
case RelationshipTarget relationshipTarget ->
350-
copyFile(indexedSources, indexedNodes, relationshipTarget);
346+
createHeaderFile(indexedSources, indexedNodes, relationshipTarget);
351347
default -> throw new RuntimeException("unsupported target type: %s".formatted(target.getClass()));
352348
}
353349
}
354350
}
355351

356-
private void copyFile(Map<String, Source> sources, NodeTarget nodeTarget) throws Exception {
352+
private void createHeaderFile(Map<String, Source> sources, NodeTarget nodeTarget) throws Exception {
357353
var source = sources.get(nodeTarget.getSource());
358354
assertThat(source).isInstanceOf(ParquetSource.class);
359-
File parquetFile = new File(sharedFolder, fileName(nodeTarget));
355+
File parquetFile = new File(sharedFolder, headerFileName(nodeTarget));
360356
List<String> fields = readFieldNames(source);
361357
Map<String, String> fieldMappings = computeFieldMappings(fields, nodeTarget);
362358

363-
copyParquetSource((ParquetSource) source, parquetFile, fieldMappings);
359+
createHeaderFile(parquetFile, fieldMappings);
364360
}
365361

366-
private void copyFile(
362+
private void createHeaderFile(
367363
Map<String, Source> sources, Map<String, NodeTarget> nodes, RelationshipTarget relationshipTarget)
368364
throws Exception {
369-
File parquetFile = new File(sharedFolder, fileName(relationshipTarget));
365+
File parquetFile = new File(sharedFolder, headerFileName(relationshipTarget));
370366

371367
var source = sources.get(relationshipTarget.getSource());
372368
assertThat(source).isInstanceOf(ParquetSource.class);
@@ -377,26 +373,14 @@ private void copyFile(
377373
Map<String, String> fieldMappings =
378374
computeFieldMappings(fields, relationshipTarget, startNodeTarget, endNodeTarget);
379375

380-
copyParquetSource((ParquetSource) source, parquetFile, fieldMappings);
376+
createHeaderFile(parquetFile, fieldMappings);
381377
}
382378

383-
// 🐤
384-
private void copyParquetSource(ParquetSource source, File targetFile, Map<String, String> fieldMappings)
385-
throws SQLException {
386-
var renamedColumns = String.join(
387-
", ",
388-
fieldMappings.entrySet().stream()
389-
.map(e -> String.format("%s AS \"%s\"", e.getKey(), e.getValue()))
390-
.toList());
391-
392-
try (var connection = DriverManager.getConnection("jdbc:duckdb:");
393-
var statement = connection.prepareStatement(String.format(
394-
"COPY (SELECT %s FROM read_parquet($1)) TO '%s' (FORMAT 'parquet', CODEC 'zstd')",
395-
renamedColumns, targetFile.getAbsolutePath()))) {
379+
private void createHeaderFile(File targetFile, Map<String, String> fieldMappings) throws IOException {
396380

397-
statement.setString(1, source.uri());
398-
statement.execute();
399-
}
381+
var sortedKeys = fieldMappings.keySet().stream().sorted().toList();
382+
var values = sortedKeys.stream().map(fieldMappings::get).collect(Collectors.joining(","));
383+
Files.writeString(targetFile.toPath(), values + "\n" + String.join(",", sortedKeys));
400384
}
401385

402386
private static String[] importCommand(ImportSpecification specification, String database) {
@@ -409,14 +393,16 @@ private static String[] importCommand(ImportSpecification specification, String
409393
command.append(" --nodes=");
410394
command.append(String.join(":", nodeTarget.getLabels()));
411395
command.append("=");
412-
command.append("/import/%s".formatted(fileName(nodeTarget)));
396+
command.append("/import/%s,".formatted(headerFileName(nodeTarget)));
397+
command.append("%s".formatted(sourceUri(specification, nodeTarget)));
413398
}
414399

415400
for (RelationshipTarget relationshipTarget : targets.getRelationships()) {
416401
command.append(" --relationships=");
417402
command.append(relationshipTarget.getType());
418403
command.append("=");
419-
command.append("/import/%s".formatted(fileName(relationshipTarget)));
404+
command.append("/import/%s,".formatted(headerFileName(relationshipTarget)));
405+
command.append("%s".formatted(sourceUri(specification, relationshipTarget)));
420406
}
421407

422408
return command.toString().split(" ");
@@ -493,8 +479,19 @@ private static Map<String, String> indexByField(List<PropertyMapping> properties
493479
return result;
494480
}
495481

496-
private static String fileName(Target target) {
497-
return "%s.parquet".formatted(target.getName());
482+
private static String headerFileName(Target target) {
483+
return "%s_header.csv".formatted(target.getName());
484+
}
485+
486+
private static String sourceUri(ImportSpecification specification, Target target) {
487+
var maybeSource = specification.getSources().stream()
488+
.filter(src -> src.getName().equals(target.getSource()))
489+
.findFirst();
490+
assertThat(maybeSource).isPresent();
491+
var rawSource = maybeSource.get();
492+
assertThat(rawSource).isInstanceOf(ParquetSource.class);
493+
var source = (ParquetSource) rawSource;
494+
return source.uri();
498495
}
499496

500497
private static String idSpaceFor(NodeTarget nodeTarget) {

examples/neo4j-admin/src/test/resources/specs/dvd_rental.yaml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,28 @@ version: "1"
22
sources:
33
- name: actor
44
type: parquet
5-
uri: gs://connectors-dev/dvdrental/actor.parquet
5+
uri: gs://connectors-public/dvdrental/actor.parquet
66
- name: category
77
type: parquet
8-
uri: gs://connectors-dev/dvdrental/category.parquet
8+
uri: gs://connectors-public/dvdrental/category.parquet
99
- name: customer
1010
type: parquet
11-
uri: gs://connectors-dev/dvdrental/customer.parquet
11+
uri: gs://connectors-public/dvdrental/customer.parquet
1212
- name: film_actor
1313
type: parquet
14-
uri: gs://connectors-dev/dvdrental/film_actor.parquet
14+
uri: gs://connectors-public/dvdrental/film_actor.parquet
1515
- name: film_category
1616
type: parquet
17-
uri: gs://connectors-dev/dvdrental/film_category.parquet
17+
uri: gs://connectors-public/dvdrental/film_category.parquet
1818
- name: film
1919
type: parquet
20-
uri: gs://connectors-dev/dvdrental/film.parquet
20+
uri: gs://connectors-public/dvdrental/film.parquet
2121
- name: inventory
2222
type: parquet
23-
uri: gs://connectors-dev/dvdrental/inventory.parquet
23+
uri: gs://connectors-public/dvdrental/inventory.parquet
2424
- name: rental
2525
type: parquet
26-
uri: gs://connectors-dev/dvdrental/rental.parquet
26+
uri: gs://connectors-public/dvdrental/rental.parquet
2727
targets:
2828
nodes:
2929
- source: actor

0 commit comments

Comments
 (0)