Skip to content

Commit 3fa0b75

Browse files
committed
feat: use external header file in admin import demo
1 parent 26ee63c commit 3fa0b75

File tree

2 files changed

+48
-48
lines changed

2 files changed

+48
-48
lines changed

examples/neo4j-admin/src/test/java/org/neo4j/importer/Neo4jAdminExampleIT.java

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@
2121
import com.fasterxml.jackson.databind.node.ObjectNode;
2222
import java.io.*;
2323
import java.net.URL;
24+
import java.nio.file.Files;
25+
import java.nio.file.StandardOpenOption;
2426
import java.sql.DriverManager;
25-
import java.sql.SQLException;
2627
import java.util.*;
2728
import java.util.function.Function;
2829
import java.util.stream.Collectors;
@@ -61,17 +62,13 @@ public class Neo4jAdminExampleIT {
6162

6263
private static final String SHARED_FOLDER = "/admin-import/";
6364

64-
// neo4j-admin database import from Parquet files is an unreleased feature, and we are using a custom docker image
65-
// for that reason, until the feature is publicly available. Set this image name using
66-
// `NEO4J_PRE_RELEASE_DOCKER_IMAGE` environment variable.
6765
@Container
68-
private static final GenericContainer<?> NEO4J = new Neo4jContainer<>(DockerImageName.parse(
69-
Optional.ofNullable(System.getenv("NEO4J_PRE_RELEASE_DOCKER_IMAGE"))
70-
.orElseThrow(
71-
() -> new IllegalArgumentException(
72-
"Docker image name is not set through NEO4J_PRE_RELEASE_DOCKER_IMAGE environment variable!")))
73-
.asCompatibleSubstituteFor("neo4j"))
66+
private static final GenericContainer<?> NEO4J = new Neo4jContainer<>(
67+
DockerImageName.parse("neo4j:2025.03-enterprise"))
7468
.withEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes")
69+
.withNeo4jConfig("dbms.integrations.cloud_storage.gs.project_id", "connectors-public")
70+
.withNeo4jConfig("server.config.strict_validation.enabled", "false")
71+
.withNeo4jConfig("internal.dbms.cloud.storage.gs.host", "https://storage.googleapis.com")
7572
.withAdminPassword("letmein!")
7673
.withCreateContainerCmdModifier(cmd -> cmd.withUser("neo4j"))
7774
.withFileSystemBind(
@@ -106,7 +103,7 @@ void runs_an_offline_import_of_dvd_rental_data_set() throws Exception {
106103
var specification = ImportSpecificationDeserializer.deserialize(reader);
107104
var sharedFolder = pathFor(SHARED_FOLDER);
108105
var neo4jAdmin = new Neo4jAdmin(sharedFolder, driver, TARGET_DATABASE);
109-
neo4jAdmin.copyFiles(specification);
106+
neo4jAdmin.createHeaderFiles(specification);
110107
neo4jAdmin.executeImport(specification, NEO4J);
111108
}
112109
}
@@ -338,35 +335,35 @@ private void migrateTemporalProperties() {
338335
}
339336
}
340337

341-
public void copyFiles(ImportSpecification specification) throws Exception {
338+
public void createHeaderFiles(ImportSpecification specification) throws Exception {
342339
Map<String, Source> indexedSources =
343340
specification.getSources().stream().collect(Collectors.toMap(Source::getName, Function.identity()));
344341
Map<String, NodeTarget> indexedNodes = specification.getTargets().getNodes().stream()
345342
.collect(Collectors.toMap(Target::getName, Function.identity()));
346343
for (Target target : specification.getTargets().getAll()) {
347344
switch (target) {
348-
case NodeTarget nodeTarget -> copyFile(indexedSources, nodeTarget);
345+
case NodeTarget nodeTarget -> createHeaderFile(indexedSources, nodeTarget);
349346
case RelationshipTarget relationshipTarget ->
350-
copyFile(indexedSources, indexedNodes, relationshipTarget);
347+
createHeaderFile(indexedSources, indexedNodes, relationshipTarget);
351348
default -> throw new RuntimeException("unsupported target type: %s".formatted(target.getClass()));
352349
}
353350
}
354351
}
355352

356-
private void copyFile(Map<String, Source> sources, NodeTarget nodeTarget) throws Exception {
353+
private void createHeaderFile(Map<String, Source> sources, NodeTarget nodeTarget) throws Exception {
357354
var source = sources.get(nodeTarget.getSource());
358355
assertThat(source).isInstanceOf(ParquetSource.class);
359-
File parquetFile = new File(sharedFolder, fileName(nodeTarget));
356+
File parquetFile = new File(sharedFolder, headerFileName(nodeTarget));
360357
List<String> fields = readFieldNames(source);
361358
Map<String, String> fieldMappings = computeFieldMappings(fields, nodeTarget);
362359

363-
copyParquetSource((ParquetSource) source, parquetFile, fieldMappings);
360+
createHeaderFile(parquetFile, fieldMappings);
364361
}
365362

366-
private void copyFile(
363+
private void createHeaderFile(
367364
Map<String, Source> sources, Map<String, NodeTarget> nodes, RelationshipTarget relationshipTarget)
368365
throws Exception {
369-
File parquetFile = new File(sharedFolder, fileName(relationshipTarget));
366+
File parquetFile = new File(sharedFolder, headerFileName(relationshipTarget));
370367

371368
var source = sources.get(relationshipTarget.getSource());
372369
assertThat(source).isInstanceOf(ParquetSource.class);
@@ -377,26 +374,16 @@ private void copyFile(
377374
Map<String, String> fieldMappings =
378375
computeFieldMappings(fields, relationshipTarget, startNodeTarget, endNodeTarget);
379376

380-
copyParquetSource((ParquetSource) source, parquetFile, fieldMappings);
377+
createHeaderFile(parquetFile, fieldMappings);
381378
}
382379

383-
// 🐤
384-
private void copyParquetSource(ParquetSource source, File targetFile, Map<String, String> fieldMappings)
385-
throws SQLException {
386-
var renamedColumns = String.join(
387-
", ",
388-
fieldMappings.entrySet().stream()
389-
.map(e -> String.format("%s AS \"%s\"", e.getKey(), e.getValue()))
390-
.toList());
391-
392-
try (var connection = DriverManager.getConnection("jdbc:duckdb:");
393-
var statement = connection.prepareStatement(String.format(
394-
"COPY (SELECT %s FROM read_parquet($1)) TO '%s' (FORMAT 'parquet', CODEC 'zstd')",
395-
renamedColumns, targetFile.getAbsolutePath()))) {
380+
private void createHeaderFile(File targetFile, Map<String, String> fieldMappings) throws IOException {
396381

397-
statement.setString(1, source.uri());
398-
statement.execute();
399-
}
382+
var sortedKeys = fieldMappings.keySet().stream().sorted().toList();
383+
var values = sortedKeys.stream().map(fieldMappings::get).collect(Collectors.joining(","));
384+
Files.writeString(targetFile.toPath(), values);
385+
Files.writeString(targetFile.toPath(), "\n", StandardOpenOption.APPEND);
386+
Files.writeString(targetFile.toPath(), String.join(",", sortedKeys), StandardOpenOption.APPEND);
400387
}
401388

402389
private static String[] importCommand(ImportSpecification specification, String database) {
@@ -409,14 +396,16 @@ private static String[] importCommand(ImportSpecification specification, String
409396
command.append(" --nodes=");
410397
command.append(String.join(":", nodeTarget.getLabels()));
411398
command.append("=");
412-
command.append("/import/%s".formatted(fileName(nodeTarget)));
399+
command.append("/import/%s,".formatted(headerFileName(nodeTarget)));
400+
command.append("%s".formatted(sourceUri(specification, nodeTarget)));
413401
}
414402

415403
for (RelationshipTarget relationshipTarget : targets.getRelationships()) {
416404
command.append(" --relationships=");
417405
command.append(relationshipTarget.getType());
418406
command.append("=");
419-
command.append("/import/%s".formatted(fileName(relationshipTarget)));
407+
command.append("/import/%s,".formatted(headerFileName(relationshipTarget)));
408+
command.append("%s".formatted(sourceUri(specification, relationshipTarget)));
420409
}
421410

422411
return command.toString().split(" ");
@@ -493,8 +482,19 @@ private static Map<String, String> indexByField(List<PropertyMapping> properties
493482
return result;
494483
}
495484

496-
private static String fileName(Target target) {
497-
return "%s.parquet".formatted(target.getName());
485+
private static String headerFileName(Target target) {
486+
return "%s_header.csv".formatted(target.getName());
487+
}
488+
489+
private static String sourceUri(ImportSpecification specification, Target target) {
490+
var maybeSource = specification.getSources().stream()
491+
.filter(src -> src.getName().equals(target.getSource()))
492+
.findFirst();
493+
assertThat(maybeSource).isPresent();
494+
var rawSource = maybeSource.get();
495+
assertThat(rawSource).isInstanceOf(ParquetSource.class);
496+
var source = (ParquetSource) rawSource;
497+
return source.uri();
498498
}
499499

500500
private static String idSpaceFor(NodeTarget nodeTarget) {

examples/neo4j-admin/src/test/resources/specs/dvd_rental.yaml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,28 @@ version: "1"
22
sources:
33
- name: actor
44
type: parquet
5-
uri: gs://connectors-dev/dvdrental/actor.parquet
5+
uri: gs://connectors-public/dvdrental/actor.parquet
66
- name: category
77
type: parquet
8-
uri: gs://connectors-dev/dvdrental/category.parquet
8+
uri: gs://connectors-public/dvdrental/category.parquet
99
- name: customer
1010
type: parquet
11-
uri: gs://connectors-dev/dvdrental/customer.parquet
11+
uri: gs://connectors-public/dvdrental/customer.parquet
1212
- name: film_actor
1313
type: parquet
14-
uri: gs://connectors-dev/dvdrental/film_actor.parquet
14+
uri: gs://connectors-public/dvdrental/film_actor.parquet
1515
- name: film_category
1616
type: parquet
17-
uri: gs://connectors-dev/dvdrental/film_category.parquet
17+
uri: gs://connectors-public/dvdrental/film_category.parquet
1818
- name: film
1919
type: parquet
20-
uri: gs://connectors-dev/dvdrental/film.parquet
20+
uri: gs://connectors-public/dvdrental/film.parquet
2121
- name: inventory
2222
type: parquet
23-
uri: gs://connectors-dev/dvdrental/inventory.parquet
23+
uri: gs://connectors-public/dvdrental/inventory.parquet
2424
- name: rental
2525
type: parquet
26-
uri: gs://connectors-dev/dvdrental/rental.parquet
26+
uri: gs://connectors-public/dvdrental/rental.parquet
2727
targets:
2828
nodes:
2929
- source: actor

0 commit comments

Comments
 (0)