From ec79e58767df2c2035bf8eb2094df2ce0a7c3b5f Mon Sep 17 00:00:00 2001 From: dzlab Date: Fri, 14 Feb 2025 16:28:38 -0800 Subject: [PATCH 1/8] Article on ingesting data into Elasticsearch with Vector --- _posts/2025-02-15-vector-pipeline.md | 244 ++++++++++++++++++++ assets/2025/02/20250215-vector-pipeline.svg | 2 + assets/logos/vector-by-datadog.svg | 24 ++ 3 files changed, 270 insertions(+) create mode 100644 _posts/2025-02-15-vector-pipeline.md create mode 100644 assets/2025/02/20250215-vector-pipeline.svg create mode 100644 assets/logos/vector-by-datadog.svg diff --git a/_posts/2025-02-15-vector-pipeline.md b/_posts/2025-02-15-vector-pipeline.md new file mode 100644 index 0000000..1a448cf --- /dev/null +++ b/_posts/2025-02-15-vector-pipeline.md @@ -0,0 +1,244 @@ +--- +layout: post +comments: true +title: Building data pipelines with Vector by Datadog +excerpt: Learn how to build a data pipeline with Vector to ingest data from Kafka into Elasticsearch. +categories: monitoring +tags: [docker,elastic,kibana] +toc: true +img_excerpt: 2025/02/20250215-vector-pipeline.svg +--- + + +
+ + + + + +## Infra setup + +```yaml +# docker-compose.yaml +version: '3.8' + +services: + zookeeper: + image: confluentinc/cp-zookeeper:7.4.4 + container_name: zookeeper + networks: + - vecnet + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_TICK_TIME: 2000 + ports: + - 22181:2181 + + kafka: + image: confluentinc/cp-kafka:7.4.4 + container_name: kafka + networks: + - vecnet + depends_on: + - zookeeper + ports: + - 29092:29092 + environment: + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:7.16.3 + container_name: elasticsearch + networks: + - vecnet + environment: + - discovery.type=single-node + - xpack.security.enabled=false + ports: + - "9200:9200" + kibana: + image: docker.elastic.co/kibana/kibana:7.16.3 + container_name: kibana + networks: + - vecnet + environment: + - ELASTICSEARCH_URL=http://elasticsearch:9200 + ports: + - "5601:5601" + depends_on: + - elasticsearch + +networks: + vecnet: + name: vector_example_network +``` + +``` +$ docker-compose up -d + +Creating network "vector_example_network" with the default driver +Creating zookeeper ... done +Creating elasticsearch ... done +Creating kibana ... done +Creating kafka ... done +``` + +Check all services are running + +``` +$ docker-compose ps + + Name Command State Ports +----------------------------------------------------------------------------------------------------------------------- +elasticsearch /bin/tini -- /usr/local/bi ... Up 0.0.0.0:9200->9200/tcp,:::9200->9200/tcp, 9300/tcp +kafka /etc/confluent/docker/run Up 0.0.0.0:29092->29092/tcp,:::29092->29092/tcp, 9092/tcp +kibana /bin/tini -- /usr/local/bi ... Up 0.0.0.0:5601->5601/tcp,:::5601->5601/tcp +zookeeper /etc/confluent/docker/run Up 0.0.0.0:22181->2181/tcp,:::22181->2181/tcp, 2888/tcp, 3888/tcp +``` + +Check all ports are open + +``` +$ nc -zv localhost 22181 +Connection to localhost 22181 port [tcp/*] succeeded! + +$ nc -zv localhost 29092 +Connection to localhost 29092 port [tcp/*] succeeded! + +$ nc -zv localhost 9200 +Connection to localhost 9200 port [tcp/*] succeeded! + +$ nc -zv localhost 5601 +Connection to localhost 5601 port [tcp/*] succeeded! 
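
# Optionally, go beyond the port check and confirm Elasticsearch answers HTTP
# requests (this should print the version from the compose file, e.g. "7.16.3"):
$ curl -s http://localhost:9200 | jq '.version.number'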
+``` + + +ssh -vNL 5601:localhost:5601 $LDAP_USERNAME@$DEV_HOME.meraki.com + + +ssh -vNL 8686:localhost:8686 $LDAP_USERNAME@$DEV_HOME.meraki.com + +### Kafka + +connecting to kafka https://www.baeldung.com/kafka-docker-connection + +``` +$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-producer --bootstrap-server kafka:9092 --topic stocks-xyz +``` + +``` +$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-consumer --bootstrap-server kafka:9092 --topic stocks-xyz --from-beginning + +{"ticker":"xyz","open":1.1,"close":1.2,"date":"2025-02-15"} +Processed a total of 1 messages +``` + +## Vector + +![Vector pipeline]({{ "/assets/2025/02/20250215-vector-pipeline.svg" | absolute_url }}) + +```yaml +# vector.yaml +api: + enabled: true + +sources: + kafka_in: + type: "kafka" + bootstrap_servers: "kafka:9092" + group_id: "stocks" + key_field: "message" + topics: ["stocks-*"] + +transforms: + json_parse: + type: "remap" + inputs: ["kafka_in"] + source: | + parsed, err = parse_json(.message) + if err != null { + log(err, level: "error") + } + . |= object(parsed) ?? {} + +sinks: + elasticsearch_out: + type: elasticsearch + inputs: + - json_parse + endpoints: ["http://elasticsearch:9200"] + api_version: "v7" + mode: "bulk" + bulk: + action: "index" + index: "stocks-%Y-%m-%d" + compression: "none" +``` + +``` +docker run \ + -d \ + -v $PWD/vector.yaml:/etc/vector/vector.yaml:ro \ + -p 8686:8686 \ + --name vector \ + --network vector_example_network \ + timberio/vector:nightly-debian --verbose +``` + +Validate the target config, then exit + +``` +$ docker exec -ti $(docker ps -aqf "name=vector") vector validate + +√ Loaded ["/etc/vector/vector.yaml"] +√ Component configuration +√ Health check "elasticsearch_out" +------------------------------------ + Validated +``` + +``` +docker logs -f $(docker ps -aqf "name=vector") +``` + +Display topology and metrics in the console, for a local or remote Vector instance + +``` +$ docker exec -ti $(docker ps -aqf "name=vector") vector top +``` + +Output the topology as visual representation using the DOT language which can be rendered by GraphViz + +``` +$ docker exec -ti $(docker ps -aqf "name=vector") vector graph + +digraph { + "kafka_in" [shape="trapezium"] + "json_parse" [shape="diamond"] + "kafka_in" -> "json_parse" + "elasticsearch_out" [shape="invtrapezium"] + "json_parse" -> "elasticsearch_out" +} +``` + + + +Observe output log events from source or transform components. Logs are sampled at a specified interval + +``` +$ docker exec -ti $(docker ps -aqf "name=vector") vector tap + +2025-02-13T23:10:13.677283Z INFO vector::app: Log level is enabled. level="info" +[tap] Pattern '*' successfully matched. +[tap] Warning: sink outputs cannot be tapped. 
Output pattern '*' matches sinks ["elasticsearch_out"] +``` + + +``` +$ docker rm -f $(docker ps -aqf "name=vector") +``` \ No newline at end of file diff --git a/assets/2025/02/20250215-vector-pipeline.svg b/assets/2025/02/20250215-vector-pipeline.svg new file mode 100644 index 0000000..c872a25 --- /dev/null +++ b/assets/2025/02/20250215-vector-pipeline.svg @@ -0,0 +1,2 @@ +Source: KafkaSink: Elasticsearchtransforms: json_parse \ No newline at end of file diff --git a/assets/logos/vector-by-datadog.svg b/assets/logos/vector-by-datadog.svg new file mode 100644 index 0000000..b401151 --- /dev/null +++ b/assets/logos/vector-by-datadog.svg @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From b1c330ed25803db2b370872daf4f751f2194684c Mon Sep 17 00:00:00 2001 From: dzlab Date: Sat, 15 Feb 2025 16:18:26 -0800 Subject: [PATCH 2/8] update vector config --- _posts/2025-02-15-vector-pipeline.md | 52 ++++++++++++++++++++++------ assets/2025/02/20250215-vector.svg | 1 + 2 files changed, 42 insertions(+), 11 deletions(-) create mode 100644 assets/2025/02/20250215-vector.svg diff --git a/_posts/2025-02-15-vector-pipeline.md b/_posts/2025-02-15-vector-pipeline.md index 1a448cf..da3fc51 100644 --- a/_posts/2025-02-15-vector-pipeline.md +++ b/_posts/2025-02-15-vector-pipeline.md @@ -6,7 +6,7 @@ excerpt: Learn how to build a data pipeline with Vector to ingest data from Kafk categories: monitoring tags: [docker,elastic,kibana] toc: true -img_excerpt: 2025/02/20250215-vector-pipeline.svg +img_excerpt: assets/logos/vector-by-datadog.svg --- @@ -14,6 +14,7 @@ img_excerpt: 2025/02/20250215-vector-pipeline.svg +![Vector architecture]({{ "/assets/2025/02/20250215-vector.svg" | absolute_url }}) ## Infra setup @@ -127,14 +128,35 @@ ssh -vNL 8686:localhost:8686 $LDAP_USERNAME@$DEV_HOME.meraki.com connecting to kafka https://www.baeldung.com/kafka-docker-connection +List topics + +``` +$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-topics --bootstrap-server kafka:9092 --list + +logs-2025-02-15 ``` -$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-producer --bootstrap-server kafka:9092 --topic stocks-xyz + +Describe a topic + ``` +$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-topics --bootstrap-server kafka:9092 --describe --topic logs-2025-02-15 +Topic: logs-2025-02-15 TopicId: KS47H7xDRV2BhEI7CYyOOg PartitionCount: 1 ReplicationFactor: 1 Configs: + Topic: logs-2025-02-15 Partition: 0 Leader: 1 Replicas: 1 Isr: 1 Elr: N/A LastKnownElr: N/A ``` -$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-consumer --bootstrap-server kafka:9092 --topic stocks-xyz --from-beginning -{"ticker":"xyz","open":1.1,"close":1.2,"date":"2025-02-15"} +Manually publish messages to the topic + +``` +$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-producer --bootstrap-server kafka:9092 --topic logs-2025-02-15 +``` + +Read published messages + +``` +$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-consumer --bootstrap-server kafka:9092 --topic logs-2025-02-15 --from-beginning + +{"appname":"BronzeGamer","facility":"audit","hostname":"for.yun","message":"We're gonna need a bigger boat","msgid":"ID897","procid":4423,"severity":"debug","timestamp":"2025-02-15T23:50:09.677Z","version":1} Processed a total of 1 messages ``` @@ -142,6 
+164,10 @@ Processed a total of 1 messages ![Vector pipeline]({{ "/assets/2025/02/20250215-vector-pipeline.svg" | absolute_url }}) +The Kafka topics names to read events from. + +Regular expression syntax is supported if the topic begins with ^. + ```yaml # vector.yaml api: @@ -151,9 +177,11 @@ sources: kafka_in: type: "kafka" bootstrap_servers: "kafka:9092" - group_id: "stocks" + group_id: "logs" key_field: "message" - topics: ["stocks-*"] + topics: ["^logs-.+"] + metrics: + topic_lag_metric: true transforms: json_parse: @@ -172,11 +200,11 @@ sinks: inputs: - json_parse endpoints: ["http://elasticsearch:9200"] - api_version: "v7" - mode: "bulk" + api_version: "auto" + mode: "data_stream" bulk: - action: "index" - index: "stocks-%Y-%m-%d" + action: "create" + index: "logs-%Y-%m-%d" compression: "none" ``` @@ -187,9 +215,11 @@ docker run \ -p 8686:8686 \ --name vector \ --network vector_example_network \ - timberio/vector:nightly-debian --verbose + timberio/vector:nightly-debian ``` +> Note: use the `--verbose` to get debug level logging + Validate the target config, then exit ``` diff --git a/assets/2025/02/20250215-vector.svg b/assets/2025/02/20250215-vector.svg new file mode 100644 index 0000000..904a5ce --- /dev/null +++ b/assets/2025/02/20250215-vector.svg @@ -0,0 +1 @@ +Vector DiagramA lightweight and ultra-fast tool for building observability pipelines \ No newline at end of file From 71c6a0a4380de764fcaf1bb020256d1773f7272c Mon Sep 17 00:00:00 2001 From: dzlab Date: Sun, 16 Feb 2025 19:13:28 -0800 Subject: [PATCH 3/8] add kafka sink --- _posts/2025-02-15-vector-pipeline.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/_posts/2025-02-15-vector-pipeline.md b/_posts/2025-02-15-vector-pipeline.md index da3fc51..60cc68d 100644 --- a/_posts/2025-02-15-vector-pipeline.md +++ b/_posts/2025-02-15-vector-pipeline.md @@ -195,10 +195,15 @@ transforms: . |= object(parsed) ?? {} sinks: + console_out: + inputs: ["remap_syslog"] + type: "console" + encoding: + codec: "json" + elasticsearch_out: type: elasticsearch - inputs: - - json_parse + inputs: [ "remap_syslog" ] endpoints: ["http://elasticsearch:9200"] api_version: "auto" mode: "data_stream" @@ -206,6 +211,14 @@ sinks: action: "create" index: "logs-%Y-%m-%d" compression: "none" + + kafka_out: + type: "kafka" + inputs: [ "remap_syslog" ] + bootstrap_servers: "kafka:9092" + topic: "logs-%Y-%m-%d" + encoding: + codec: "json" ``` ``` From 94e81fda88ba335fd13d2355894e8262d724d894 Mon Sep 17 00:00:00 2001 From: dzlab Date: Thu, 27 Feb 2025 16:23:16 -0800 Subject: [PATCH 4/8] added elasticsearch search examples --- _posts/2025-02-15-vector-pipeline.md | 207 +++++++++++++++++++++++---- 1 file changed, 180 insertions(+), 27 deletions(-) diff --git a/_posts/2025-02-15-vector-pipeline.md b/_posts/2025-02-15-vector-pipeline.md index 60cc68d..f7ef1aa 100644 --- a/_posts/2025-02-15-vector-pipeline.md +++ b/_posts/2025-02-15-vector-pipeline.md @@ -174,25 +174,18 @@ api: enabled: true sources: - kafka_in: - type: "kafka" - bootstrap_servers: "kafka:9092" - group_id: "logs" - key_field: "message" - topics: ["^logs-.+"] - metrics: - topic_lag_metric: true + generate_syslog: + type: "demo_logs" + format: "syslog" + count: 50 transforms: - json_parse: + remap_syslog: + inputs: [ "generate_syslog"] type: "remap" - inputs: ["kafka_in"] source: | - parsed, err = parse_json(.message) - if err != null { - log(err, level: "error") - } - . |= object(parsed) ?? {} + parsed = parse_syslog!(.message) + . 
= object(parsed) sinks: console_out: @@ -202,15 +195,10 @@ sinks: codec: "json" elasticsearch_out: - type: elasticsearch - inputs: [ "remap_syslog" ] + type: "elasticsearch" + inputs: ["remap_syslog"] + healthcheck: false endpoints: ["http://elasticsearch:9200"] - api_version: "auto" - mode: "data_stream" - bulk: - action: "create" - index: "logs-%Y-%m-%d" - compression: "none" kafka_out: type: "kafka" @@ -261,11 +249,11 @@ Output the topology as visual representation using the DOT language which can be $ docker exec -ti $(docker ps -aqf "name=vector") vector graph digraph { - "kafka_in" [shape="trapezium"] - "json_parse" [shape="diamond"] - "kafka_in" -> "json_parse" + "generate_syslog" [shape="trapezium"] + "remap_syslog" [shape="diamond"] + "generate_syslog" -> "remap_syslog" "elasticsearch_out" [shape="invtrapezium"] - "json_parse" -> "elasticsearch_out" + "remap_syslog" -> "elasticsearch_out" } ``` @@ -279,9 +267,174 @@ $ docker exec -ti $(docker ps -aqf "name=vector") vector tap 2025-02-13T23:10:13.677283Z INFO vector::app: Log level is enabled. level="info" [tap] Pattern '*' successfully matched. [tap] Warning: sink outputs cannot be tapped. Output pattern '*' matches sinks ["elasticsearch_out"] + +{"appname":"BronzeGamer","facility":"local4","hostname":"names.rsvp","message":"#hugops to everyone who has to deal with this","msgid":"ID347","procid":6651,"severity":"emerg","timestamp":"2025-02-28T00:08:10.006Z","version":2} +``` + +``` +$ curl -s http://localhost:9200/_aliases | jq + +{ + ... + "vector-2025.02.28": { + "aliases": {} + }, + ... +} +``` + + +``` +$ curl -s http://localhost:9200/vector-2025.02.28 | jq + +{ + "vector-2025.02.28": { + "aliases": {}, + "mappings": { + "properties": { + "appname": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "facility": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "hostname": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "message": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "msgid": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "procid": { + "type": "long" + }, + "severity": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "timestamp": { + "type": "date" + }, + "version": { + "type": "long" + } + } + }, + "settings": { + "index": { + "routing": { + "allocation": { + "include": { + "_tier_preference": "data_content" + } + } + }, + "number_of_shards": "1", + "provided_name": "vector-2025.02.28", + "creation_date": "1740701263010", + "number_of_replicas": "1", + "uuid": "GE9reUyzTSyS0W-og3YQ6g", + "version": { + "created": "7160399" + } + } + } + } +} ``` +``` +$ curl -s 'http://localhost:9200/vector-2025.02.28/_search?pretty=true&size=1' + +{ + "took" : 1, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 50, + "relation" : "eq" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : "vector-2025.02.28", + "_type" : "_doc", + "_id" : "Yo_hSZUBbMT2FeGGgyTf", + "_score" : 1.0, + "_source" : { + "appname" : "BryanHorsey", + "facility" : "uucp", + "hostname" : "random.helsinki", + "message" : "Pretty pretty pretty good", + "msgid" : "ID4", + "procid" : 4488, + "severity" : "notice", + 
"timestamp" : "2025-02-28T00:07:50.006Z", + "version" : 2 + } + } + ] + } +} +``` ``` $ docker rm -f $(docker ps -aqf "name=vector") +``` + +``` +$ docker-compose down + +Stopping kafka ... done +Stopping kibana ... done +Stopping zookeeper ... done +Stopping elasticsearch ... done +Removing kafka ... done +Removing kibana ... done +Removing zookeeper ... done +Removing elasticsearch ... done +Removing network vector_example_network ``` \ No newline at end of file From 72e1c37796c8dcdcdcf450edbaca1c8e41b7b58c Mon Sep 17 00:00:00 2001 From: dzlab Date: Thu, 27 Feb 2025 16:31:55 -0800 Subject: [PATCH 5/8] updated vector pipeline --- assets/2025/02/20250215-vector-pipeline.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/2025/02/20250215-vector-pipeline.svg b/assets/2025/02/20250215-vector-pipeline.svg index c872a25..d96ba61 100644 --- a/assets/2025/02/20250215-vector-pipeline.svg +++ b/assets/2025/02/20250215-vector-pipeline.svg @@ -1,2 +1,2 @@ -Source: KafkaSink: Elasticsearchtransforms: json_parse \ No newline at end of file +transformsSourceSinksSysLog \ No newline at end of file From 0cf63f6d2b081a881ea63a893693692f56e0b0af Mon Sep 17 00:00:00 2001 From: dzlab Date: Fri, 28 Feb 2025 18:40:26 -0800 Subject: [PATCH 6/8] added description for sections --- _posts/2025-02-15-vector-pipeline.md | 106 +++++++++++++++------------ 1 file changed, 60 insertions(+), 46 deletions(-) diff --git a/_posts/2025-02-15-vector-pipeline.md b/_posts/2025-02-15-vector-pipeline.md index f7ef1aa..122a5d6 100644 --- a/_posts/2025-02-15-vector-pipeline.md +++ b/_posts/2025-02-15-vector-pipeline.md @@ -2,7 +2,7 @@ layout: post comments: true title: Building data pipelines with Vector by Datadog -excerpt: Learn how to build a data pipeline with Vector to ingest data from Kafka into Elasticsearch. +excerpt: Learn how to build a data pipeline with Vector to ingest data into Elasticsearch. categories: monitoring tags: [docker,elastic,kibana] toc: true @@ -13,11 +13,23 @@ img_excerpt: assets/logos/vector-by-datadog.svg
+[Vector](https://vector.dev/) is an open-source log aggregator developed by Datadog. Vector is a lightweight, exceptionally fast, and memory-efficient alternative to [Logstash](https://www.elastic.co/logstash). Vector makes it easy to build observability pipelines by seamlessly capturing logs from many sources, applying transformations, and routing them to one of the many predefined sinks.

![Vector architecture]({{ "/assets/2025/02/20250215-vector.svg" | absolute_url }})

+In this article, we will explore how to leverage Vector to collect syslog messages, transform them into JSON events, and write them to a Kafka topic as well as an Elasticsearch index.
-
-## Infra setup
+
+## Infrastructure setup
+
+First, let's set up the infrastructure using Docker. The following Docker Compose file defines the setup with the following components:
+
+* **ZooKeeper**: a coordination service for distributed systems, used by Kafka. It exposes port 2181 (mapped to host port 22181).
+* **Kafka**: a distributed event streaming platform, exposing port 29092 for host access.
+* **Elasticsearch**: a search and analytics engine, available on port 9200.
+* **Kibana**: a data visualization dashboard for Elasticsearch, available on port 5601.
+
+Additionally, all services are connected through a custom Docker network called `vector_example_network`.

```yaml
# docker-compose.yaml
@@ -79,6 +91,8 @@ networks:
    name: vector_example_network
```

+We can start the infrastructure as follows:
+
```
$ docker-compose up -d

@@ -102,7 +116,7 @@ kibana          /bin/tini -- /usr/local/bi ...   Up      0.0.0.0:5601->5601/tcp,
zookeeper       /etc/confluent/docker/run        Up      0.0.0.0:22181->2181/tcp,:::22181->2181/tcp, 2888/tcp, 3888/tcp
```

-Check all ports are open
+Another check to perform before moving further is to verify that all exposed ports are open:

```
$ nc -zv localhost 22181
@@ -118,48 +132,6 @@ $ nc -zv localhost 5601
Connection to localhost 5601 port [tcp/*] succeeded!
``` - -ssh -vNL 5601:localhost:5601 $LDAP_USERNAME@$DEV_HOME.meraki.com - - -ssh -vNL 8686:localhost:8686 $LDAP_USERNAME@$DEV_HOME.meraki.com - -### Kafka - -connecting to kafka https://www.baeldung.com/kafka-docker-connection - -List topics - -``` -$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-topics --bootstrap-server kafka:9092 --list - -logs-2025-02-15 -``` - -Describe a topic - -``` -$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-topics --bootstrap-server kafka:9092 --describe --topic logs-2025-02-15 - -Topic: logs-2025-02-15 TopicId: KS47H7xDRV2BhEI7CYyOOg PartitionCount: 1 ReplicationFactor: 1 Configs: - Topic: logs-2025-02-15 Partition: 0 Leader: 1 Replicas: 1 Isr: 1 Elr: N/A LastKnownElr: N/A -``` - -Manually publish messages to the topic - -``` -$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-producer --bootstrap-server kafka:9092 --topic logs-2025-02-15 -``` - -Read published messages - -``` -$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-consumer --bootstrap-server kafka:9092 --topic logs-2025-02-15 --from-beginning - -{"appname":"BronzeGamer","facility":"audit","hostname":"for.yun","message":"We're gonna need a bigger boat","msgid":"ID897","procid":4423,"severity":"debug","timestamp":"2025-02-15T23:50:09.677Z","version":1} -Processed a total of 1 messages -``` - ## Vector ![Vector pipeline]({{ "/assets/2025/02/20250215-vector-pipeline.svg" | absolute_url }}) @@ -271,6 +243,44 @@ $ docker exec -ti $(docker ps -aqf "name=vector") vector tap {"appname":"BronzeGamer","facility":"local4","hostname":"names.rsvp","message":"#hugops to everyone who has to deal with this","msgid":"ID347","procid":6651,"severity":"emerg","timestamp":"2025-02-28T00:08:10.006Z","version":2} ``` +## Kafka + +connecting to kafka https://www.baeldung.com/kafka-docker-connection + +List topics + +``` +$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-topics --bootstrap-server kafka:9092 --list + +logs-2025-02-15 +``` + +Describe a topic + +``` +$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-topics --bootstrap-server kafka:9092 --describe --topic logs-2025-02-15 + +Topic: logs-2025-02-15 TopicId: KS47H7xDRV2BhEI7CYyOOg PartitionCount: 1 ReplicationFactor: 1 Configs: + Topic: logs-2025-02-15 Partition: 0 Leader: 1 Replicas: 1 Isr: 1 Elr: N/A LastKnownElr: N/A +``` + +Manually publish messages to the topic + +``` +$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-producer --bootstrap-server kafka:9092 --topic logs-2025-02-15 +``` + +Read published messages + +``` +$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-consumer --bootstrap-server kafka:9092 --topic logs-2025-02-15 --from-beginning + +{"appname":"BronzeGamer","facility":"audit","hostname":"for.yun","message":"We're gonna need a bigger boat","msgid":"ID897","procid":4423,"severity":"debug","timestamp":"2025-02-15T23:50:09.677Z","version":1} +Processed a total of 1 messages +``` + +## Elasticsearch + ``` $ curl -s http://localhost:9200/_aliases | jq @@ -437,4 +447,8 @@ Removing kibana ... done Removing zookeeper ... done Removing elasticsearch ... 
done
Removing network vector_example_network
-```
\ No newline at end of file
+```
+
+## That's all folks
+
+I hope you enjoyed this article. Feel free to leave a comment or reach out on Twitter [@bachiirc](https://twitter.com/bachiirc).

From 783abaf2bb17abb5120d99ce0fbd499f7cf772f6 Mon Sep 17 00:00:00 2001
From: dzlab
Date: Sun, 4 May 2025 16:23:05 -0700
Subject: [PATCH 7/8] added details for vector pipeline setup

---
 _posts/2025-02-15-vector-pipeline.md | 41 ++++++++++++++++++----------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/_posts/2025-02-15-vector-pipeline.md b/_posts/2025-02-15-vector-pipeline.md
index 122a5d6..0a85277 100644
--- a/_posts/2025-02-15-vector-pipeline.md
+++ b/_posts/2025-02-15-vector-pipeline.md
@@ -133,12 +133,16 @@ Connection to localhost 5601 port [tcp/*] succeeded!
```

## Vector
+In this section, we will build the data processing pipeline for our log data using Vector, a high-performance observability data pipeline tool. Our pipeline will generate a few samples of syslog data, apply some transformations, and then send the processed data to the selected destinations.

![Vector pipeline]({{ "/assets/2025/02/20250215-vector-pipeline.svg" | absolute_url }})

-The Kafka topics names to read events from.
+The structure of our Vector pipeline, as defined in the YAML file below, is as follows:
+
+- **Sources**: defines the input data origin. In our case, we will simply generate sample syslog data.
+- **Transforms**: defines a step called `remap_syslog` to parse the syslog-formatted messages into structured data and extract fields such as timestamp, severity, facility, etc.
+- **Sinks**: defines the outputs of the pipeline; we use a console sink to monitor the pipeline's output in real time. We also forward the data to Elasticsearch and Kafka for storage.

-Regular expression syntax is supported if the topic begins with ^.

```yaml
# vector.yaml
@@ -181,6 +185,8 @@ sinks:
      codec: "json"
```

+Now we can start the Vector service and pass in the YAML file containing the pipeline definition:
+
```
docker run \
  -d \
@@ -205,11 +211,13 @@ $ docker exec -ti $(docker ps -aqf "name=vector") vector validate
 Validated
```

+Check the logs from the container running Vector to make sure everything is running correctly:
+
```
docker logs -f $(docker ps -aqf "name=vector")
```

-Display topology and metrics in the console, for a local or remote Vector instance
+Display Vector's metrics in the console:

```
$ docker exec -ti $(docker ps -aqf "name=vector") vector top
@@ -229,9 +237,7 @@ digraph {
 }
```

-
-
-Observe output log events from source or transform components. Logs are sampled at a specified interval
+Also, we can observe output log events from the source or transform components:

```
$ docker exec -ti $(docker ps -aqf "name=vector") vector tap
@@ -243,11 +249,8 @@ $ docker exec -ti $(docker ps -aqf "name=vector") vector tap

{"appname":"BronzeGamer","facility":"local4","hostname":"names.rsvp","message":"#hugops to everyone who has to deal with this","msgid":"ID347","procid":6651,"severity":"emerg","timestamp":"2025-02-28T00:08:10.006Z","version":2}
```

-## Kafka
-
-connecting to kafka https://www.baeldung.com/kafka-docker-connection
-
-List topics
+## Kafka setup
+Our Vector pipeline also forwards messages to a Kafka topic; we can list the topics to verify that the topic for receiving events was created:

```
$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-topics --bootstrap-server kafka:9092 --list
@@ -255,7 +258,7 @@ $ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bi

logs-2025-02-15
```

-Describe a topic
+We can also show more information about the topic created by Vector:

```
$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-topics --bootstrap-server kafka:9092 --describe --topic logs-2025-02-15
@@ -264,13 +267,13 @@ Topic: logs-2025-02-15 TopicId: KS47H7xDRV2BhEI7CYyOOg PartitionCount: 1 Replica
 Topic: logs-2025-02-15 Partition: 0 Leader: 1 Replicas: 1 Isr: 1 Elr: N/A LastKnownElr: N/A
```

-Manually publish messages to the topic
+Optionally, publish messages to the topic manually for testing:

```
$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-producer --bootstrap-server kafka:9092 --topic logs-2025-02-15
```

-Read published messages
+Read the published messages as they are pushed to Kafka:

```
$ docker run -it --rm --network vector_example_network confluentinc/cp-kafka /bin/kafka-console-consumer --bootstrap-server kafka:9092 --topic logs-2025-02-15 --from-beginning
@@ -281,6 +284,8 @@ Processed a total of 1 messages

## Elasticsearch

+Elasticsearch is another destination for the logs shipped by Vector. We can list the indices and check that one was created by Vector:
+
```
$ curl -s http://localhost:9200/_aliases | jq
@@ -293,6 +298,7 @@ $ curl -s http://localhost:9200/_aliases | jq
 }
```

+We can also check the structure (mappings) of the documents indexed by Vector:

```
$ curl -s http://localhost:9200/vector-2025.02.28 | jq
@@ -390,6 +396,8 @@ $ curl -s http://localhost:9200/vector-2025.02.28 | jq
 }
```

+We can view the documents inserted into this index by Vector:
+
```
$ curl -s 'http://localhost:9200/vector-2025.02.28/_search?pretty=true&size=1'
@@ -431,10 +439,15 @@ $ curl -s 'http://localhost:9200/vector-2025.02.28/_search?pretty=true&size=1'
 }
```

+## Wrapping up
+Stop the Vector container:
+
```
$ docker rm -f $(docker ps -aqf "name=vector")
```

+And tear down the infrastructure previously set up with Docker Compose:
+
```
$ docker-compose down

From c9e48313b52379a1358d60aa07547bb9fa9e5f97 Mon Sep 17 00:00:00 2001
From: dzlab
Date: Sun, 4 May 2025 16:23:30 -0700
Subject: [PATCH 8/8] update date

---
 ...025-02-15-vector-pipeline.md => 2025-02-28-vector-pipeline.md} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename _posts/{2025-02-15-vector-pipeline.md => 2025-02-28-vector-pipeline.md} (100%)

diff --git a/_posts/2025-02-15-vector-pipeline.md b/_posts/2025-02-28-vector-pipeline.md
similarity index 100%
rename from _posts/2025-02-15-vector-pipeline.md
rename to _posts/2025-02-28-vector-pipeline.md