From 4fdbe8be38019645c9ae4ba8389075c2175a0184 Mon Sep 17 00:00:00 2001 From: Miguel Neiva Date: Mon, 4 Aug 2025 12:55:26 +0100 Subject: [PATCH 1/4] fix: last line anchor improvements --- .2ms.yml | 222 +++++++++++- .github/ISSUE_TEMPLATE/bug_report.md | 40 +++ .github/ISSUE_TEMPLATE/feature_request.md | 20 ++ .../scripts/pr-issue-info/get_title_types.py | 13 + .github/scripts/pr-issue-info/issue-fail.md | 14 + .github/scripts/pr-issue-info/title-fail.md | 8 + .github/workflows/validate-issues.yaml | 120 +++++++ engine/detect/baseline.go | 85 +++++ engine/detect/baseline_test.go | 88 +++++ engine/detect/detect.go | 327 +++++++++++++++++ engine/detect/detect_test.go | 332 ++++++++++++++++++ engine/detect/location.go | 79 +++++ engine/detect/location_test.go | 57 +++ engine/detect/utils.go | 38 ++ engine/engine.go | 61 ++-- engine/engine_test.go | 218 +++++++++++- go.mod | 4 +- pkg/scan_test.go | 21 +- pkg/testData/expectedReport.json | 22 +- .../expectedReportWithIgnoredResults.json | 12 +- .../expectedReportWithIgnoredRule.json | 10 +- .../expectedReportWithValidation.json | 22 +- tests/e2e_test.go | 7 +- tests/testData/baseline/baseline.csv | 2 + tests/testData/baseline/baseline.json | 40 +++ tests/testData/baseline/baseline.sarif | 6 + tests/testData/config/allow_aws_re.toml | 9 + tests/testData/config/allow_commit.toml | 9 + .../testData/config/allow_global_aws_re.toml | 8 + tests/testData/config/allow_path.toml | 9 + tests/testData/config/bad_entropy_group.toml | 8 + .../testData/config/entropy_group - Copy.toml | 8 + tests/testData/config/entropy_group.toml | 8 + .../config/escaped_character_group.toml | 8 + .../config/generic_with_py_path - Copy.toml | 36 ++ .../testData/config/generic_with_py_path.toml | 36 ++ tests/testData/config/simple.toml | 222 ++++++++++++ .../multi_line_secret_report.json | 14 +- .../expectedReport/secret_at_end_report.json | 32 +- .../secret_at_end_with_newline_report.json | 32 +- tests/testData/input/multi_line_secret.txt | 3 +- 41 
files changed, 2184 insertions(+), 126 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/scripts/pr-issue-info/get_title_types.py create mode 100644 .github/scripts/pr-issue-info/issue-fail.md create mode 100644 .github/scripts/pr-issue-info/title-fail.md create mode 100644 .github/workflows/validate-issues.yaml create mode 100644 engine/detect/baseline.go create mode 100644 engine/detect/baseline_test.go create mode 100644 engine/detect/detect.go create mode 100644 engine/detect/detect_test.go create mode 100644 engine/detect/location.go create mode 100644 engine/detect/location_test.go create mode 100644 engine/detect/utils.go create mode 100644 tests/testData/baseline/baseline.csv create mode 100644 tests/testData/baseline/baseline.json create mode 100644 tests/testData/baseline/baseline.sarif create mode 100644 tests/testData/config/allow_aws_re.toml create mode 100644 tests/testData/config/allow_commit.toml create mode 100644 tests/testData/config/allow_global_aws_re.toml create mode 100644 tests/testData/config/allow_path.toml create mode 100644 tests/testData/config/bad_entropy_group.toml create mode 100644 tests/testData/config/entropy_group - Copy.toml create mode 100644 tests/testData/config/entropy_group.toml create mode 100644 tests/testData/config/escaped_character_group.toml create mode 100644 tests/testData/config/generic_with_py_path - Copy.toml create mode 100644 tests/testData/config/generic_with_py_path.toml create mode 100644 tests/testData/config/simple.toml diff --git a/.2ms.yml b/.2ms.yml index ce0febcd..9d27b838 100644 --- a/.2ms.yml +++ b/.2ms.yml @@ -139,4 +139,224 @@ ignore-result: - f701cd699fcb706453af869581c74a7133a5a317 # value used for testing - b3f999807edd036ffd73f14a2ca43c543bcf366d # value used for testing - 7585409b82ac064a256b70d9e526a011ebfb0411 # value used for testing -- f4d8d834faf54a9551b2a1d937a436bea498506e # value 
used for testing \ No newline at end of file +- f4d8d834faf54a9551b2a1d937a436bea498506e # value used for testing +- 3bddc52a5b6aff995b3829eeba5f87626f2b7f7f # value used for testing +- 531b49a6f6f43623b8971948c53cc082304dcc2b # value used for testing +- a439aa5bbae6f094100481d0b6d87744524eff15 # value used for testing +- 30f3898634851d3b981defe7e75ded299ee9530e # value used for testing +- df291d1e6c760ae2a6ffec5b25c147c754c4cb77 # value used for testing +- ff4f05a3f1528baa4b4fb91fb15ca676cf13b464 # value used for testing +- 59aa978a876ea2b13fedcbea67e7b55f6ccafede # value used for testing +- 4e6d7ff4d583c4c2cb5a5a5cfe667844e39cff00 # value used for testing +- 1e4a59dc529e21bde7a7a46c42988e890c7a60c0 # value used for testing +- 8609df53e5a89d3d0e416c6c1ecb9880b6975590 # value used for testing +- 695ffd03463749a414e321d715897a7c4c9ad226 # value used for testing +- 060edd23b363a635e117ba0391554c2d579c28fa # value used for testing +- 440c11b6ca9ba4cec96f92955e28d99253ac799d # value used for testing +- 65812acbb0b28020978bfd3518769f9c7c5dcc5d # value used for testing +- 74eaaa8c2376af5dbfc3533e477c6e5ece9938d7 # value used for testing +- 90769e2fc97e5a376dd101cbe54ece3e09de90a8 # value used for testing +- 5f977ef254f7109e1b421c8f7f14e01fa0a19bbf # value used for testing +- 4e9e15fb0b09fc20706797968baa569a9e4c8eba # value used for testing +- 984bd1736c9bb3a002f915002f6ef29132211495 # value used for testing +- e176d40f89b4b24f5c1f370f0958ea2faf2a4083 # value used for testing +- 6c5a7ecbc9d44e6b90092346196cc9cd79889632 # value used for testing +- 5525dfc852da8dfa26fdc37885190faff7b9ee5e # value used for testing +- b8775b699c7ddb9cdea349f5e4190ca733ab986d # value used for testing +- a04eac50810b66a055df05984ab338a8c494a578 # value used for testing +- 152613bc2fe61c0a9e70d4682ce55f4d52bb4965 # value used for testing +- bc582d65e164c9ba42a9e1fa7c8aafe5833da55a # value used for testing +- 660ab191041768525e7a13a153d6c997cdf58a3e # value used for testing +- 
3003dd847b2c587e2451a9883f42b73cb32f15e4 # value used for testing +- 1beb3a59c22a5fd8d7c205e404016f7c7d06f262 # value used for testing +- 0609506c0e8435fddbcd37bbcafda1d76b18134a # value used for testing +- 0609506c0e8435fddbcd37bbcafda1d76b18134a # value used for testing +- 618837d3cb846fd621ae50803dec892db150f807 # value used for testing +- b630c78ead3aea9db9f2e40757aaffc5dbb7f5e1 # value used for testing +- 07269afcf8cbb841bc27b38ce59344346257f202 # value used for testing +- 9857e431ac8bd495f850fb3d1fad5455ee7d88d8 # value used for testing +- f59aa7b1240a40448c4ae1cd41f75418b2c59218 # value used for testing +- 2ef051908f6244a3136d12fcf69173b21ae48850 # value used for testing +- ad586867369a33cafc7f9487f66ad3458ce88ec8 # value used for testing +- 4f1d13e1bbebef31175ffe9a8d752609b9edc174 # test/development data from authenticated_url.go +- 99f9ffb901cb72a0282ce32cf7dc050e5225cd81 # test data from multi_line_secret_report.json +- 8149f62cd847f3c4ba5ffc502bdcb8d66e800c7f # test data from multi_line_secret.txt +- 2d2c9ddd21109c7d4909e166c18d570eadc95812 # test data from expectedReportWithIgnoredResults.json +- acf7fa74f45c792f8ab3f701609a0b6ba4283b91 # test data from expectedReport.json +- 05ab8f76aebbca7ea76b904ce4f4f7cf287f16c3 # unit test from extra_test.go +- 1e829f2d6236716cecb4683af3d26236fe4d5467 # test data from expectedReport.json +- deddd58b4aa4999419d6b9046dffa9fffdfd4860 # unit test from secrets_test.go +- 255853e2044119bf502261713e2f892265d4b5c1 # test/development data from rules.go +- ffc22deda44ebb0d4633bed184c5e26e99657084 # unit test from report_test.go +- e53a3a4e8c0665454eb9a4c36eaf040e9317e450 # unit test from report_test.go +- 0f80a32cc85ea5c04b65dbf7d6db6ddb8c2e4d29 # test data from secret_at_end_with_newline.txt +- 53803ee7e880952e926898a434acff4483fec67e # test/development data from generic-key.go +- 4e243aa393011da09ff2bc4a598c2879e967abb8 # test data from expectedReportWithIgnoredResults.json +- bdd20706ea03aa38c8c9f3f87200cf6ab9010a53 # 
test data from secret_at_end.txt +- 30ea5ee224b162075bf512dbf5854002b6c5e727 # test/development data from generic-key.go +- ca6b0f5b7f055a1ae52c2f34ad5d0409f7625c85 # test data from github-pat.txt +- da610673906f695e3e85bda6fc0a916762f01a70 # unit test from workers_test.go +- b7c3ac03d8a24892a2c4be5810ce73ffdf6ba3ae # unit test from report_test.go +- 0b217706e100e9a05bbaa8427070d181bb2e2465 # unit test from secrets_test.go +- 9c6853ebe9b5e20774224ba6e5ea739191330e53 # unit test from secrets_test.go +- e3b354d102fe73cd4f4016e1ee17e468256d2ae8 # test data from expectedReport.json +- bc457e70eee50d9edd5ec8194bc7749fa95fde34 # test data from github-pat.txt +- a324bc00bebfbd268b1b9e4cddcd095da1193cd2 # unit test from engine_test.go +- 59f8916ff79257c8f86207d6e89767cc8e156814 # test/development data from hardcodedPassword.go +- d766d69fed184582fc0cba1515f9beef7901e7a1 # test/development data from authenticated_url.go +- f86543794ab8c77a54adc91581dcf72bfef6bf78 # test data from multi_line_secret.txt +- 51a6f4e3c7e3a79c9722abb7541b4902098e526b # test/development data from generic-key.go +- 489123e817ab17accc3d47a6d39cdb17e37b6c97 # test data from expectedReport.json +- f8da5c56428cf708773be38269932c46aaf44cd4 # unit test from workers_test.go +- d9207d5fa344d2423e97384f45014c87c0c91d4f # test data from expectedReport.json +- 5e73b4b73bf4a59b11f37066829af01478879067 # test/development data from secrets.go +- aa52405f239a8be1284d933025c557b071b24036 # test/development data from generic-key.go +- d7cf637e896d585946966d95d7b04910569a0191 # test data from expectedReportWithIgnoredResults.json +- 353627158f2e7fa5bb60271cee17da80e5fbba17 # test/development data from generic-key.go +- 4632bbfa56424359e6386b56d05bcec49b16548a # test data from jwt.txt +- 8ad1f44906b41c1f4abf613b30bc2da89949cb7e # test/development data from privateKey.go +- a3a83b7224e7e98e3cca6bd2cd138dbca831e06d # test/development data from hardcodedPassword.go +- 65706aeda7939dca8035f4b0a3446babffc7fcef # 
test/development data from authenticated_url.go +- ba1f0517b77a5b451d1d55078218cd23d96b686e # test/development data from hardcodedPassword.go +- d696fe501f3860f76cf768c7ebbccc416db6e4d2 # unit test from secrets_test.go +- 4d5a3dd11c58b7b471ec087ab589b1995f09891e # unit test from workers_test.go +- e7bf294c124122a6cf919edbffa40bf6572927b6 # unit test from secrets_test.go +- c5748512948b492f5c07849ae2e69e7e831d36d3 # unit test from secrets_test.go +- 854547fc6e35c0d1f63c0f4d426aebd4d64679fc # test/development data from secrets.go +- 5c2e640a480ca64c809133e1b157fd97960356bf # test data from expectedReport.json +- 1bd84965941175ee61639964adbff6170bea7703 # test data from secret_at_end.txt +- 29a593e19a06c138d63468b8a028696ccdfc7eb2 # test data from secret_at_end_with_newline.txt +- 37bd36009d49856cecfe7df2063e1bdc6d28e707 # test/development data from privateKey.go +- 7c73d41f23ba8e59a1c8d744594dbb54f87197b4 # test/development data from hardcodedPassword.go +- ae0f7e65c291d7f0ea998dfa77485bfc632e5d62 # test data from multi_line_secret.txt +- 09f677494966f740534bc70d6194529a96d59a69 # test data from expectedReport.json +- 670491bf5e759f4c03bf0e47f519deaccdc9ac44 # test data from github-pat.txt +- 10abdf40e0d14bdfb965f9a5be9056a09c7eb40d # test data from expectedReportWithIgnoredResults.json +- 14472805c7de45b2c3b5caf92f19effcba98c386 # test data from jwt.txt +- 33a14f1d1e4a1201a3e0062ebf09079fe8c84714 # unit test from workers_test.go +- 4a2ce0e91c76ae86ead9a17a66ae071258d592f9 # test/development data from privateKey.go +- d1a56c3e06ef27d9dbd0bcb6c38416935ee7aed1 # test data from expectedReport.json +- feb671ccd2fb03b181aa8bb64455441cea4070e0 # test data from expectedReportWithIgnoredRule.json +- 3a2c9e23253b0e5b8a83deaea772d93396e4f350 # test data from expectedReportWithValidation.json +- eebd28cd68ee73b9a1f68b85453575498c12c5b8 # test data from expectedReportWithValidation.json +- 7054f43a4dcd4954c3353800167e41a927934620 # test data from expectedReport.json +- 
cf3ce6be9ae0c492bafeeac34978dcda9a5fb7b9 # test data from expectedReportWithValidation.json +- 754506f714ffc10628e6fe6dd05affa486d78234 # test data from expectedReportWithValidation.json +- 33269ddd7e8734ef20906f888fcd4c971d1483bc # test data from expectedReportWithIgnoredRule.json +- cfb06617a386e8c6a6fd25cf2dee18d88dfecbdd # test data from expectedReportWithIgnoredRule.json +- 62cf656ad2a1f6e82f31df38ced303c9e860428f # unit test from engine_test.go +- df951402e6372dd78d4ed845e3b89ff6ac8b98ef # test data from multi_line_secret_report.json +- d8901c5a580965cd0c1ad89aec17e94a9286ee01 # test data from multi_line_secret_report.json +- cd186f025418cb008f61dcf881b62926be892f71 # test data from expectedReportWithValidation.json +- cfb862dc1f06113443c9c0b908f6322f139754f6 # test data from expectedReportWithValidation.json +- 14f5cf9d2716f2cec7daf95ab86e1a4feaf7ba41 # test data from expectedReportWithValidation.json +- 8d4ce06e0e27b22695fe1b99b70bc5c6896da00a # test data from secret_at_end_with_newline.txt +- c7510bd9bcfa7887912dd28bd57aab89be736acd # test data from secret_at_end_with_newline.txt +- 2227678335c1fbeb1ce0cdcb6934a9a435d622cb # test/development data from hardcodedPassword.go +- 43b3f5938c8fd428f2c90c514d47c8e6acc4c440 # unit test from secrets_test.go +- 99b01d122f4c834da6846b9ab6e97d8f49a469cf # test data from expectedReportWithIgnoredResults.json +- 9ab011bfabbff53875a01e31d189c11eb3d9e950 # unit test from report_test.go +- c19bd48e0c705dc106fb4349f9013a53260b6000 # test data from secret_at_end.txt +- 2035e72a99cba967cce9d583e184c0014c92f4c1 # unit test from workers_test.go +- b1fabc91396348f5afd2deceeefc83d364ca4032 # test/development data from privateKey.go +- 804b3a704acc6c1de3570c00b28bab5e87d98fb3 # test data from secret_at_end_with_newline.txt +- 93b2258f798cbaa6aee235906f8c2084bd6a7ba1 # test/development data from generic-key.go +- cae0fd35b5a7739e0327891524fb492c3df4e3fb # test/development data from generic-key.go +- 
b683fb1b338f97bc9f8bedfb0a97a36f20180401 # test data from expectedReport.json +- b855af057434ebd96b15b26bf10fa5ad884441cc # test data from github-pat.txt +- 9316dcfddec5df1eac32708418e911a63893975b # test data from jwt.txt +- d377db8f8a2a3b35dbb7c890263d74228ed02d61 # test/development data from privateKey.go +- 8b0a2007799c48f25a1cbca16c667a4567d929d5 # test data from expectedReport.json +- 1a8ddd4b573b178bb4c600e328bf4f276a093453 # test data from expectedReportWithIgnoredResults.json +- be95aa8800d98dbe449131fd31fcdb7fc2467c02 # test data from expectedReportWithIgnoredResults.json +- ebe80c08713e426daef70870a2c8fd297044ef75 # test/development data from secrets.go +- 8070b246e99c8159005c89407a9fe8b1cbf9306c # test data from expectedReportWithValidation.json +- ee2dfecc5c4fd77432da57b0f696587da8c0c81d # test/development data from hardcodedPassword.go +- c53f00661d588b751750e877257b1ff491657ec5 # unit test from report_test.go +- 3a688342b545a23e6d38ab46bba4b4dd26308aea # test data from multi_line_secret.txt +- e17530df123c834f664cdc406cbbb57e91740bc0 # test data from secret_at_end.txt +- 2d4a9d309ba9a066aaf371b9dfc98b728d05857f # unit test from secrets_test.go +- beb1c909ff7ea6398ca76f862f7de7548f88f9cb # unit test from report_test.go +- 1b3a3d6efd8b92558f9061e93fce4692ae6a91d1 # test/development data from secrets.go +- dd8e8f39052ccd3b25714c680898e507b4828083 # test data from expectedReport.json +- cde73827d5258fbd22221c7fed24a5a045725374 # test data from expectedReport.json +- ee6810a0a0b9585030e60ab113580a84b3edffe3 # test data from expectedReport.json +- 52cb22a0de0fc5fecb1f6939d270cefb4ce9c44b # unit test from workers_test.go +- da8394731123098d9d76cf984c9e708dcba0dcf7 # test data from multi_line_secret_report.json +- e84d7f8fb310400b7354bc41b1af216fc3b575fc # test data from secret_at_end_with_newline.txt +- b876a9e44a321b3ccd4bbd32b9df0697eb6dd650 # test/development data from hardcodedPassword.go +- b8dd1bb0375f602fb7f482db9d1f5e69e6b76d0a # test/development 
data from authenticated_url.go +- 1420305630f33f23400755298dc5323dcaa8a303 # unit test from secrets_test.go +- ca5ce696e7ccb35a8e149d91af1c6fe74012a592 # unit test from engine_test.go +- b6eba187700446eeabec4c25dbf7b1178984e261 # test data from multi_line_secret_report.json +- 5b0d5d6001aa614c03ff318d891bd0fd0a4b4e02 # unit test from workers_test.go +- 14560534c6804eba03888b4dfed9d6c32f174777 # unit test from engine_test.go +- f58272317c7b1607b1a33c19ae010e545a0c9d15 # test data from expectedReportWithValidation.json +- 259567da22a11edd5165ee4cee2117df894d544c # test data from expectedReportWithValidation.json +- 385e4274a9dc85b1b60afe85457481b01ca1ab7d # test data from expectedReportWithIgnoredRule.json +- 90fff59e417eb8b4e2f7e7482bbc540d9807beb3 # test data from expectedReport.json +- c6be096b2bebd1b3c17e7ee4e0c8575b128581f9 # test data from expectedReport.json +- 8edacde6a8430d8991c5803f8688a275e6894bd1 # unit test from workers_test.go +- a0d8f3c3c995ab194f38cc53d81c6d2ffd971d59 # test/development data from rules.go +- 6381cab5b005f67a4b5cc1ac772344cf8504366c # test data from expectedReportWithIgnoredRule.json +- 94169e4306f2d1c83bd82c74ba73f8e77a3abaed # test data from expectedReportWithValidation.json +- 4f1c8414bb3bff2adc4843e475a245d17db4ab81 # test data from expectedReport.json +- f93ed1ccbbd914f36861efe4c09570bb89ec243c # test data from github-pat.txt +- 379ac3caac4f893f45059495a99b7ecc410bbfb4 # test/development data from privateKey.go +- 84df267a69adf004beb88feab37fe1ecd58fd427 # test/development data from hardcodedPassword.go +- c8feff0e8ec5261039566bed4072f933e87dbc3a # test data from jwt.txt +- c50a8f7e86dfa24988b0f0386206c4dace8cef88 # test data from expectedReportWithValidation.json +- 167ed4df2596183c766f4a3345974d9e41e9309d # test data from expectedReportWithValidation.json +- a31c45df71d91a1e1b00b3d51aff045a04a190f4 # test data from multi_line_secret_report.json +- c0feaad74e976139c8a8f91fc4f4211e83f46022 # test/development data from 
generic-key.go +- 48e7c1dca0d44704578346c437cbf44fac0b53d4 # test/development data from generic-key.go +- 521658722bb8f70fcf53d68ec1508506771c3223 # unit test from secrets_test.go +- 0a33793962e6d5e0d4e7ea6dcf01fee04ab98a2f # unit test from secrets_test.go +- 94704e6c1bee0eb9bdbadb2f8345b0c8cb646860 # test data from expectedReportWithValidation.json +- f626f0e70b2c857ea969a4d21159f04fd3d1f655 # test data from multi_line_secret.txt +- 07cc783bae9aadf70b98b02d67b060ddfcb5442e # unit test from secrets_test.go +- cb53aa1981c03d071821c20f7547a4143a2c862f # test data from multi_line_secret.txt +- 938b4333e9f0837644e89add6fd3c1184440225b # test/development data from generic-key.go +- 41a10d234379a32091aa04b4e0914475d4ad89f0 # unit test from extra_test.go +- 659717153ad0a563bc8a8b6f5eedd75169badbb5 # test/development data from authenticated_url.go +- 51d8e3f68ff5ccacb22ce27b1bc5cf166f8570e7 # test/development data from authenticated_url.go +- 1b94ae53900be23821fb1af608238c6ba7615e34 # test data from expectedReport.json +- 07167ae672cd487a2ff261021ff8038c3933cc60 # test data from expectedReportWithIgnoredResults.json +- 90900eacdae0dff52cc29537b386b41587b0110c # test data from github-pat.txt +- 233d0c4f93ac448e368b14cc227357b1b91152a8 # test data from expectedReportWithIgnoredRule.json +- d464b0d17e3e815753f42aa7f304156cf8d18df0 # test data from enginetest.go +- 77da2d7528dfceca9e57fc13fd9a9aa29923e794 # test data from enginetest.go +- 6c218984ee92a9591ff2b6549de7eab6c7c03c5e # test data from enginetest.go +- 7d5f35e1380a6489b65fbe15565d60e5397019de # test data from toml's files +- 075ab88033e5094007024de956355c253ebf06da # test data from toml's files +- d79b591ba4cae80f32a1269d9ea15ea54110b678 # test data from toml's files +- cebe152f53b7016c365c63eed80219cf26e4f5b0 # test data from toml's files +- 575e3c49aca4eec6aa381f88652360007b74b138 # test data from toml's files +- 1ffb338e34da94d95fb2e02075112474dfe428dc # test data from toml's files +- 
c9fa578f7e55343096a83f2a09945111117295de # test data from toml's files +- dada497649a1526011e77ed9f2fdea56a5649172 # test data from toml's files +- b884ec426eff369efb692c790269294c58e4692e # test data from toml's files +- 8cdbfa8df41140e9e89b95a00d47114fafdf9346 # test data from toml's files +- ded52b133cefcf863d0ffe8a4f654792f7b1425b # test data from toml's files +- 90e89edc154dac13b12025374d679e380a3b342b # test data from toml's files +- 83a7ac9ce51c275f23a90e1aaac3b2325582061e # test data from toml's files +- 3445d6af4dfea8abf4e44d3f3dd539f127e0a269 # test data from toml's files +- 026e6c6513f4d73a812a3f843f0aeabbf0916cb7 # test data from toml's files +- 9e50197e35d7f00db64c95d7febb8f05c95a82a2 # test data from toml's files +- 04d1301fa403b5731ff7594d79986e83751e23d9 # test data from toml's files +- 58a283269cf669dc46f0cfde893b3c4709796964 # test data from toml's files +- 1ec3c048d065a365ad027ac804bb0261b131c844 # test data from toml's files +- d093b1eb15d00ed495269328c7d60ae64301c6e9 # test data from toml's files +- db76779649f63f254708ec7862ce1bf152ef389b # test data from toml's files +- 299e8e01252e92e8268fb105c55cba41d61d14c7 # test data from toml's files +- 0a58ae877927fad8c4f26ebefda0e2ea74d83355 # test data from toml's files +- e3a0bf29eb184ef678a34bcb9c39d5b5be16780e # test data from toml's files +- a2bfb7441052f2ffb1ea0d54cbb656760254d062 # test data from toml's files +- 1708bcaf82860032acc133e572bad0c7e9171158 # test data from toml's files +- 8a03adb54b650e46dc131fc489e7b5a26f56ead3 # test data from toml's files +- 999a40b0a6b9d1aa3a0d12a0f13fa80f84500690 # test data from toml's files +- ef543c0bce63bd21a4d27df6a6d2762f31bcf9c7 # test data from toml's files +- 8d42615a78d80100c93cb211b696a9d8dc4cae52 # test data from toml's files +- 11a01f5f59e11504cda26fadc1cde1d7869e346e # test data from toml's files +- 772960543873c38fd49747b70d34302bc7744528 # test data from toml's files +- 152006c3dc742f77b2ee1de6575694a3e4393979 # test data from toml's files \ No 
newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..5df3d8a3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,40 @@ +--- +name: Bug report +about: Create a bug report to help us improve +title: 'bug(): ' +labels: community, bug +assignees: '' + +--- + +Found a bug? You're welcome to [![GitHub Discussions](https://img.shields.io/badge/chat-discussions-blue.svg?style=flat-square)](https://github.com/Checkmarx/2ms/discussions) + +- **Please make sure to:** + - Describe in detail what the problem is + - Attach a log file with relevant data, preferably at DEBUG level (`--log-level=DEBUG`) + - Attach the scanned sample files, anonymize the data if the original file cannot be provided + - When attaching files to the issue make sure they are properly formatted + +### Expected Behavior + +(Which results are expected from 2ms?) + +### Actual Behavior + +(Formatted logs and samples help us better understand the issue) + +### Steps to Reproduce the Problem + +(Command line arguments and flags used) + +1. step 1 +2. step 2 +3. step 3 + +### Specifications + +(N/A if not applicable) + +- Version: +- Platform: +- Subsystem: diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..980f4f3b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea/feature for this project +title: 'feat(<scope>): <title starting with lowercase letter>' +labels: community, feature request +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when \[...\] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. 
+ +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/scripts/pr-issue-info/get_title_types.py b/.github/scripts/pr-issue-info/get_title_types.py new file mode 100644 index 00000000..34ac4875 --- /dev/null +++ b/.github/scripts/pr-issue-info/get_title_types.py @@ -0,0 +1,13 @@ +import yaml +import os + +def yaml_to_regex(yaml_file): + with open(yaml_file, 'r') as f: + data = yaml.safe_load(f) + regex = '|'.join(data) + print(f"^({regex})\([a-z]+\): [a-z]") + + +if __name__ == "__main__": + file_path = os.environ['FILE_PATH'] + yaml_to_regex(file_path) \ No newline at end of file diff --git a/.github/scripts/pr-issue-info/issue-fail.md b/.github/scripts/pr-issue-info/issue-fail.md new file mode 100644 index 00000000..6ce5d2f6 --- /dev/null +++ b/.github/scripts/pr-issue-info/issue-fail.md @@ -0,0 +1,14 @@ +# Issue Title Guidelines + +Please, follow the guideline for an issue title: + +For **bug**: + +`bug(<scope>): <title starting with lowercase letter>` + +For **feature request**: + +`feat(<scope>): <title starting with lowercase letter>` + +Thank you! +*2ms Team* diff --git a/.github/scripts/pr-issue-info/title-fail.md b/.github/scripts/pr-issue-info/title-fail.md new file mode 100644 index 00000000..c4f6a9ab --- /dev/null +++ b/.github/scripts/pr-issue-info/title-fail.md @@ -0,0 +1,8 @@ +# Pull Request Title Guideline + +Please, follow the guideline for a pull request title: + +`<type>(<scope>): <title starting with lowercase letter>` + +Thank you! 
+*2ms Team* diff --git a/.github/workflows/validate-issues.yaml b/.github/workflows/validate-issues.yaml new file mode 100644 index 00000000..78b5f429 --- /dev/null +++ b/.github/workflows/validate-issues.yaml @@ -0,0 +1,120 @@ +name: validate-issues +on: + issues: + types: [opened, edited, reopened] +jobs: + title-check: + runs-on: ubuntu-latest + env: + BODY: ${{ github.event.issue.body }} + TITLE: ${{ github.event.issue.title }} + steps: + - name: Checkout code + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + sparse-checkout: | + .github/scripts/pr-issue-info/issue-fail.md + .github/scripts/pr-issue-info/get_title_types.py + .github/issue-title-types.yaml + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install dependencies + run: python3 -m pip install --upgrade pip pyyaml + - name: Check issue title + env: + FILE_PATH: .github/issue-title-types.yaml + run: | + regex=$(python3 .github/scripts/pr-issue-info/get_title_types.py) + echo "Title regex: $regex" + echo "$TITLE" | grep -Pq "$regex" || (echo "$ERROR_MSG" && echo "TITLE_CHECK_FAILED=true" >> $GITHUB_ENV) + - name: Check for comment tag + if: ${{ env.TITLE_CHECK_FAILED != 'true' }} + run: | + comments=$(curl -s -H "Authorization: token ${{ secrets.TWOMS_BOT_PAT}}" \ + -X GET "https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/comments") + if echo "$comments" | grep -q "title_check"; then + echo "TAG_EXISTS=true" >> $GITHUB_ENV + else + echo "TAG_EXISTS=false" >> $GITHUB_ENV + fi + - name: Delete comment if title is fixed + if: ${{ env.TAG_EXISTS == 'true' }} + uses: thollander/actions-comment-pull-request@e4a76dd2b0a3c2027c3fd84147a67c22ee4c90fa + with: + message: | + Deleting comment, please refresh the page... 
+ comment-tag: title_check + mode: delete + github-token: ${{ secrets.TWOMS_BOT_PAT }} + - name: Add comment if title fails + if: env.TITLE_CHECK_FAILED == 'true' + uses: thollander/actions-comment-pull-request@e4a76dd2b0a3c2027c3fd84147a67c22ee4c90fa + with: + file-path: .github/scripts/pr-issue-info/issue-fail.md + comment-tag: title_check + mode: recreate + create-if-not-exists: true + github-token: ${{ secrets.TWOMS_BOT_PAT }} + - name: Workflow failed + if: env.TITLE_CHECK_FAILED == 'true' + run: exit 1 + labels-check: + runs-on: ubuntu-latest + env: + BODY: ${{ github.event.issue.body }} + LABELS: ${{ toJson(github.event.issue.labels) }} + TITLE: ${{ github.event.issue.title }} + steps: + - name: Checkout code + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + sparse-checkout: | + .github/scripts/pr-issue-info/get_keywords.py + .github/keywords.yaml + - name: Install JQ + run: sudo apt-get install jq + - name: Add feature or feature request label + run: | + if [[ "$TITLE" == feat* ]] || echo "$TITLE $BODY" | grep -iqP "feature request" || echo "$BODY" | grep -iqP "Is your feature request related to a problem? Please describe." || echo "$BODY" | grep -iqP "Describe the solution you'd like" || echo "$BODY" | grep -iqP "Describe alternatives you've considered" || echo "$BODY" | grep -iqP "Additional context"; then + if [[ "$IS_MEMBER" == "true" ]]; then + echo "Adding 'feature' label..." + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X POST -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels -d '{"labels": ["feature"]}' + else + echo "Adding 'feature request' label..." 
+ curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X POST -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels -d '{"labels": ["feature request"]}' + fi + else + if echo "$LABELS" | grep -q "feature request"; then + echo "Removing 'feature request' label..." + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X DELETE -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels/feature%20request + elif echo "$LABELS" | grep -q "feature"; then + echo "Removing 'feature' label..." + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X DELETE -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels/feature + fi + fi + - name: Add bug label + run: | + if echo "$TITLE $BODY" | grep -iqP "(\\b|_)bugs?(\\b|_)" || echo "$BODY" | grep -iqP "steps to reproduce" || echo "$BODY" | grep -iqP "actual behavior" || echo "$BODY" | grep -iqP "expected behavior"; then + echo "Adding 'bug' label..." + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X POST -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels -d '{"labels": ["bug"]}' + else + if echo "$LABELS" | grep -q "bug"; then + echo "Removing 'bug' label..." 
+ curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X DELETE -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels/bug + fi + fi + - name: Add bug label (extra) + run: | + if echo "$TITLE $BODY" | grep -iqP "(\\b|_)bugs?(\\b|_)" || echo "$BODY" | grep -iqP "steps to reproduce" || echo "$BODY" | grep -iqP "actual behavior" || echo "$BODY" | grep -iqP "expected behavior"; then + echo "Adding 'bug' label (extra)..." + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X POST -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels -d '{"labels": ["bug"]}' + else + if echo "$LABELS" | grep -q "bug"; then + echo "Removing 'bug' label (extra)..." + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X DELETE -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels/bug + fi + fi \ No newline at end of file diff --git a/engine/detect/baseline.go b/engine/detect/baseline.go new file mode 100644 index 00000000..ee859475 --- /dev/null +++ b/engine/detect/baseline.go @@ -0,0 +1,85 @@ +package detect + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/zricethezav/gitleaks/v8/report" +) + +// IsNew returns true if the finding is not present in the baseline slice. +func IsNew(finding *report.Finding, baseline []report.Finding) bool { + for i := range baseline { + if findingsEqualExceptFingerprint(finding, &baseline[i]) { + return false + } + } + return true +} + +// findingsEqualExceptFingerprint compares all fields except Fingerprint. 
+func findingsEqualExceptFingerprint(a, b *report.Finding) bool { + return a.Author == b.Author && + a.Commit == b.Commit && + a.Date == b.Date && + a.Description == b.Description && + a.Email == b.Email && + a.EndColumn == b.EndColumn && + a.EndLine == b.EndLine && + a.Entropy == b.Entropy && + a.File == b.File && + // Omit checking Fingerprint - if the format of the fingerprint changes, the users will see unexpected behavior + a.Match == b.Match && + a.Message == b.Message && + a.RuleID == b.RuleID && + a.Secret == b.Secret && + a.StartColumn == b.StartColumn && + a.StartLine == b.StartLine +} + +func LoadBaseline(baselinePath string) ([]report.Finding, error) { + bytes, err := os.ReadFile(baselinePath) + if err != nil { + return nil, fmt.Errorf("could not open %s", baselinePath) + } + + var previousFindings []report.Finding + err = json.Unmarshal(bytes, &previousFindings) + if err != nil { + return nil, fmt.Errorf("the format of the file %s is not supported", baselinePath) + } + + return previousFindings, nil +} + +func (d *Detector) AddBaseline(baselinePath, source string) error { + if baselinePath != "" { + absoluteSource, err := filepath.Abs(source) + if err != nil { + return err + } + + absoluteBaseline, err := filepath.Abs(baselinePath) + if err != nil { + return err + } + + relativeBaseline, err := filepath.Rel(absoluteSource, absoluteBaseline) + if err != nil { + return err + } + + baseline, err := LoadBaseline(baselinePath) + if err != nil { + return err + } + + d.baseline = baseline + baselinePath = relativeBaseline + } + + d.baselinePath = baselinePath + return nil +} diff --git a/engine/detect/baseline_test.go b/engine/detect/baseline_test.go new file mode 100644 index 00000000..50d7fa86 --- /dev/null +++ b/engine/detect/baseline_test.go @@ -0,0 +1,88 @@ +package detect + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/zricethezav/gitleaks/v8/report" +) + +func TestIsNew(t *testing.T) { + tests := []struct { + 
findings report.Finding + baseline []report.Finding + expect bool + }{ + { + findings: report.Finding{ + Author: "a", + Commit: "0000", + }, + baseline: []report.Finding{ + { + Author: "a", + Commit: "0000", + }, + }, + expect: false, + }, + { + findings: report.Finding{ + Author: "a", + Commit: "0000", + }, + baseline: []report.Finding{ + { + Author: "a", + Commit: "0002", + }, + }, + expect: true, + }, + { + findings: report.Finding{ + Author: "a", + Commit: "0000", + Tags: []string{"a", "b"}, + }, + baseline: []report.Finding{ + { + Author: "a", + Commit: "0000", + Tags: []string{"a", "c"}, + }, + }, + expect: false, // Updated tags doesn't make it a new finding + }, + } + for _, test := range tests { + assert.Equal(t, test.expect, IsNew(&test.findings, test.baseline)) + } +} + +func TestFileLoadBaseline(t *testing.T) { + tests := []struct { + Filename string + ExpectedError error + }{ + { + Filename: "../../tests/testData/baseline/baseline.csv", + ExpectedError: errors.New("the format of the file ../../tests/testData/baseline/baseline.csv is not supported"), + }, + { + Filename: "../../tests/testData/baseline/baseline.sarif", + ExpectedError: errors.New("the format of the file ../../tests/testData/baseline/baseline.sarif is not supported"), + }, + { + Filename: "../../tests/testData/baseline/notfound.json", + ExpectedError: errors.New("could not open ../../tests/testData/baseline/notfound.json"), + }, + } + + for _, test := range tests { + _, err := LoadBaseline(test.Filename) + assert.Equal(t, test.ExpectedError, err) + } +} diff --git a/engine/detect/detect.go b/engine/detect/detect.go new file mode 100644 index 00000000..72f0d16f --- /dev/null +++ b/engine/detect/detect.go @@ -0,0 +1,327 @@ +package detect + +import ( + "context" + "regexp" + "strings" + "sync" + + "github.com/zricethezav/gitleaks/v8/config" + "github.com/zricethezav/gitleaks/v8/report" + + ahocorasick "github.com/BobuSumisu/aho-corasick" + "github.com/fatih/semgroup" + + 
"github.com/rs/zerolog/log" +) + +const ( + gitleaksAllowSignature = "gitleaks:allow" + chunkSize = 10 * 1_000 // 10kb +) + +// Detector is the main detector struct +type Detector struct { + // Config is the configuration for the detector + Config config.Config + + // Redact is a flag to redact findings. This is exported + // so users using gitleaks as a library can set this flag + // without calling `detector.Start(cmd *cobra.Command)` + Redact uint + + // verbose is a flag to print findings + Verbose bool + + // files larger than this will be skipped + MaxTargetMegaBytes int + + // followSymlinks is a flag to enable scanning symlink files + FollowSymlinks bool + + // NoColor is a flag to disable color output + NoColor bool + + // IgnoreGitleaksAllow is a flag to ignore gitleaks:allow comments. + IgnoreGitleaksAllow bool + + // commitMap is used to keep track of commits that have been scanned. + // This is only used for logging purposes and git scans. + commitMap map[string]bool + + // findingMutex is to prevent concurrent access to the + // findings slice when adding findings. + findingMutex *sync.Mutex + + // findings is a slice of report.Findings. This is the result + // of the detector's scan which can then be used to generate a + // report. 
+ findings []report.Finding + + // prefilter is a ahocorasick struct used for doing efficient string + // matching given a set of words (keywords from the rules in the config) + prefilter ahocorasick.Trie + + // a list of known findings that should be ignored + baseline []report.Finding + + // path to baseline + baselinePath string + + // gitleaksIgnore + gitleaksIgnore map[string]bool + + // Sema (https://github.com/fatih/semgroup) controls the concurrency + Sema *semgroup.Group +} + +// Fragment contains the data to be scanned +type Fragment struct { + // Raw is the raw content of the fragment + Raw string + + // FilePath is the path to the file if applicable + FilePath string + SymlinkFile string + + // CommitSHA is the SHA of the commit if applicable + CommitSHA string + + // newlineIndices is a list of indices of newlines in the raw content. + // This is used to calculate the line location of a finding + newlineIndices [][]int + + // keywords is a map of all the keywords contain within the contents + // of this fragment + keywords map[string]bool +} + +// NewDetector creates a new detector with the given config +func NewDetector(cfg *config.Config) *Detector { + return &Detector{ + commitMap: make(map[string]bool), + gitleaksIgnore: make(map[string]bool), + findingMutex: &sync.Mutex{}, + findings: make([]report.Finding, 0), + Config: *cfg, + prefilter: *ahocorasick.NewTrieBuilder().AddStrings(cfg.Keywords).Build(), + Sema: semgroup.NewGroup(context.Background(), 40), + } +} + +// DetectBytes scans the given bytes and returns a list of findings +func (d *Detector) DetectBytes(content []byte) []report.Finding { + return d.DetectString(string(content)) +} + +// DetectString scans the given string and returns a list of findings +func (d *Detector) DetectString(content string) []report.Finding { + frag := &Fragment{ + Raw: content, + } + return d.Detect(frag) +} + +// Detect scans the given fragment and returns a list of findings +// TODO: Refactor to remove 
iteration copies (gocritic) +func (d *Detector) Detect(fragment *Fragment) []report.Finding { + var findings []report.Finding + + // initiate fragment keywords + fragment.keywords = make(map[string]bool) + + // check if filepath is allowed + if fragment.FilePath != "" && (d.Config.Allowlist.PathAllowed(fragment.FilePath) || + fragment.FilePath == d.Config.Path || (d.baselinePath != "" && fragment.FilePath == d.baselinePath)) { + return findings + } + + // add newline indices for location calculation in detectRule + + fragment.newlineIndices = regexp.MustCompile("\n|$").FindAllStringIndex(fragment.Raw, -1) + + // build keyword map for prefiltering rules + normalizedRaw := strings.ToLower(fragment.Raw) + matches := d.prefilter.MatchString(normalizedRaw) + for _, m := range matches { + fragment.keywords[normalizedRaw[m.Pos():int(m.Pos())+len(m.Match())]] = true + } + + rulePtrs := make([]*config.Rule, 0, len(d.Config.Rules)) + for i := range d.Config.Rules { + rule := d.Config.Rules[i] + rulePtrs = append(rulePtrs, &rule) + } + for _, rulePtr := range rulePtrs { + if len(rulePtr.Keywords) == 0 { + findings = append(findings, d.detectRule(fragment, rulePtr)...) + continue + } + fragmentContainsKeyword := false + for _, k := range rulePtr.Keywords { + if _, ok := fragment.keywords[strings.ToLower(k)]; ok { + fragmentContainsKeyword = true + } + } + if fragmentContainsKeyword { + findings = append(findings, d.detectRule(fragment, rulePtr)...) 
+ } + } + return findings +} + +// detectRule scans the given fragment for the given rule and returns a list of findings +func (d *Detector) detectRule(fragment *Fragment, rule *config.Rule) []report.Finding { + var findings []report.Finding + + if d.shouldSkipRule(fragment, rule) { + return findings + } + + matchIndices := d.getMatchIndices(fragment, rule) + for _, matchIndex := range matchIndices { + finding, ok := d.buildFinding(fragment, rule, matchIndex) + if !ok { + continue + } + findings = append(findings, finding) + } + return findings +} + +// shouldSkipRule centralizes early return checks for rule applicability +func (d *Detector) shouldSkipRule(fragment *Fragment, rule *config.Rule) bool { + if rule.Allowlist.CommitAllowed(fragment.CommitSHA) || + rule.Allowlist.PathAllowed(fragment.FilePath) { + return true + } + if rule.Path != nil && rule.Regex == nil { + if rule.Path.MatchString(fragment.FilePath) { + return true // handled as finding in buildFinding + } + } else if rule.Path != nil { + if !rule.Path.MatchString(fragment.FilePath) { + return true + } + } + if rule.Regex == nil { + return true + } + if d.MaxTargetMegaBytes > 0 { + rawLength := len(fragment.Raw) / 1000000 + if rawLength > d.MaxTargetMegaBytes { + log.Debug().Msgf("skipping file: %s scan due to size: %d", fragment.FilePath, rawLength) + return true + } + } + return false +} + +// getMatchIndices centralizes the retrieval of match indices +func (d *Detector) getMatchIndices(fragment *Fragment, rule *config.Rule) [][]int { + if rule.Regex == nil { + return nil + } + return rule.Regex.FindAllStringIndex(fragment.Raw, -1) +} + +// buildFinding centralizes the construction and filtering of findings +func (d *Detector) buildFinding(fragment *Fragment, rule *config.Rule, matchIndex []int) (report.Finding, bool) { + secret := strings.Trim(fragment.Raw[matchIndex[0]:matchIndex[1]], "\n") + loc := location(fragment, matchIndex) + if matchIndex[1] > loc.endLineIndex { + loc.endLineIndex = 
matchIndex[1] + } + finding := report.Finding{ + Description: rule.Description, + File: fragment.FilePath, + SymlinkFile: fragment.SymlinkFile, + RuleID: rule.RuleID, + StartLine: loc.startLine, + EndLine: loc.endLine, + StartColumn: loc.startColumn, + EndColumn: loc.endColumn, + Secret: secret, + Match: secret, + Tags: rule.Tags, + Line: fragment.Raw[loc.startLineIndex:loc.endLineIndex], + } + if strings.Contains(fragment.Raw[loc.startLineIndex:loc.endLineIndex], + gitleaksAllowSignature) && !d.IgnoreGitleaksAllow { + return finding, false + } + groups := rule.Regex.FindStringSubmatch(secret) + if !extractSecretGroup(rule, groups, &secret, &finding) { + return finding, false + } + if d.isFindingAllowlisted(rule, &finding) { + return finding, false + } + if d.isFindingStopworded(rule, &finding) { + return finding, false + } + if !d.isFindingEntropyValid(rule, &finding, secret) { + return finding, false + } + return finding, true +} + +// extractSecretGroup handles secret group extraction logic +func extractSecretGroup(rule *config.Rule, groups []string, secret *string, finding *report.Finding) bool { + if rule.SecretGroup == 0 { + if len(groups) == 2 { + *secret = groups[1] + finding.Secret = *secret + } + } else { + if len(groups) <= rule.SecretGroup || len(groups) == 0 { + return false + } + *secret = groups[rule.SecretGroup] + finding.Secret = *secret + } + return true +} + +// isFindingAllowlisted checks allowlist conditions for a finding +func (d *Detector) isFindingAllowlisted(rule *config.Rule, finding *report.Finding) bool { + allowlistTarget := finding.Secret + switch rule.Allowlist.RegexTarget { + case "match": + allowlistTarget = finding.Match + case "line": + allowlistTarget = finding.Line + } + globalAllowlistTarget := finding.Secret + switch d.Config.Allowlist.RegexTarget { + case "match": + globalAllowlistTarget = finding.Match + case "line": + globalAllowlistTarget = finding.Line + } + return rule.Allowlist.RegexAllowed(allowlistTarget) || + 
d.Config.Allowlist.RegexAllowed(globalAllowlistTarget) +} + +// isFindingStopworded checks stopword conditions for a finding +func (d *Detector) isFindingStopworded(rule *config.Rule, finding *report.Finding) bool { + return rule.Allowlist.ContainsStopWord(finding.Secret) || + d.Config.Allowlist.ContainsStopWord(finding.Secret) +} + +// isFindingEntropyValid checks entropy and generic rule digit conditions +func (d *Detector) isFindingEntropyValid(rule *config.Rule, finding *report.Finding, secret string) bool { + entropy := shannonEntropy(finding.Secret) + finding.Entropy = float32(entropy) + if rule.Entropy != 0.0 { + if entropy <= rule.Entropy { + return false + } + if strings.HasPrefix(rule.RuleID, "generic") { + if !containsDigit(secret) { + return false + } + } + } + return true +} diff --git a/engine/detect/detect_test.go b/engine/detect/detect_test.go new file mode 100644 index 00000000..e04eaac6 --- /dev/null +++ b/engine/detect/detect_test.go @@ -0,0 +1,332 @@ +package detect + +import ( + "fmt" + "path/filepath" + "testing" + + "github.com/spf13/viper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/zricethezav/gitleaks/v8/config" + "github.com/zricethezav/gitleaks/v8/report" +) + +const configPath = "../../tests/testData/config/" + +func TestDetect(t *testing.T) { + tests := []struct { + cfgName string + baselinePath string + fragment Fragment + // NOTE: for expected findings, all line numbers will be 0 + // because line deltas are added _after_ the finding is created. + // I.e., if the finding is from a --no-git file, the line number will be + // increase by 1 in DetectFromFiles(). If the finding is from git, + // the line number will be increased by the patch delta. 
+ expectedFindings []report.Finding + wantError error + }{ + { + cfgName: "simple", + fragment: Fragment{ + Raw: `awsToken := \"AKIALALEMEL33243OKIA\ // gitleaks:allow"`, + FilePath: "tmp.go", + }, + expectedFindings: []report.Finding{}, + }, + { + cfgName: "simple", + fragment: Fragment{ + Raw: `awsToken := \ + + \"AKIALALEMEL33243OKIA\ // gitleaks:allow" + + `, + FilePath: "tmp.go", + }, + expectedFindings: []report.Finding{}, + }, + { + cfgName: "simple", + fragment: Fragment{ + Raw: `awsToken := \"AKIALALEMEL33243OKIA\" + + // gitleaks:allow" + + `, + FilePath: "tmp.go", + }, + expectedFindings: []report.Finding{ + { + Description: "AWS Access Key", + Secret: "AKIALALEMEL33243OKIA", + Match: "AKIALALEMEL33243OKIA", + File: "tmp.go", + Line: `awsToken := \"AKIALALEMEL33243OKIA\"`, + RuleID: "aws-access-key", + Tags: []string{"key", "AWS"}, + StartLine: 0, + EndLine: 0, + StartColumn: 15, + EndColumn: 34, + Entropy: 3.1464393, + }, + }, + }, + { + cfgName: "escaped_character_group", + fragment: Fragment{ + Raw: `pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB`, + FilePath: "tmp.go", + }, + expectedFindings: []report.Finding{ + { + Description: "PyPI upload token", + Secret: "pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB", + Match: "pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB", + Line: `pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB`, + File: "tmp.go", + RuleID: "pypi-upload-token", + Tags: []string{"key", "pypi"}, + StartLine: 0, + EndLine: 0, + StartColumn: 1, + EndColumn: 86, + Entropy: 1.9606875, + }, + }, + }, + { + cfgName: "simple", + fragment: Fragment{ + Raw: `awsToken := \"AKIALALEMEL33243OLIA\"`, + FilePath: "tmp.go", + }, + expectedFindings: []report.Finding{ + { + Description: "AWS Access Key", + Secret: "AKIALALEMEL33243OLIA", + Match: "AKIALALEMEL33243OLIA", + Line: `awsToken := 
\"AKIALALEMEL33243OLIA\"`, + File: "tmp.go", + RuleID: "aws-access-key", + Tags: []string{"key", "AWS"}, + StartLine: 0, + EndLine: 0, + StartColumn: 15, + EndColumn: 34, + Entropy: 3.0841837, + }, + }, + }, + { + cfgName: "simple", + fragment: Fragment{ + Raw: `export BUNDLE_ENTERPRISE__CONTRIBSYS__COM=cafebabe:deadbeef;`, + FilePath: "tmp.sh", + }, + expectedFindings: []report.Finding{ + { + Description: "Sidekiq Secret", + Match: "BUNDLE_ENTERPRISE__CONTRIBSYS__COM=cafebabe:deadbeef;", + Secret: "cafebabe:deadbeef", + Line: `export BUNDLE_ENTERPRISE__CONTRIBSYS__COM=cafebabe:deadbeef;`, + File: "tmp.sh", + RuleID: "sidekiq-secret", + Tags: []string{}, + Entropy: 2.6098502, + StartLine: 0, + EndLine: 0, + StartColumn: 8, + EndColumn: 60, + }, + }, + }, + { + cfgName: "simple", + fragment: Fragment{ + Raw: `echo hello1; export BUNDLE_ENTERPRISE__CONTRIBSYS__COM="cafebabe:deadbeef" && echo hello2`, + FilePath: "tmp.sh", + }, + expectedFindings: []report.Finding{ + { + Description: "Sidekiq Secret", + Match: "BUNDLE_ENTERPRISE__CONTRIBSYS__COM=\"cafebabe:deadbeef\"", + Secret: "cafebabe:deadbeef", + File: "tmp.sh", + Line: `echo hello1; export BUNDLE_ENTERPRISE__CONTRIBSYS__COM="cafebabe:deadbeef" && echo hello2`, + RuleID: "sidekiq-secret", + Tags: []string{}, + Entropy: 2.6098502, + StartLine: 0, + EndLine: 0, + StartColumn: 21, + EndColumn: 74, + }, + }, + }, + { + cfgName: "simple", + fragment: Fragment{ + Raw: `url = "http://cafeb4b3:d3adb33f@enterprise.contribsys.com:80/path?param1=true¶m2=false#heading1"`, + FilePath: "tmp.sh", + }, + expectedFindings: []report.Finding{ + { + Description: "Sidekiq Sensitive URL", + Match: "http://cafeb4b3:d3adb33f@enterprise.contribsys.com:", + Secret: "cafeb4b3:d3adb33f", + File: "tmp.sh", + Line: `url = "http://cafeb4b3:d3adb33f@enterprise.contribsys.com:80/path?param1=true¶m2=false#heading1"`, + RuleID: "sidekiq-sensitive-url", + Tags: []string{}, + Entropy: 2.984234, + StartLine: 0, + EndLine: 0, + StartColumn: 8, + 
EndColumn: 58, + }, + }, + }, + { + cfgName: "allow_aws_re", + fragment: Fragment{ + Raw: `awsToken := \"AKIALALEMEL33243OLIA\"`, + FilePath: "tmp.go", + }, + expectedFindings: []report.Finding{}, + }, + { + cfgName: "allow_path", + fragment: Fragment{ + Raw: `awsToken := \"AKIALALEMEL33243OLIA\"`, + FilePath: "tmp.go", + }, + expectedFindings: []report.Finding{}, + }, + { + cfgName: "allow_commit", + fragment: Fragment{ + Raw: `awsToken := \"AKIALALEMEL33243OLIA\"`, + FilePath: "tmp.go", + CommitSHA: "allowthiscommit", + }, + expectedFindings: []report.Finding{}, + }, + { + cfgName: "entropy_group", + fragment: Fragment{ + Raw: `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`, + FilePath: "tmp.go", + }, + expectedFindings: []report.Finding{ + { + Description: "Discord API key", + Match: "Discord_Public_Key = \"e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5\"", + Secret: "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5", + Line: `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`, + File: "tmp.go", + RuleID: "discord-api-key", + Tags: []string{}, + Entropy: 3.7906237, + StartLine: 0, + EndLine: 0, + StartColumn: 7, + EndColumn: 93, + }, + }, + }, + { + cfgName: "generic_with_py_path", + fragment: Fragment{ + Raw: `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`, + FilePath: "tmp.go", + }, + expectedFindings: []report.Finding{}, + }, + { + cfgName: "generic_with_py_path", + fragment: Fragment{ + Raw: `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`, + FilePath: "tmp.py", + }, + expectedFindings: []report.Finding{ + { + Description: "Generic API Key", + Match: "Key = \"e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5\"", + Secret: "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5", + Line: `const Discord_Public_Key = 
"e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`, + File: "tmp.py", + RuleID: "generic-api-key", + Tags: []string{}, + Entropy: 3.7906237, + StartLine: 0, + EndLine: 0, + StartColumn: 22, + EndColumn: 93, + }, + }, + }, + { + cfgName: "bad_entropy_group", + fragment: Fragment{ + Raw: `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`, + FilePath: "tmp.go", + }, + expectedFindings: []report.Finding{}, + wantError: fmt.Errorf("Discord API key invalid regex secret group 5, max regex secret group 3"), + }, + { + cfgName: "simple", + fragment: Fragment{ + Raw: `awsToken := \"AKIALALEMEL33243OLIA\"`, + FilePath: filepath.Join(configPath, "simple.toml"), + }, + expectedFindings: []report.Finding{}, + }, + { + cfgName: "allow_global_aws_re", + fragment: Fragment{ + Raw: `awsToken := \"AKIALALEMEL33243OLIA\"`, + FilePath: "tmp.go", + }, + expectedFindings: []report.Finding{}, + }, + { + cfgName: "generic_with_py_path", + fragment: Fragment{ + Raw: `const Discord_Public_Key = "load2523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`, + FilePath: "tmp.py", + }, + expectedFindings: []report.Finding{}, + }, + } + + for i, tt := range tests { + viper.Reset() + viper.AddConfigPath(configPath) + viper.SetConfigName(tt.cfgName) + viper.SetConfigType("toml") + err := viper.ReadInConfig() + require.NoError(t, err) + + var vc config.ViperConfig + err = viper.Unmarshal(&vc) + require.NoError(t, err) + cfg, err := vc.Translate() + cfg.Path = filepath.Join(configPath, tt.cfgName+".toml") + assert.Equal(t, tt.wantError, err) + d := NewDetector(&cfg) + d.baselinePath = tt.baselinePath + + findings := d.Detect(&tt.fragment) + if len(findings) != len(tt.expectedFindings) { + t.Logf("[DEBUG] Test case #%d: cfgName=%s, FilePath=%s", i, tt.cfgName, tt.fragment.FilePath) + t.Logf("[DEBUG] Expected findings: %+v", tt.expectedFindings) + t.Logf("[DEBUG] Actual findings: %+v", findings) + } + assert.ElementsMatch(t, 
tt.expectedFindings, findings) + } +} diff --git a/engine/detect/location.go b/engine/detect/location.go new file mode 100644 index 00000000..2b9acbda --- /dev/null +++ b/engine/detect/location.go @@ -0,0 +1,79 @@ +package detect + +// Location represents a location in a file +type Location struct { + startLine int + endLine int + startColumn int + endColumn int + startLineIndex int + endLineIndex int +} + +func location(fragment *Fragment, matchIndex []int) Location { + var ( + prevNewLine int + location Location + lineSet bool + _lineNum int + ) + + start := matchIndex[0] + end := matchIndex[1] + + // default startLineIndex to 0 + location.startLineIndex = 0 + + // Fixes: https://github.com/zricethezav/gitleaks/issues/1037 + // When a fragment does NOT have any newlines, a default "newline" + // will be counted to make the subsequent location calculation logic work + // for fragments will no newlines. + if len(fragment.newlineIndices) == 0 { + fragment.newlineIndices = [][]int{ + {len(fragment.Raw), len(fragment.Raw) + 1}, + } + } + + for lineNum, pair := range fragment.newlineIndices { + _lineNum = lineNum + newLineByteIndex := pair[0] + if prevNewLine <= start && start < newLineByteIndex { + lineSet = true + location.startLine = lineNum + location.endLine = lineNum + location.startColumn = (start - prevNewLine) + 1 // +1 because counting starts at 1 + location.startLineIndex = prevNewLine + location.endLineIndex = newLineByteIndex + } + if prevNewLine < end && end <= newLineByteIndex { + location.endLine = lineNum + location.endColumn = (end - prevNewLine) + location.endLineIndex = newLineByteIndex + } + prevNewLine = pair[0] + } + + if !lineSet { + // if lines never get set then that means the secret is most likely + // on the last line of the diff output and the diff output does not have + // a newline + location.startColumn = (start - prevNewLine) + 1 // +1 because counting starts at 1 + location.endColumn = (end - prevNewLine) + location.startLine = 
_lineNum + 1 + location.endLine = _lineNum + 1 + + // search for new line byte index + i := 0 + for end+i < len(fragment.Raw) { + if fragment.Raw[end+i] == '\n' { + break + } + if fragment.Raw[end+i] == '\r' { + break + } + i++ + } + location.endLineIndex = end + i + } + return location +} diff --git a/engine/detect/location_test.go b/engine/detect/location_test.go new file mode 100644 index 00000000..5efed2dc --- /dev/null +++ b/engine/detect/location_test.go @@ -0,0 +1,57 @@ +package detect + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestGetLocation tests the getLocation function. +func TestGetLocation(t *testing.T) { + tests := []struct { + linePairs [][]int + start int + end int + wantLocation Location + }{ + { + linePairs: [][]int{ + {0, 39}, + {40, 55}, + {56, 57}, + }, + start: 35, + end: 38, + wantLocation: Location{ + startLine: 1, + startColumn: 36, + endLine: 1, + endColumn: 38, + startLineIndex: 0, + endLineIndex: 40, + }, + }, + { + linePairs: [][]int{ + {0, 39}, + {40, 55}, + {56, 57}, + }, + start: 40, + end: 44, + wantLocation: Location{ + startLine: 2, + startColumn: 1, + endLine: 2, + endColumn: 4, + startLineIndex: 40, + endLineIndex: 56, + }, + }, + } + + for _, test := range tests { + loc := location(&Fragment{newlineIndices: test.linePairs}, []int{test.start, test.end}) + assert.Equal(t, test.wantLocation, loc) + } +} diff --git a/engine/detect/utils.go b/engine/detect/utils.go new file mode 100644 index 00000000..47c17098 --- /dev/null +++ b/engine/detect/utils.go @@ -0,0 +1,38 @@ +package detect + +import ( + "math" +) + +// shannonEntropy calculates the entropy of data using the formula defined here: +// https://en.wiktionary.org/wiki/Shannon_entropy +// Another way to think about what this is doing is calculating the number of bits +// needed to on average encode the data. So, the higher the entropy, the more random the data, the +// more bits needed to encode that data. 
+func shannonEntropy(data string) (entropy float64) { + if data == "" { + return 0 + } + charCounts := make(map[rune]int) + for _, char := range data { + charCounts[char]++ + } + + invLength := 1.0 / float64(len([]rune(data))) // normalise by rune count (counts above are per rune, not per byte) + for _, count := range charCounts { + freq := float64(count) * invLength + entropy -= freq * math.Log2(freq) + } + + return entropy +} + +func containsDigit(s string) bool { + for _, c := range s { + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return true + } + } + return false +} diff --git a/engine/engine.go b/engine/engine.go index 06f49baf..40abad06 100644 --- a/engine/engine.go +++ b/engine/engine.go @@ -5,7 +5,9 @@ package engine import ( "bufio" "context" - "crypto/sha1" //nolint:gosec // SHA1 is used for ID generation only, not for security + "crypto/hkdf" + "crypto/sha256" + "encoding/hex" "fmt" "io" "os" @@ -14,6 +16,7 @@ import ( "text/tabwriter" "github.com/checkmarx/2ms/v3/engine/chunk" + "github.com/checkmarx/2ms/v3/engine/detect" "github.com/checkmarx/2ms/v3/engine/linecontent" "github.com/checkmarx/2ms/v3/engine/rules" "github.com/checkmarx/2ms/v3/engine/score" @@ -24,7 +27,6 @@ import ( "github.com/rs/zerolog/log" "github.com/spf13/cobra" "github.com/zricethezav/gitleaks/v8/config" - "github.com/zricethezav/gitleaks/v8/detect" "github.com/zricethezav/gitleaks/v8/report" ) @@ -54,7 +56,6 @@ type ctxKey string const ( customRegexRuleIdFormat = "custom-regex-%d" - CxFileEndMarker = ";cx-file-end" totalLinesKey ctxKey = "totalLines" linesInChunkKey ctxKey = "linesInChunk" ) @@ -89,7 +90,7 @@ func Init(engineConfig EngineConfig) (IEngine, error) { //nolint:gocritic // hug cfg.Rules = rulesToBeApplied cfg.Keywords = keywords - detector := detect.NewDetector(cfg) + detector := detect.NewDetector(&cfg) detector.MaxTargetMegaBytes = engineConfig.MaxTargetMegabytes return &Engine{ @@ -213,11 +214,10 @@ func (e *Engine) detectSecrets( secrets chan *secrets.Secret, pluginName string, ) error { - fragment.Raw +=
CxFileEndMarker + "\n" - - values := e.detector.Detect(*fragment) + values := e.detector.Detect(fragment) for _, value := range values { //nolint:gocritic // rangeValCopy: value is used immediately - secret, buildErr := buildSecret(ctx, item, value, pluginName) + isLastLine := value.EndLine == strings.Count(fragment.Raw, "\n") + secret, buildErr := buildSecret(ctx, item, value, pluginName, isLastLine) if buildErr != nil { return fmt.Errorf("failed to build secret: %w", buildErr) } @@ -318,32 +318,33 @@ func buildSecret( item plugins.ISourceItem, value report.Finding, //nolint:gocritic // hugeParam: value is heavy but needed pluginName string, + isLastLine bool, ) (*secrets.Secret, error) { gitInfo := item.GetGitInfo() - itemId := getFindingId(item, value) + itemId, err := getFindingId(item, &value) + if err != nil { + return nil, fmt.Errorf("failed to get finding ID: %w", err) + } + startLine, endLine, err := getStartAndEndLines(ctx, pluginName, gitInfo, value) if err != nil { return nil, fmt.Errorf("failed to get start and end lines for source %s: %w", item.GetSource(), err) } - - value.Line = strings.TrimSuffix(value.Line, CxFileEndMarker) hasNewline := strings.HasPrefix(value.Line, "\n") - if hasNewline { - value.Line = strings.TrimPrefix(value.Line, "\n") + value.Line = strings.Trim(value.Line, "\n\r") } - value.Line = strings.ReplaceAll(value.Line, "\r", "") lineContent, err := linecontent.GetLineContent(value.Line, value.Secret) if err != nil { return nil, fmt.Errorf("failed to get line content for source %s: %w", item.GetSource(), err) } - adjustedStartColumn := value.StartColumn - adjustedEndColumn := value.EndColumn if hasNewline { - adjustedStartColumn-- - adjustedEndColumn-- + value.StartColumn-- + if !isLastLine { + value.EndColumn-- + } } secret := &secrets.Secret{ @@ -351,9 +352,9 @@ func buildSecret( Source: item.GetSource(), RuleID: value.RuleID, StartLine: startLine, - StartColumn: adjustedStartColumn, + StartColumn: value.StartColumn, EndLine: 
endLine, - EndColumn: adjustedEndColumn, + EndColumn: value.EndColumn, Value: value.Secret, LineContent: lineContent, RuleDescription: value.Description, @@ -361,10 +362,22 @@ func buildSecret( return secret, nil } -func getFindingId(item plugins.ISourceItem, finding report.Finding) string { //nolint:gocritic // hugeParam: finding is heavy but needed - idParts := []string{item.GetID(), finding.RuleID, finding.Secret} - sha := sha1.Sum([]byte(strings.Join(idParts, "-"))) //nolint:gosec // SHA1 is used for ID generation only - return fmt.Sprintf("%x", sha) +func getFindingId(item plugins.ISourceItem, finding *report.Finding) (string, error) { + // Context includes only non-sensitive metadata + context := fmt.Sprintf("finding:%s:%s", item.GetID(), finding.RuleID) + + // Use secret hash as input key material + // to avoid errors in FIPS 140-only mode + // which requires the use of keys longer than 112 bits + secretHash := sha256.Sum256([]byte(finding.Secret)) + + // Use the newer HKDF API - Key function does both extract and expand + id, err := hkdf.Key(sha256.New, secretHash[:], nil, context, 20) + if err != nil { + return "", fmt.Errorf("HKDF derivation failed: %w", err) + } + + return hex.EncodeToString(id), nil } func getStartAndEndLines( diff --git a/engine/engine_test.go b/engine/engine_test.go index 9b8c8830..3789aab0 100644 --- a/engine/engine_test.go +++ b/engine/engine_test.go @@ -9,10 +9,12 @@ import ( "path/filepath" "strings" "testing" + "time" "go.uber.org/mock/gomock" "github.com/checkmarx/2ms/v3/engine/chunk" + "github.com/checkmarx/2ms/v3/engine/detect" "github.com/checkmarx/2ms/v3/engine/rules" "github.com/checkmarx/2ms/v3/engine/semaphore" "github.com/checkmarx/2ms/v3/lib/secrets" @@ -22,7 +24,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/zricethezav/gitleaks/v8/config" - "github.com/zricethezav/gitleaks/v8/detect" "github.com/zricethezav/gitleaks/v8/report" ) @@ -157,13 +158,13 @@ func 
TestSecrets(t *testing.T) { ShouldFind: false, }, { - Content: "--set imagePullSecretJfrog.password=AKCp8kqqfQbYifrbyvqusjyk6N3QKprXTv9B8HTitLbJzXT1kW7dDticXTsJpCrbqtizAwK4D \\", - Name: "JFROG Secret with keyword (real example)", + Content: "--docker-password=AKCp8kqX8yeKBTqgm2XExHsp8yVdJn6SAgQmS1nJMfMDmzxEqX74rUGhedaWu7Eovid3VsMwb", + Name: "JFROG Secret as kubectl argument", ShouldFind: true, }, { - Content: "--docker-password=AKCp8kqX8yeKBTqgm2XExHsp8yVdJn6SAgQmS1nJMfMDmzxEqX74rUGhedaWu7Eovid3VsMwb", - Name: "JFROG Secret as kubectl argument", + Content: "--set imagePullSecretJfrog.password=AKCp8kqqfQbYifrbyvqusjyk6N3QKprXTv9B8HTitLbJzXT1kW7dDticXTsJpCrbqtizAwK4D ", + Name: "JFROG Secret with keyword (real example)", ShouldFind: true, }, } @@ -180,19 +181,29 @@ func TestSecrets(t *testing.T) { } t.Run(name, func(t *testing.T) { fmt.Printf("Start test %s", name) - secretsChan := make(chan *secrets.Secret, 1) + secretsChan := make(chan *secrets.Secret, 2) err = detector.DetectFragment(item{content: &secret.Content}, secretsChan, fsPlugin.GetName()) if err != nil { return } - close(secretsChan) - s := <-secretsChan + var s *secrets.Secret + select { + case s = <-secretsChan: + case <-time.After(2 * time.Second): + // Fail if timeout is reached, unless we expect not to find a secret + if secret.ShouldFind { + t.Fatal("timeout waiting for secret on channel") + } + } if secret.ShouldFind { - assert.Equal(t, s.LineContent, secret.Content) + assert.NotNil(t, s, "expected to find a secret, but got nil") + if s != nil { + assert.Equal(t, secret.Content, s.LineContent) + } } else { - assert.Nil(t, s) + assert.Nil(t, s, "expected not to find a secret, but got one") } }) } @@ -315,7 +326,7 @@ func TestDetectFile(t *testing.T) { cfg.Rules = make(map[string]config.Rule) cfg.Keywords = []string{} - detector := detect.NewDetector(cfg) + detector := detect.NewDetector(&cfg) detector.MaxTargetMegaBytes = tc.maxMegabytes engine := &Engine{ rules: nil, @@ -415,7 +426,7 @@ func 
TestDetectChunks(t *testing.T) { cfg.Rules = make(map[string]config.Rule) cfg.Keywords = []string{} - detector := detect.NewDetector(cfg) + detector := detect.NewDetector(&cfg) engine := &Engine{ rules: nil, @@ -512,7 +523,7 @@ func TestSecretsColumnIndex(t *testing.T) { EndLine: 1, } - secret, err := buildSecret(context.Background(), mockItem, finding, fsPlugin.GetName()) + secret, err := buildSecret(context.Background(), mockItem, finding, fsPlugin.GetName(), false) require.NoError(t, err) assert.Equal(t, tt.expectedLineContent, secret.LineContent) @@ -522,6 +533,187 @@ func TestSecretsColumnIndex(t *testing.T) { } } +func TestGetFindingId(t *testing.T) { + // Test data setup + mockItem1 := &item{ + id: "test-item-1", + source: "test-source-1.txt", + } + + mockItem2 := &item{ + id: "test-item-2", + source: "test-source-2.txt", + } + + finding1 := &report.Finding{ + RuleID: "rule-id-1", + Secret: "my-secret-value", + } + + finding2 := &report.Finding{ + RuleID: "rule-id-2", + Secret: "my-secret-value", + } + + finding3 := &report.Finding{ + RuleID: "rule-id-1", + Secret: "different-secret-value", + } + + tests := []struct { + name string + item plugins.ISourceItem + finding *report.Finding + description string + }{ + { + name: "same_inputs_consistent_id", + item: mockItem1, + finding: finding1, + description: "Same inputs should always produce the same ID", + }, + { + name: "same_inputs_consistent_id_duplicate", + item: mockItem1, + finding: finding1, + description: "Duplicate test to verify consistency", + }, + { + name: "different_item_id_different_result", + item: mockItem2, + finding: finding1, + description: "Different item ID should produce different result", + }, + { + name: "different_rule_id_different_result", + item: mockItem1, + finding: finding2, + description: "Different rule ID should produce different result", + }, + { + name: "different_secret_different_result", + item: mockItem1, + finding: finding3, + description: "Different secret should produce 
different result", + }, + { + name: "empty_item_id", + item: &item{ + id: "", + source: "test-source.txt", + }, + finding: finding1, + description: "Empty item ID should still work", + }, + { + name: "empty_rule_id", + item: mockItem1, + finding: &report.Finding{ + RuleID: "", + Secret: "my-secret-value", + }, + description: "Empty rule ID should still work", + }, + { + name: "empty_secret", + item: mockItem1, + finding: &report.Finding{ + RuleID: "rule-id-1", + Secret: "", + }, + description: "Empty secret should still work", + }, + { + name: "unicode_characters", + item: &item{ + id: "test-item-unicodeπŸ”‘πŸš€πŸ”", + source: "test-source-πŸ”‘πŸš€πŸ”.txt", + }, + finding: &report.Finding{ + RuleID: "rule-unicode", + Secret: "secret-with-unicodeπŸ”‘πŸš€πŸ”", + }, + description: "Unicode characters should be handled properly", + }, + { + name: "special_characters", + item: &item{ + id: "test-item-special-!@#$%^&*()", + source: "test-source-special.txt", + }, + finding: &report.Finding{ + RuleID: "rule-special-!@#$%", + Secret: "secret-with-special-chars-[]{}|\\:;\"'<>?,./", + }, + description: "Special characters should be handled properly", + }, + { + name: "very_long_values", + item: &item{ + id: strings.Repeat("a", 1000), + source: "test-source-long.txt", + }, + finding: &report.Finding{ + RuleID: strings.Repeat("b", 1000), + Secret: strings.Repeat("c", 10000), + }, + description: "Very long values should be handled properly", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + id, err := getFindingId(tt.item, tt.finding) + + assert.NoError(t, err, tt.description) + assert.NotEmpty(t, id, "ID should not be empty") + assert.Len(t, id, 40, "ID should be 40 characters long (20 bytes as hex)") + assert.Regexp(t, "^[a-f0-9]+$", id, "ID should be valid lowercase hex") + }) + } + + t.Run("multiple_calls_consistency", func(t *testing.T) { + item := mockItem1 + finding := finding1 + + firstID, err := getFindingId(item, finding) + 
require.NoError(t, err) + + for i := 1; i < 10; i++ { + id, err := getFindingId(item, finding) + require.NoError(t, err) + assert.Equal(t, firstID, id, "Multiple calls with same inputs should produce identical results") + } + }) + + // Test that different combinations produce different IDs + t.Run("different_combinations_produce_different_ids", func(t *testing.T) { + combinations := []struct { + item plugins.ISourceItem + finding *report.Finding + }{ + {mockItem1, finding1}, + {mockItem1, finding2}, + {mockItem1, finding3}, + {mockItem2, finding1}, + {mockItem2, finding2}, + {mockItem2, finding3}, + } + + seenIDs := make(map[string]bool) + + for i, combo := range combinations { + id, err := getFindingId(combo.item, combo.finding) + require.NoError(t, err, "Combination %d should not error", i) + + assert.False(t, seenIDs[id], "ID %s should be unique, but was seen before for combination %d", id, i) + seenIDs[id] = true + } + + assert.Len(t, seenIDs, len(combinations), "All combinations should produce unique IDs") + }) +} + type item struct { content *string id string diff --git a/go.mod b/go.mod index 64c84036..015bcbd3 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,9 @@ module github.com/checkmarx/2ms/v3 go 1.24.4 require ( + github.com/BobuSumisu/aho-corasick v1.0.3 github.com/bwmarrin/discordgo v0.27.1 + github.com/fatih/semgroup v1.2.0 github.com/gitleaks/go-gitdiff v0.9.0 github.com/h2non/filetype v1.1.3 github.com/rs/zerolog v1.32.0 @@ -22,11 +24,9 @@ require ( ) require ( - github.com/BobuSumisu/aho-corasick v1.0.3 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/charmbracelet/lipgloss v0.7.1 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect - github.com/fatih/semgroup v1.2.0 // indirect github.com/fsnotify/fsnotify v1.8.0 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-viper/mapstructure/v2 v2.3.0 // indirect diff --git a/pkg/scan_test.go b/pkg/scan_test.go index 
eab8d74b..16f01296 100644 --- a/pkg/scan_test.go +++ b/pkg/scan_test.go @@ -3,14 +3,15 @@ package scanner import ( "encoding/json" "fmt" + "os" + "testing" + "github.com/checkmarx/2ms/v3/cmd" "github.com/checkmarx/2ms/v3/engine/rules" "github.com/checkmarx/2ms/v3/lib/reporting" "github.com/checkmarx/2ms/v3/lib/secrets" "github.com/checkmarx/2ms/v3/lib/utils" "github.com/stretchr/testify/assert" - "os" - "testing" ) const ( @@ -113,8 +114,8 @@ func TestScan(t *testing.T) { testScanner := NewScanner() actualReport, err := testScanner.Scan(scanItems, ScanConfig{ IgnoreResultIds: []string{ - "a0cd293e6e122a1c7384d5a56781e39ba350c54b", - "40483a2b07fa3beaf234d1a0b5d0931d7b7ae9f7", + "dac14c6111d3a02a23c4fc31ee4759387a7395cd", + "0e9588a11a5355676b9baaa6e722211b8fea767d", }, }) assert.NoError(t, err, "scanner encountered an error") @@ -300,8 +301,8 @@ func TestScan(t *testing.T) { // scan 2 actualReport, err = testScanner.Scan(scanItems, ScanConfig{ IgnoreResultIds: []string{ - "a0cd293e6e122a1c7384d5a56781e39ba350c54b", - "40483a2b07fa3beaf234d1a0b5d0931d7b7ae9f7", + "dac14c6111d3a02a23c4fc31ee4759387a7395cd", + "0e9588a11a5355676b9baaa6e722211b8fea767d", }, }) assert.NoError(t, err, "scanner encountered an error") @@ -432,8 +433,8 @@ func TestScanDynamic(t *testing.T) { testScanner := NewScanner() actualReport, err := testScanner.ScanDynamic(itemsIn, ScanConfig{ IgnoreResultIds: []string{ - "a0cd293e6e122a1c7384d5a56781e39ba350c54b", - "40483a2b07fa3beaf234d1a0b5d0931d7b7ae9f7", + "dac14c6111d3a02a23c4fc31ee4759387a7395cd", + "0e9588a11a5355676b9baaa6e722211b8fea767d", }, }) assert.NoError(t, err, "scanner encountered an error") @@ -633,8 +634,8 @@ func TestScanDynamic(t *testing.T) { // scan 2 actualReport, err = testScanner.ScanDynamic(itemsIn2, ScanConfig{ IgnoreResultIds: []string{ - "a0cd293e6e122a1c7384d5a56781e39ba350c54b", - "40483a2b07fa3beaf234d1a0b5d0931d7b7ae9f7", + "dac14c6111d3a02a23c4fc31ee4759387a7395cd", + "0e9588a11a5355676b9baaa6e722211b8fea767d", }, 
}) assert.NoError(t, err, "scanner encountered an error") diff --git a/pkg/testData/expectedReport.json b/pkg/testData/expectedReport.json index 27202d45..d2399595 100644 --- a/pkg/testData/expectedReport.json +++ b/pkg/testData/expectedReport.json @@ -2,8 +2,8 @@ "totalItemsScanned" : 3, "totalSecretsFound" : 6, "results" : { - "40483a2b07fa3beaf234d1a0b5d0931d7b7ae9f7" : [ { - "id" : "40483a2b07fa3beaf234d1a0b5d0931d7b7ae9f7", + "0e9588a11a5355676b9baaa6e722211b8fea767d" : [ { + "id" : "0e9588a11a5355676b9baaa6e722211b8fea767d", "source" : "testData/secrets/github-pat.txt", "ruleId" : "github-pat", "startLine" : 1, @@ -15,8 +15,8 @@ "ruleDescription" : "Uncovered a GitHub Personal Access Token, potentially leading to unauthorized repository access and sensitive content exposure.", "cvssScore" : 8.2 } ], - "a0cd293e6e122a1c7384d5a56781e39ba350c54b" : [ { - "id" : "a0cd293e6e122a1c7384d5a56781e39ba350c54b", + "dac14c6111d3a02a23c4fc31ee4759387a7395cd" : [ { + "id" : "dac14c6111d3a02a23c4fc31ee4759387a7395cd", "source" : "testData/secrets/jwt.txt", "ruleId" : "jwt", "startLine" : 0, @@ -34,7 +34,7 @@ }, "cvssScore" : 8.2 }, { - "id" : "a0cd293e6e122a1c7384d5a56781e39ba350c54b", + "id" : "dac14c6111d3a02a23c4fc31ee4759387a7395cd", "source" : "testData/secrets/jwt.txt", "ruleId" : "jwt", "startLine" : 1, @@ -52,8 +52,8 @@ }, "cvssScore" : 8.2 } ], - "6949272451f77dc4a38d5f35d583cf56023cd2c1" : [ { - "id" : "6949272451f77dc4a38d5f35d583cf56023cd2c1", + "f42942cf20440b920aa2730fa7f3aa607f379aa5" : [ { + "id" : "f42942cf20440b920aa2730fa7f3aa607f379aa5", "source" : "testData/secrets/github-pat.txt", "ruleId" : "github-pat", "startLine" : 0, @@ -65,8 +65,8 @@ "ruleDescription" : "Uncovered a GitHub Personal Access Token, potentially leading to unauthorized repository access and sensitive content exposure.", "cvssScore" : 8.2 } ], - "f29abe9eacc233a8e5e9c7762bca48589d9c76a2" : [ { - "id" : "f29abe9eacc233a8e5e9c7762bca48589d9c76a2", + 
"4b0fb9bf4c96bd11404f2a3b187acbb621d8ca0c" : [ { + "id" : "4b0fb9bf4c96bd11404f2a3b187acbb621d8ca0c", "source" : "testData/secrets/jwt.txt", "ruleId" : "jwt", "startLine" : 0, @@ -84,8 +84,8 @@ }, "cvssScore" : 8.2 } ], - "fc17c755f40062dcb3f16eb6299f9afc7eccbc56" : [ { - "id" : "fc17c755f40062dcb3f16eb6299f9afc7eccbc56", + "e2cfea40ef825d8b520329982641deebc9c81418" : [ { + "id" : "e2cfea40ef825d8b520329982641deebc9c81418", "source" : "testData/secrets/github-pat.txt", "ruleId" : "github-pat", "startLine" : 0, diff --git a/pkg/testData/expectedReportWithIgnoredResults.json b/pkg/testData/expectedReportWithIgnoredResults.json index 3d99d9dc..96d2ef06 100644 --- a/pkg/testData/expectedReportWithIgnoredResults.json +++ b/pkg/testData/expectedReportWithIgnoredResults.json @@ -2,8 +2,8 @@ "totalItemsScanned" : 3, "totalSecretsFound" : 3, "results" : { - "6949272451f77dc4a38d5f35d583cf56023cd2c1" : [ { - "id" : "6949272451f77dc4a38d5f35d583cf56023cd2c1", + "f42942cf20440b920aa2730fa7f3aa607f379aa5" : [ { + "id" : "f42942cf20440b920aa2730fa7f3aa607f379aa5", "source" : "testData/secrets/github-pat.txt", "ruleId" : "github-pat", "startLine" : 0, @@ -15,8 +15,8 @@ "ruleDescription" : "Uncovered a GitHub Personal Access Token, potentially leading to unauthorized repository access and sensitive content exposure.", "cvssScore" : 8.2 } ], - "f29abe9eacc233a8e5e9c7762bca48589d9c76a2" : [ { - "id" : "f29abe9eacc233a8e5e9c7762bca48589d9c76a2", + "4b0fb9bf4c96bd11404f2a3b187acbb621d8ca0c" : [ { + "id" : "4b0fb9bf4c96bd11404f2a3b187acbb621d8ca0c", "source" : "testData/secrets/jwt.txt", "ruleId" : "jwt", "startLine" : 0, @@ -34,8 +34,8 @@ }, "cvssScore" : 8.2 } ], - "fc17c755f40062dcb3f16eb6299f9afc7eccbc56" : [ { - "id" : "fc17c755f40062dcb3f16eb6299f9afc7eccbc56", + "e2cfea40ef825d8b520329982641deebc9c81418" : [ { + "id" : "e2cfea40ef825d8b520329982641deebc9c81418", "source" : "testData/secrets/github-pat.txt", "ruleId" : "github-pat", "startLine" : 0, diff --git 
a/pkg/testData/expectedReportWithIgnoredRule.json b/pkg/testData/expectedReportWithIgnoredRule.json index a8b120ac..dc9dd699 100644 --- a/pkg/testData/expectedReportWithIgnoredRule.json +++ b/pkg/testData/expectedReportWithIgnoredRule.json @@ -2,8 +2,8 @@ "totalItemsScanned" : 3, "totalSecretsFound" : 3, "results" : { - "a0cd293e6e122a1c7384d5a56781e39ba350c54b" : [ { - "id" : "a0cd293e6e122a1c7384d5a56781e39ba350c54b", + "dac14c6111d3a02a23c4fc31ee4759387a7395cd" : [ { + "id" : "dac14c6111d3a02a23c4fc31ee4759387a7395cd", "source" : "testData/secrets/jwt.txt", "ruleId" : "jwt", "startLine" : 0, @@ -21,7 +21,7 @@ }, "cvssScore" : 8.2 }, { - "id" : "a0cd293e6e122a1c7384d5a56781e39ba350c54b", + "id" : "dac14c6111d3a02a23c4fc31ee4759387a7395cd", "source" : "testData/secrets/jwt.txt", "ruleId" : "jwt", "startLine" : 1, @@ -39,8 +39,8 @@ }, "cvssScore" : 8.2 } ], - "f29abe9eacc233a8e5e9c7762bca48589d9c76a2" : [ { - "id" : "f29abe9eacc233a8e5e9c7762bca48589d9c76a2", + "4b0fb9bf4c96bd11404f2a3b187acbb621d8ca0c" : [ { + "id" : "4b0fb9bf4c96bd11404f2a3b187acbb621d8ca0c", "source" : "testData/secrets/jwt.txt", "ruleId" : "jwt", "startLine" : 0, diff --git a/pkg/testData/expectedReportWithValidation.json b/pkg/testData/expectedReportWithValidation.json index 288f8cf1..88603b75 100644 --- a/pkg/testData/expectedReportWithValidation.json +++ b/pkg/testData/expectedReportWithValidation.json @@ -1,11 +1,11 @@ { "results": { - "40483a2b07fa3beaf234d1a0b5d0931d7b7ae9f7": [ + "0e9588a11a5355676b9baaa6e722211b8fea767d": [ { "cvssScore": 5.2, "endColumn": 102, "endLine": 1, - "id": "40483a2b07fa3beaf234d1a0b5d0931d7b7ae9f7", + "id": "0e9588a11a5355676b9baaa6e722211b8fea767d", "lineContent": " Text_Example = ghp_CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC", "ruleDescription": "Uncovered a GitHub Personal Access Token, potentially leading to unauthorized repository access and sensitive content exposure.", "ruleId": "github-pat", @@ -16,12 +16,12 @@ "value": 
"ghp_CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" } ], - "6949272451f77dc4a38d5f35d583cf56023cd2c1": [ + "f42942cf20440b920aa2730fa7f3aa607f379aa5": [ { "cvssScore": 5.2, "endColumn": 51, "endLine": 0, - "id": "6949272451f77dc4a38d5f35d583cf56023cd2c1", + "id": "f42942cf20440b920aa2730fa7f3aa607f379aa5", "lineContent": "TextExampleghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATextExampleghp_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBTextExample", "ruleDescription": "Uncovered a GitHub Personal Access Token, potentially leading to unauthorized repository access and sensitive content exposure.", "ruleId": "github-pat", @@ -32,7 +32,7 @@ "value": "ghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" } ], - "a0cd293e6e122a1c7384d5a56781e39ba350c54b": [ + "dac14c6111d3a02a23c4fc31ee4759387a7395cd": [ { "cvssScore": 8.2, "endColumn": 232, @@ -43,7 +43,7 @@ "sub": "mockSub2" } }, - "id": "a0cd293e6e122a1c7384d5a56781e39ba350c54b", + "id": "dac14c6111d3a02a23c4fc31ee4759387a7395cd", "lineContent": "TextExample eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJtb2NrU3ViMSIsIm5hbWUiOiJtb2NrTmFtZTEifQ.dummysignature1 TextExample eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJtb2NrU3ViMiIsIm5hbWUiOiJtb2NrTmFtZTIifQ.dummysignature2 TextExample", "ruleDescription": "Uncovered a JSON Web Token, which may lead to unauthorized access to web applications and sensitive user data.", "ruleId": "jwt", @@ -63,7 +63,7 @@ "sub": "mockSub2" } }, - "id": "a0cd293e6e122a1c7384d5a56781e39ba350c54b", + "id": "dac14c6111d3a02a23c4fc31ee4759387a7395cd", "lineContent": " Text_Example = eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJtb2NrU3ViMiIsIm5hbWUiOiJtb2NrTmFtZTIifQ.dummysignature2", "ruleDescription": "Uncovered a JSON Web Token, which may lead to unauthorized access to web applications and sensitive user data.", "ruleId": "jwt", @@ -74,7 +74,7 @@ "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJtb2NrU3ViMiIsIm5hbWUiOiJtb2NrTmFtZTIifQ.dummysignature2" } ], - "f29abe9eacc233a8e5e9c7762bca48589d9c76a2": [ + 
"4b0fb9bf4c96bd11404f2a3b187acbb621d8ca0c": [ { "cvssScore": 8.2, "endColumn": 116, @@ -85,7 +85,7 @@ "sub": "mockSub1" } }, - "id": "f29abe9eacc233a8e5e9c7762bca48589d9c76a2", + "id": "4b0fb9bf4c96bd11404f2a3b187acbb621d8ca0c", "lineContent": "TextExample eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJtb2NrU3ViMSIsIm5hbWUiOiJtb2NrTmFtZTEifQ.dummysignature1 TextExample eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJtb2NrU3ViMiIsIm5hbWUiOiJtb2NrTmFtZTIifQ.dummysignature2 TextExample", "ruleDescription": "Uncovered a JSON Web Token, which may lead to unauthorized access to web applications and sensitive user data.", "ruleId": "jwt", @@ -96,12 +96,12 @@ "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJtb2NrU3ViMSIsIm5hbWUiOiJtb2NrTmFtZTEifQ.dummysignature1" } ], - "fc17c755f40062dcb3f16eb6299f9afc7eccbc56": [ + "e2cfea40ef825d8b520329982641deebc9c81418": [ { "cvssScore": 5.2, "endColumn": 102, "endLine": 0, - "id": "fc17c755f40062dcb3f16eb6299f9afc7eccbc56", + "id": "e2cfea40ef825d8b520329982641deebc9c81418", "lineContent": "TextExampleghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATextExampleghp_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBTextExample", "ruleDescription": "Uncovered a GitHub Personal Access Token, potentially leading to unauthorized repository access and sensitive content exposure.", "ruleId": "github-pat", diff --git a/tests/e2e_test.go b/tests/e2e_test.go index c959d4a5..2346b4ef 100644 --- a/tests/e2e_test.go +++ b/tests/e2e_test.go @@ -24,6 +24,9 @@ type cli struct { func createCLI(outputDir string) (cli, error) { executable := path.Join(outputDir, "2ms") + if runtime.GOOS == "windows" { + executable += ".exe" + } lib, err := build.Import("github.com/checkmarx/2ms/v3", "", build.FindOnly) if err != nil { return cli{}, fmt.Errorf("failed to import 2ms: %s", err) @@ -93,8 +96,8 @@ func TestIntegration(t *testing.T) { t.Fatalf("failed to get report: %s", err) } - if len(report.Results) != 1 { - t.Errorf("expected one result, got %d", len(report.Results)) 
+ if len(report.Results) != 2 { + t.Errorf("expected two results (multiple rules can match the same secret), got %d", len(report.Results)) } }) diff --git a/tests/testData/baseline/baseline.csv b/tests/testData/baseline/baseline.csv new file mode 100644 index 00000000..d3f95372 --- /dev/null +++ b/tests/testData/baseline/baseline.csv @@ -0,0 +1,2 @@ +RuleID,Commit,File,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email,Fingerprint +1,b,c,f,s,m,s,e,s,e,a,m,f,r,f \ No newline at end of file diff --git a/tests/testData/baseline/baseline.json b/tests/testData/baseline/baseline.json new file mode 100644 index 00000000..3a4c5427 --- /dev/null +++ b/tests/testData/baseline/baseline.json @@ -0,0 +1,40 @@ +[ + { + "Description": "PyPI upload token", + "StartLine": 32, + "EndLine": 32, + "StartColumn": 21, + "EndColumn": 106, + "Match": "************************", + "Secret": "************************", + "File": "detect/detect_test.go", + "Commit": "9326f35380636bcbe61e94b0584d1618c4b5c2c2", + "Entropy": 1.9606875, + "Author": "****", + "Email": "****", + "Date": "2022-03-07T14:33:06Z", + "Message": "Escape - character in regex character groups (#802)\n\n* fix char escape\n\n* add test\n\n* fix verbosity in make test", + "Tags": [], + "RuleID": "pypi-upload-token", + "Fingerprint": "9326f35380636bcbe61e94b0584d1618c4b5c2c2:detect/detect_test.go:pypi-upload-token:32" + }, + { + "Description": "PyPI upload token", + "StartLine": 33, + "EndLine": 33, + "StartColumn": 21, + "EndColumn": 106, + "Match": "************************", + "Secret": "************************", + "File": "detect/detect_test.go", + "Commit": "9326f35380636bcbe61e94b0584d1618c4b5c2c2", + "Entropy": 1.9606875, + "Author": "****", + "Email": "****", + "Date": "2022-03-07T14:33:06Z", + "Message": "Escape - character in regex character groups (#802)\n\n* fix char escape\n\n* add test\n\n* fix verbosity in make test", + "Tags": [], + "RuleID": "pypi-upload-token", + "Fingerprint": 
"9326f35380636bcbe61e94b0584d1618c4b5c2c2:detect/detect_test.go:pypi-upload-token:33" + } +] diff --git a/tests/testData/baseline/baseline.sarif b/tests/testData/baseline/baseline.sarif new file mode 100644 index 00000000..b2f84890 --- /dev/null +++ b/tests/testData/baseline/baseline.sarif @@ -0,0 +1,6 @@ +{ + "$schema": "https://json.schemastore.org/sarif-2.1.0.json", + "version": "2.1.0", + "runs": [ + ] +} diff --git a/tests/testData/config/allow_aws_re.toml b/tests/testData/config/allow_aws_re.toml new file mode 100644 index 00000000..d5afba9a --- /dev/null +++ b/tests/testData/config/allow_aws_re.toml @@ -0,0 +1,9 @@ +title = "simple config with allowlist for aws" + +[[rules]] + description = "AWS Access Key" + id = "aws-access-key" + regex = '''(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}''' + tags = ["key", "AWS"] + [rules.allowlist] + regexes = ['''AKIALALEMEL33243OLIA'''] diff --git a/tests/testData/config/allow_commit.toml b/tests/testData/config/allow_commit.toml new file mode 100644 index 00000000..6b421121 --- /dev/null +++ b/tests/testData/config/allow_commit.toml @@ -0,0 +1,9 @@ +title = "simple config with allowlist for a specific commit" + +[[rules]] + description = "AWS Access Key" + id = "aws-access-key" + regex = '''(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}''' + tags = ["key", "AWS"] + [rules.allowlist] + commits = ['''allowthiscommit'''] diff --git a/tests/testData/config/allow_global_aws_re.toml b/tests/testData/config/allow_global_aws_re.toml new file mode 100644 index 00000000..f595ce61 --- /dev/null +++ b/tests/testData/config/allow_global_aws_re.toml @@ -0,0 +1,8 @@ +[[rules]] + description = "AWS Access Key" + id = "aws-access-key" + regex = '''(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}''' + tags = ["key", "AWS"] + +[allowlist] + regexes = ['''AKIALALEMEL33243OLIA'''] diff --git a/tests/testData/config/allow_path.toml b/tests/testData/config/allow_path.toml new file mode 100644 index 00000000..ebdb0ffa --- /dev/null +++ 
b/tests/testData/config/allow_path.toml @@ -0,0 +1,9 @@ +title = "simple config with allowlist for .go files" + +[[rules]] + description = "AWS Access Key" + id = "aws-access-key" + regex = '''(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}''' + tags = ["key", "AWS"] + [rules.allowlist] + paths = ['''.go'''] diff --git a/tests/testData/config/bad_entropy_group.toml b/tests/testData/config/bad_entropy_group.toml new file mode 100644 index 00000000..8e4d1c25 --- /dev/null +++ b/tests/testData/config/bad_entropy_group.toml @@ -0,0 +1,8 @@ +title = "gitleaks config" + +[[rules]] +id = "discord-api-key" +description = "Discord API key" +regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]''' +secretGroup = 5 +entropy = 3.5 diff --git a/tests/testData/config/entropy_group - Copy.toml b/tests/testData/config/entropy_group - Copy.toml new file mode 100644 index 00000000..eacfc50e --- /dev/null +++ b/tests/testData/config/entropy_group - Copy.toml @@ -0,0 +1,8 @@ +title = "gitleaks config" + +[[rules]] +id = "discord-api-key" +description = "Discord API key" +regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]''' +secretGroup = 3 +entropy = 3.5 diff --git a/tests/testData/config/entropy_group.toml b/tests/testData/config/entropy_group.toml new file mode 100644 index 00000000..eacfc50e --- /dev/null +++ b/tests/testData/config/entropy_group.toml @@ -0,0 +1,8 @@ +title = "gitleaks config" + +[[rules]] +id = "discord-api-key" +description = "Discord API key" +regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]''' +secretGroup = 3 +entropy = 3.5 diff --git a/tests/testData/config/escaped_character_group.toml b/tests/testData/config/escaped_character_group.toml new file mode 100644 index 00000000..b2803953 --- /dev/null +++ b/tests/testData/config/escaped_character_group.toml @@ -0,0 +1,8 @@ +title = "gitleaks config" +# 
https://learnxinyminutes.com/docs/toml/ for toml reference + +[[rules]] + id = "pypi-upload-token" + description = "PyPI upload token" + regex = '''pypi-AgEIcHlwaS5vcmc[A-Za-z0-9\-_]{50,1000}''' + tags = ["key", "pypi"] \ No newline at end of file diff --git a/tests/testData/config/generic_with_py_path - Copy.toml b/tests/testData/config/generic_with_py_path - Copy.toml new file mode 100644 index 00000000..a528893e --- /dev/null +++ b/tests/testData/config/generic_with_py_path - Copy.toml @@ -0,0 +1,36 @@ +title = "gitleaks config" + +[[rules]] +description = "Generic API Key" +id = "generic-api-key" +regex = '''(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]''' +path = '''.py''' +entropy = 3.7 +secretGroup = 4 + +[allowlist] +description = "global allow lists" +regexes = [ + '''219-09-9999''', + '''078-05-1120''', + '''(9[0-9]{2}|666)-\d{2}-\d{4}''', + '''process''', + '''getenv''', + '''\.env''', + '''env\(''', + '''env\.''', + '''setting''', + '''load''', + '''token''', + '''password''', + '''secret''', + '''api\_key''', + '''apikey''', + '''api\-key''', + ] +paths = [ + '''gitleaks.toml''', + '''(.*?)(jpg|gif|doc|pdf|bin|svg|socket)$''', + '''(go.mod|go.sum)$''' +] + diff --git a/tests/testData/config/generic_with_py_path.toml b/tests/testData/config/generic_with_py_path.toml new file mode 100644 index 00000000..a528893e --- /dev/null +++ b/tests/testData/config/generic_with_py_path.toml @@ -0,0 +1,36 @@ +title = "gitleaks config" + +[[rules]] +description = "Generic API Key" +id = "generic-api-key" +regex = '''(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]''' +path = '''.py''' +entropy = 3.7 +secretGroup = 4 + +[allowlist] +description = "global allow lists" +regexes = [ + '''219-09-9999''', + '''078-05-1120''', + '''(9[0-9]{2}|666)-\d{2}-\d{4}''', + '''process''', + '''getenv''', + '''\.env''', + '''env\(''', + 
'''env\.''', + '''setting''', + '''load''', + '''token''', + '''password''', + '''secret''', + '''api\_key''', + '''apikey''', + '''api\-key''', + ] +paths = [ + '''gitleaks.toml''', + '''(.*?)(jpg|gif|doc|pdf|bin|svg|socket)$''', + '''(go.mod|go.sum)$''' +] + diff --git a/tests/testData/config/simple.toml b/tests/testData/config/simple.toml new file mode 100644 index 00000000..44c7d138 --- /dev/null +++ b/tests/testData/config/simple.toml @@ -0,0 +1,222 @@ +title = "gitleaks config" +# https://learnxinyminutes.com/docs/toml/ for toml reference + +[[rules]] + description = "AWS Access Key" + id = "aws-access-key" + regex = '''(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}''' + tags = ["key", "AWS"] + +[[rules]] + description = "AWS Secret Key" + id = "aws-secret-key" + regex = '''(?i)aws_(.{0,20})?=?.[\'\"0-9a-zA-Z\/+]{40}''' + tags = ["key", "AWS"] + +[[rules]] + description = "AWS MWS key" + id = "aws-mws-key" + regex = '''amzn\.mws\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}''' + tags = ["key", "AWS", "MWS"] + +[[rules]] + description = "Facebook Secret Key" + id = "facebook-secret-key" + regex = '''(?i)(facebook|fb)(.{0,20})?(?-i)['\"][0-9a-f]{32}['\"]''' + tags = ["key", "Facebook"] + +[[rules]] + description = "Facebook Client ID" + id = "facebook-client-id" + regex = '''(?i)(facebook|fb)(.{0,20})?['\"][0-9]{13,17}['\"]''' + tags = ["key", "Facebook"] + +[[rules]] + description = "Twitter Secret Key" + id = "twitter-secret-key" + regex = '''(?i)twitter(.{0,20})?['\"][0-9a-z]{35,44}['\"]''' + tags = ["key", "Twitter"] + +[[rules]] + description = "Twitter Client ID" + id = "twitter-client-id" + regex = '''(?i)twitter(.{0,20})?['\"][0-9a-z]{18,25}['\"]''' + tags = ["client", "Twitter"] + +[[rules]] + description = "Github Personal Access Token" + id = "github-pat" + regex = '''ghp_[0-9a-zA-Z]{36}''' + tags = ["key", "Github"] +[[rules]] + description = "Github OAuth Access Token" + id = "github-oauth" + regex = '''gho_[0-9a-zA-Z]{36}''' + 
tags = ["key", "Github"] +[[rules]] + id = "github-app" + description = "Github App Token" + regex = '''(ghu|ghs)_[0-9a-zA-Z]{36}''' + tags = ["key", "Github"] +[[rules]] + id = "github-refresh" + description = "Github Refresh Token" + regex = '''ghr_[0-9a-zA-Z]{76}''' + tags = ["key", "Github"] + +[[rules]] + id = "linkedin-client" + description = "LinkedIn Client ID" + regex = '''(?i)linkedin(.{0,20})?(?-i)[0-9a-z]{12}''' + tags = ["client", "LinkedIn"] + +[[rules]] + id = "linkedin-secret" + description = "LinkedIn Secret Key" + regex = '''(?i)linkedin(.{0,20})?[0-9a-z]{16}''' + tags = ["secret", "LinkedIn"] + +[[rules]] + id = "slack" + description = "Slack" + regex = '''xox[baprs]-([0-9a-zA-Z]{10,48})?''' + tags = ["key", "Slack"] + +[[rules]] + id = "apkey" + description = "Asymmetric Private Key" + regex = '''-----BEGIN ((EC|PGP|DSA|RSA|OPENSSH) )?PRIVATE KEY( BLOCK)?-----''' + tags = ["key", "AsymmetricPrivateKey"] + +[[rules]] + id = "google" + description = "Google API key" + regex = '''AIza[0-9A-Za-z\-_]{35}''' + tags = ["key", "Google"] + +[[rules]] + id = "google" + description = "Google (GCP) Service Account" + regex = '''"type": "service_account"''' + tags = ["key", "Google"] + +[[rules]] + id = "heroku" + description = "Heroku API key" + regex = '''(?i)heroku(.{0,20})?[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}''' + tags = ["key", "Heroku"] + +[[rules]] + id = "mailchimp" + description = "MailChimp API key" + regex = '''(?i)(mailchimp|mc)(.{0,20})?[0-9a-f]{32}-us[0-9]{1,2}''' + tags = ["key", "Mailchimp"] + +[[rules]] + id = "mailgun" + description = "Mailgun API key" + regex = '''((?i)(mailgun|mg)(.{0,20})?)?key-[0-9a-z]{32}''' + tags = ["key", "Mailgun"] + +[[rules]] + id = "paypal" + description = "PayPal Braintree access token" + regex = '''access_token\$production\$[0-9a-z]{16}\$[0-9a-f]{32}''' + tags = ["key", "Paypal"] + +[[rules]] + id = "piacatic" + description = "Picatic API key" + regex = '''sk_live_[0-9a-z]{32}''' + tags = ["key", 
"Picatic"] + +[[rules]] + id = "sendgrid" + description = "SendGrid API Key" + regex = '''SG\.[\w_]{16,32}\.[\w_]{16,64}''' + tags = ["key", "SendGrid"] + +[[rules]] + description = "Sidekiq Secret" + id = "sidekiq-secret" + regex = '''(?i)(?:BUNDLE_ENTERPRISE__CONTRIBSYS__COM|BUNDLE_GEMS__CONTRIBSYS__COM)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{8}:[a-f0-9]{8})(?:['|\"|\n|\r|\s|\x60|;]|$)''' + secretGroup = 1 + keywords = [ + "bundle_enterprise__contribsys__com","bundle_gems__contribsys__com", + ] + +[[rules]] + description = "Sidekiq Sensitive URL" + id = "sidekiq-sensitive-url" + regex = '''(?i)\b(http(?:s??):\/\/)([a-f0-9]{8}:[a-f0-9]{8})@(?:gems.contribsys.com|enterprise.contribsys.com)(?:[\/|\#|\?|:]|$)''' + secretGroup = 2 + keywords = [ + "gems.contribsys.com","enterprise.contribsys.com", + ] + +[[rules]] + id = "slack-webhook" + description = "Slack Webhook" + regex = '''https://hooks.slack.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8,12}/[a-zA-Z0-9_]{24}''' + tags = ["key", "slack"] + +[[rules]] + id = "stripe" + description = "Stripe API key" + regex = '''(?i)stripe(.{0,20})?[sr]k_live_[0-9a-zA-Z]{24}''' + tags = ["key", "Stripe"] + +[[rules]] + id = "square" + description = "Square access token" + regex = '''sq0atp-[0-9A-Za-z\-_]{22}''' + tags = ["key", "square"] + +[[rules]] + id = "square-oauth" + description = "Square OAuth secret" + regex = '''sq0csp-[0-9A-Za-z\-_]{43}''' + tags = ["key", "square"] + +[[rules]] + id = "twilio" + description = "Twilio API key" + regex = '''(?i)twilio(.{0,20})?SK[0-9a-f]{32}''' + tags = ["key", "twilio"] + +[[rules]] + id = "dynatrace" + description = "Dynatrace ttoken" + regex = '''dt0[a-zA-Z]{1}[0-9]{2}\.[A-Z0-9]{24}\.[A-Z0-9]{64}''' + tags = ["key", "Dynatrace"] + +[[rules]] + id = "shopify" + description = "Shopify shared secret" + regex = '''shpss_[a-fA-F0-9]{32}''' + tags = ["key", "Shopify"] + +[[rules]] + id = "shopify-access" + description = 
"Shopify access token" + regex = '''shpat_[a-fA-F0-9]{32}''' + tags = ["key", "Shopify"] + +[[rules]] + id = "shopify-custom" + description = "Shopify custom app access token" + regex = '''shpca_[a-fA-F0-9]{32}''' + tags = ["key", "Shopify"] + +[[rules]] + id = "shopify-private" + description = "Shopify private app access token" + regex = '''shppa_[a-fA-F0-9]{32}''' + tags = ["key", "Shopify"] + +[[rules]] + id = "pypi" + description = "PyPI upload token" + regex = '''pypi-AgEIcHlwaS5vcmc[A-Za-z0-9-_]{50,1000}''' + tags = ["key", "pypi"] + diff --git a/tests/testData/expectedReport/multi_line_secret_report.json b/tests/testData/expectedReport/multi_line_secret_report.json index 6db9c2cf..9f05ef72 100644 --- a/tests/testData/expectedReport/multi_line_secret_report.json +++ b/tests/testData/expectedReport/multi_line_secret_report.json @@ -2,24 +2,24 @@ "totalItemsScanned": 1, "totalSecretsFound": 3, "results": { - "047d26912b890e89c7f01b7ec9e926390224e4f0": [ + "0a444c3960dbca51baeacf6d5193f64b5ddf0d66": [ { - "id": "047d26912b890e89c7f01b7ec9e926390224e4f0", + "id": "0a444c3960dbca51baeacf6d5193f64b5ddf0d66", "source": "testData/input/multi_line_secret.txt", "ruleId": "private-key", "startLine": 3, "endLine": 4, "lineContent": " -----BEGIN RSA PRIVATE KEY----- MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu KUpRKfFLfRYC9AIKjbJTWit+Cq\n vjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp79mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 -----END RSA PRIVATE KEY-----", "startColumn": 9, - "endColumn": 376, + "endColumn": 377, "value": "-----BEGIN RSA PRIVATE KEY----- MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu KUpRKfFLfRYC9AIKjbJTWit+Cq\r\n vjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm 
o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp79mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 -----END RSA PRIVATE KEY-----", "ruleDescription": "Identified a Private Key, which may compromise cryptographic security and sensitive data encryption.", "cvssScore": 8.2 } ], - "58e5a02e5571db6dc1f9c0fdba8d86e254225bf1": [ + "a5ad0c6af65f27d470931b60d78c122e59dc3f07": [ { - "id": "58e5a02e5571db6dc1f9c0fdba8d86e254225bf1", + "id": "a5ad0c6af65f27d470931b60d78c122e59dc3f07", "source": "testData/input/multi_line_secret.txt", "ruleId": "generic-api-key", "startLine": 1, @@ -32,9 +32,9 @@ "cvssScore": 8.2 } ], - "ed47a9a9052d119d91763ce84d689370fdbccf1f": [ + "bc98bd8fae8e5b167ac3b692a75b1c9a794a0143": [ { - "id": "ed47a9a9052d119d91763ce84d689370fdbccf1f", + "id": "bc98bd8fae8e5b167ac3b692a75b1c9a794a0143", "source": "testData/input/multi_line_secret.txt", "ruleId": "generic-api-key", "startLine": 2, diff --git a/tests/testData/expectedReport/secret_at_end_report.json b/tests/testData/expectedReport/secret_at_end_report.json index 76431295..3eb9ffb6 100644 --- a/tests/testData/expectedReport/secret_at_end_report.json +++ b/tests/testData/expectedReport/secret_at_end_report.json @@ -2,32 +2,32 @@ "totalItemsScanned": 1, "totalSecretsFound": 2, "results": { - "6a3e642795e27b989c54ac0c91147fe8e9a405b4": [ + "4871552fe2d7c5767ac450e62651de0f39132be2": [ { - "id": "6a3e642795e27b989c54ac0c91147fe8e9a405b4", + "id": "4871552fe2d7c5767ac450e62651de0f39132be2", "source": "testData/input/secret_at_end.txt", "ruleId": "generic-api-key", - "startLine": 2, - "endLine": 2, - "lineContent": "\t\t`\"client_secret\" : \"6da89121079f83b2eb6acccf8219ea982c3d79bccc3e9c6a85856480661f8fde\",`", - "startColumn": 5, - "endColumn": 87, - "value": 
"6da89121079f83b2eb6acccf8219ea982c3d79bccc3e9c6a85856480661f8fde", + "startLine": 1, + "endLine": 1, + "lineContent": "`\"client_id\" : \"0afae57f3ccfd9d7f5767067bc48b30f719e271ba470488056e37ab35d4b6506\"`,", + "startColumn": 3, + "endColumn": 81, + "value": "0afae57f3ccfd9d7f5767067bc48b30f719e271ba470488056e37ab35d4b6506", "ruleDescription": "Detected a Generic API Key, potentially exposing access to various services and sensitive operations.", "cvssScore": 8.2 } ], - "84bc054139c2363b37538209055a2d9c23026fab": [ + "80fa9bfa31b3488b04b07983e636651b38bb1e11": [ { - "id": "84bc054139c2363b37538209055a2d9c23026fab", + "id": "80fa9bfa31b3488b04b07983e636651b38bb1e11", "source": "testData/input/secret_at_end.txt", "ruleId": "generic-api-key", - "startLine": 1, - "endLine": 1, - "lineContent": "`\"client_id\" : \"0afae57f3ccfd9d7f5767067bc48b30f719e271ba470488056e37ab35d4b6506\"`,", - "startColumn": 3, - "endColumn": 81, - "value": "0afae57f3ccfd9d7f5767067bc48b30f719e271ba470488056e37ab35d4b6506", + "startLine": 2, + "endLine": 2, + "lineContent": "\t\t`\"client_secret\" : \"6da89121079f83b2eb6acccf8219ea982c3d79bccc3e9c6a85856480661f8fde\",`", + "startColumn": 5, + "endColumn": 88, + "value": "6da89121079f83b2eb6acccf8219ea982c3d79bccc3e9c6a85856480661f8fde", "ruleDescription": "Detected a Generic API Key, potentially exposing access to various services and sensitive operations.", "cvssScore": 8.2 } diff --git a/tests/testData/expectedReport/secret_at_end_with_newline_report.json b/tests/testData/expectedReport/secret_at_end_with_newline_report.json index 2b634e9a..ce95bf07 100644 --- a/tests/testData/expectedReport/secret_at_end_with_newline_report.json +++ b/tests/testData/expectedReport/secret_at_end_with_newline_report.json @@ -2,32 +2,32 @@ "totalItemsScanned": 1, "totalSecretsFound": 2, "results": { - "6af9b6df67e2971f45e6e27d4e068c2a515d2961": [ + "8d4ce06e0e27b22695fe1b99b70bc5c6896da00a": [ { - "id": "6af9b6df67e2971f45e6e27d4e068c2a515d2961", + "id": 
"8d4ce06e0e27b22695fe1b99b70bc5c6896da00a", "source": "testData/input/secret_at_end_with_newline.txt", "ruleId": "generic-api-key", - "startLine": 2, - "endLine": 2, - "lineContent": "\t\t`\"client_secret\" : \"6da89121079f83b2eb6acccf8219ea982c3d79bccc3e9c6a85856480661f8fde\",`", - "startColumn": 5, - "endColumn": 87, - "value": "6da89121079f83b2eb6acccf8219ea982c3d79bccc3e9c6a85856480661f8fde", + "startLine": 1, + "endLine": 1, + "lineContent": "`\"client_id\" : \"0afae57f3ccfd9d7f5767067bc48b30f719e271ba470488056e37ab35d4b6506\"`,", + "startColumn": 3, + "endColumn": 81, + "value": "0afae57f3ccfd9d7f5767067bc48b30f719e271ba470488056e37ab35d4b6506", "ruleDescription": "Detected a Generic API Key, potentially exposing access to various services and sensitive operations.", "cvssScore": 8.2 } ], - "f4b4bf79a4000811227225e3c556ea3862cfcb1a": [ + "c7510bd9bcfa7887912dd28bd57aab89be736acd": [ { - "id": "f4b4bf79a4000811227225e3c556ea3862cfcb1a", + "id": "c7510bd9bcfa7887912dd28bd57aab89be736acd", "source": "testData/input/secret_at_end_with_newline.txt", "ruleId": "generic-api-key", - "startLine": 1, - "endLine": 1, - "lineContent": "`\"client_id\" : \"0afae57f3ccfd9d7f5767067bc48b30f719e271ba470488056e37ab35d4b6506\"`,", - "startColumn": 3, - "endColumn": 81, - "value": "0afae57f3ccfd9d7f5767067bc48b30f719e271ba470488056e37ab35d4b6506", + "startLine": 2, + "endLine": 2, + "lineContent": "\t\t`\"client_secret\" : \"6da89121079f83b2eb6acccf8219ea982c3d79bccc3e9c6a85856480661f8fde\",`", + "startColumn": 5, + "endColumn": 87, + "value": "6da89121079f83b2eb6acccf8219ea982c3d79bccc3e9c6a85856480661f8fde", "ruleDescription": "Detected a Generic API Key, potentially exposing access to various services and sensitive operations.", "cvssScore": 8.2 } diff --git a/tests/testData/input/multi_line_secret.txt b/tests/testData/input/multi_line_secret.txt index 75873920..4f550f7d 100644 --- a/tests/testData/input/multi_line_secret.txt +++ b/tests/testData/input/multi_line_secret.txt 
@@ -1,5 +1,4 @@ `"client_id" : "0afae57f3ccfd9d7f5767067bc48b30f719e271ba470488056e37ab35d4b6506"`, `"client_secret" : "6da89121079f83b2eb6acccf8219ea982c3d79bccc3e9c6a85856480661f8fde",` -----BEGIN RSA PRIVATE KEY----- MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu KUpRKfFLfRYC9AIKjbJTWit+Cq - vjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp79mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 -----END RSA PRIVATE KEY----- - \ No newline at end of file + vjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp79mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 -----END RSA PRIVATE KEY----- \ No newline at end of file From 9299774a31799bcedeee95a5b766375474621678 Mon Sep 17 00:00:00 2001 From: Miguel Neiva <miguel.neiva@checkmarx.com> Date: Mon, 4 Aug 2025 13:02:01 +0100 Subject: [PATCH 2/4] fix: ignore some secrets used for testing --- .2ms.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.2ms.yml b/.2ms.yml index 9d27b838..82de09a1 100644 --- a/.2ms.yml +++ b/.2ms.yml @@ -359,4 +359,15 @@ ignore-result: - 8d42615a78d80100c93cb211b696a9d8dc4cae52 # test data from toml's files - 11a01f5f59e11504cda26fadc1cde1d7869e346e # test data from toml's files - 772960543873c38fd49747b70d34302bc7744528 # test data from toml's files -- 152006c3dc742f77b2ee1de6575694a3e4393979 # test data from toml's files \ No newline at end of file +- 152006c3dc742f77b2ee1de6575694a3e4393979 # test data from toml's files +- 304660186ce2139e1d35519f110d874d42a9a9cc # 
engine/detect/detect_test.go - authenticated-url +- 8004c3c8e053958a857d534f5e5013573a1b5487 # engine/detect/detect_test.go - sidekiq-sensitive-url +- 1c18d9549224b0e87ca88d52ea29c28d16d88fc0 # engine/detect/detect_test.go - discord-api-token +- 76419bff3245573f476e8810dd51d2dc5505fae8 # engine/detect/detect_test.go - sidekiq-secret +- d7416e501be97e378bfe156bf46a712487901a7e # engine/detect/detect_test.go - aws-access-token +- be73c0549f927433e81284de4b3d1094fc3e0e20 # engine/engine_test.go - generic-api-key +- 234b995eeac64bf996c32397906ff8ce1aacbe10 # engine/engine_test.go - jfrog-api-key +- 08681ea90c9f89fb22de8022c2fd1c05c2065695 # tests/testData/config/allow_aws_re.toml - aws-access-token +- c38c565fc1422c74eff5a0b7a24629183e4bdc20 # tests/testData/config/allow_global_aws_re.toml - aws-access-token +- 7a688e807f1c42b1b9dadcea277e919d5a5c6bfb # engine/detect/detect_test.go - pypi-upload-token +- 54f32cfb047240ab6c052d06b205881f14b87e37 # engine/detect/detect_test.go - aws-access-token \ No newline at end of file From 097fe5d3e59239ca4a93dd278ca3882ebce72e25 Mon Sep 17 00:00:00 2001 From: Miguel Neiva <miguel.neiva@checkmarx.com> Date: Mon, 4 Aug 2025 13:07:32 +0100 Subject: [PATCH 3/4] fix: ignore some secrets used for testing --- .2ms.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.2ms.yml b/.2ms.yml index 82de09a1..a13138bb 100644 --- a/.2ms.yml +++ b/.2ms.yml @@ -370,4 +370,15 @@ ignore-result: - 08681ea90c9f89fb22de8022c2fd1c05c2065695 # tests/testData/config/allow_aws_re.toml - aws-access-token - c38c565fc1422c74eff5a0b7a24629183e4bdc20 # tests/testData/config/allow_global_aws_re.toml - aws-access-token - 7a688e807f1c42b1b9dadcea277e919d5a5c6bfb # engine/detect/detect_test.go - pypi-upload-token -- 54f32cfb047240ab6c052d06b205881f14b87e37 # engine/detect/detect_test.go - aws-access-token \ No newline at end of file +- 54f32cfb047240ab6c052d06b205881f14b87e37 # engine/detect/detect_test.go - aws-access-token +- 
63724b4af9dd449dcbe623377e3314e1d9d5a2b0 # engine/engine_test.go - jfrog-api-key +- 68e6db68e336a98fe80d4ae52a2e53716fb1f42c # engine/detect/detect_test.go - sidekiq-secret +- c62aee57b319e53a5077e9a83b29ac8fe33bf8a2 # engine/detect/detect_test.go - aws-access-token +- ee892683cf62c5fcb40d8721e206f67b6bf27a77 # engine/detect/detect_test.go - pypi-upload-token +- c4cc5b5f0b3fb3928a392df5cbb807c15f708bb1 # engine/detect/detect_test.go - sidekiq-sensitive-url +- 9ee706d9ff5f204565c75ecbcddf468398fd0847 # engine/detect/detect_test.go - authenticated-url +- 5771ee546b0a7b940c4d7faeec4b49e698ebf56d # engine/engine_test.go - generic-api-key +- f96a7ed264f7a8cc81e1edee6b380615bc898a00 # tests/testData/config/allow_aws_re.toml - aws-access-token +- 1e16cbbc563653b822410e6f0e55ca8b8d4d33f8 # tests/testData/config/allow_global_aws_re.toml - aws-access-token +- 92c6131468686fe9c75a4926f40ce2419690eea9 # engine/detect/detect_test.go - aws-access-token +- 48121878ef71a52cf74d58d47281c7e6ef61f1eb # engine/detect/detect_test.go - discord-api-token \ No newline at end of file From 448b7cc99d2501dc7be15f4b2dbc4bfd8d699ece Mon Sep 17 00:00:00 2001 From: Miguel Neiva <miguel.neiva@checkmarx.com> Date: Mon, 4 Aug 2025 13:11:36 +0100 Subject: [PATCH 4/4] chore: push detecter logic to our side --- .2ms.yml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/.2ms.yml b/.2ms.yml index a13138bb..56c51e1c 100644 --- a/.2ms.yml +++ b/.2ms.yml @@ -381,4 +381,17 @@ ignore-result: - f96a7ed264f7a8cc81e1edee6b380615bc898a00 # tests/testData/config/allow_aws_re.toml - aws-access-token - 1e16cbbc563653b822410e6f0e55ca8b8d4d33f8 # tests/testData/config/allow_global_aws_re.toml - aws-access-token - 92c6131468686fe9c75a4926f40ce2419690eea9 # engine/detect/detect_test.go - aws-access-token -- 48121878ef71a52cf74d58d47281c7e6ef61f1eb # engine/detect/detect_test.go - discord-api-token \ No newline at end of file +- 48121878ef71a52cf74d58d47281c7e6ef61f1eb # 
engine/detect/detect_test.go - discord-api-token +- d71c196cba01dbc9d63e6dac903dc73cb89ab364 # tests/testData/config/allow_global_aws_re.toml - aws-access-token +- 23ec036979703e35bc6de6e245758c2c535a6050 # engine/detect/detect_test.go - generic-api-key (Discord_Public_Key) +- 88df9fdeb4a81cb1edc2339bcb4751628c49dfda # engine/engine_test.go - generic-api-key +- 722c75bcbb827536f81215baba9b28a1a13d3fda # engine/detect/detect_test.go - authenticated-url +- 5597e2e7ce843de5bf3431fd7946384e17cce968 # engine/detect/detect_test.go - discord-api-token +- 334b2420da8a61d313885b4d2944d71a675f431b # engine/detect/detect_test.go - pypi-upload-token +- b36ede0b35f4f865094cf43282010db4f520bf46 # engine/detect/detect_test.go - sidekiq-secret +- 0efff6637b75274b7a14d2318251b57d24fe42a9 # engine/detect/detect_test.go - sidekiq-sensitive-url +- 8d39148ef1417a3d8202a4076ccefa25a9551b63 # engine/detect/detect_test.go - aws-access-token +- f7f4241dd209ce6559b138eaf2c6e87961a7ada0 # engine/detect/detect_test.go - aws-access-token +- 3ef803586748159b09ce7b289532fde583fdf76b # engine/engine_test.go - generic-api-key (imagePullSecretJfrog) +- 4187d3ce4469a2ead5ffa2268c180d7a3cca2570 # engine/engine_test.go - jfrog-api-key +- 2277b1700fe84cca320612711e271f7bc21f5ca3 # tests/testData/config/allow_aws_re.toml - aws-access-token \ No newline at end of file