|
36 | 36 | from langchain_community.vectorstores import FAISS |
37 | 37 | from langchain_core.document_loaders.blob_loaders import Blob |
38 | 38 |
|
39 | | -from exploit_iq_commons.data_models.input import SourceDocumentsInfo |
40 | | -from exploit_iq_commons.embedding.go_segmenters_with_methods import GoSegmenterWithMethods |
41 | | -from exploit_iq_commons.embedding.js_extended_parser import ExtendedJavaScriptSegmenter |
42 | | -from exploit_iq_commons.embedding.source_code_git_loader import SourceCodeGitLoader |
43 | | -from exploit_iq_commons.embedding.transitive_code_searcher_tool import TransitiveCodeSearcher |
44 | | -from exploit_iq_commons.logging.loggers_factory import LoggingFactory |
| 39 | +from vuln_analysis.data_models.input import SourceDocumentsInfo |
| 40 | +from vuln_analysis.utils.go_segmenters_with_methods import GoSegmenterWithMethods |
| 41 | +from vuln_analysis.utils.python_segmenters_with_classes_methods import PythonSegmenterWithClassesMethods |
| 42 | +from vuln_analysis.utils.js_extended_parser import ExtendedJavaScriptSegmenter |
| 43 | +from vuln_analysis.utils.source_code_git_loader import SourceCodeGitLoader |
| 44 | +from vuln_analysis.utils.git_utils import sanitize_git_url_for_path |
| 45 | +from vuln_analysis.utils.transitive_code_searcher_tool import TransitiveCodeSearcher |
| 46 | +from vuln_analysis.logging.loggers_factory import LoggingFactory |
45 | 47 |
|
46 | 48 | if typing.TYPE_CHECKING: |
47 | 49 | from langchain_core.embeddings import Embeddings # pragma: no cover |
@@ -348,7 +350,11 @@ def get_repo_path(self, source_info: SourceDocumentsInfo): |
348 | 350 | Path |
349 | 351 | Returns the path to the git repository. |
350 | 352 | """ |
351 | | - return self._git_directory / PurePath(source_info.git_repo) |
| 353 | + # Sanitize the git repo URL to create a valid filesystem path |
| 354 | + # Remove protocol separators and path separators that could cause issues |
| 355 | + # Example: 'https://github.com/RHEcosystemAppEng/vulnerability-analysis' -> 'https.github.com.RHEcosystemAppEng.vulnerability-analysis' |
| 356 | + sanitized_repo_path = sanitize_git_url_for_path(source_info.git_repo) |
| 357 | + return self._git_directory / PurePath(sanitized_repo_path) |
352 | 358 |
|
353 | 359 | def collect_documents(self, source_info: SourceDocumentsInfo) -> list[Document]: |
354 | 360 | """ |
|
0 commit comments