Skip to content
This repository was archived by the owner on Apr 1, 2025. It is now read-only.

Commit c1486db

Browse files
authored
Merge pull request #230 from github/lingo
Switch over to using lingo for language detection
2 parents b7a52b4 + 0d78391 commit c1486db

File tree

7 files changed

+83
-88
lines changed

7 files changed

+83
-88
lines changed

Diff for: semantic.cabal

+2
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ common dependencies
7373
, unix ^>= 2.7.2.2
7474
, proto3-suite
7575
, proto3-wire
76+
, lingo >= 0.1.0.1
7677

7778
common executable-flags
7879
ghc-options: -threaded -rtsopts "-with-rtsopts=-N -A4m -n2m"
@@ -360,6 +361,7 @@ test-suite test
360361
, Data.Functor.Listable
361362
, Data.Graph.Spec
362363
, Data.Mergeable
364+
, Data.Language.Spec
363365
, Data.Range.Spec
364366
, Data.Scientific.Spec
365367
, Data.Semigroup.App.Spec

Diff for: src/Data/Blob/IO.hs

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ readBlobsFromDir :: MonadIO m => FilePath -> m [Blob]
3838
readBlobsFromDir path = liftIO . fmap catMaybes $
3939
findFilesInDir path supportedExts mempty >>= Async.mapConcurrently (readBlobFromFile . fileForPath)
4040

41-
-- | Read all blobs from the Git repo with Language.supportedExts
41+
-- | Read all blobs from a git repo
4242
readBlobsFromGitRepo :: MonadIO m => FilePath -> Git.OID -> [FilePath] -> [FilePath] -> m [Blob]
4343
readBlobsFromGitRepo path oid excludePaths includePaths = liftIO . fmap catMaybes $
4444
Git.lsTree path oid >>= Async.mapConcurrently (blobFromTreeEntry path)

Diff for: src/Data/Language.hs

+48-53
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
1-
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, KindSignatures #-}
1+
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, KindSignatures, LambdaCase #-}
22
module Data.Language
33
( Language (..)
44
, SLanguage (..)
55
, extensionsForLanguage
6-
, parseLanguage
76
, knownLanguage
87
, languageForFilePath
98
, pathIsMinified
10-
, languageForType
119
, supportedExts
1210
, codeNavLanguages
11+
, textToLanguage
12+
, languageToText
1313
) where
1414

1515
import Data.Aeson
16+
import qualified Data.Languages as Lingo
1617
import qualified Data.Text as T
18+
import qualified Data.Map.Strict as Map
1719
import Prologue
1820
import System.FilePath.Posix
1921

@@ -77,68 +79,61 @@ instance SLanguage 'PHP where
7779

7880
instance FromJSON Language where
7981
parseJSON = withText "Language" $ \l ->
80-
pure $ fromMaybe Unknown (parseLanguage l)
81-
82-
parseLanguage :: Text -> Maybe Language
83-
parseLanguage l = case T.toLower l of
84-
"go" -> Just Go
85-
"haskell" -> Just Haskell
86-
"java" -> Just Java
87-
"javascript" -> Just JavaScript
88-
"json" -> Just JSON
89-
"jsx" -> Just JSX
90-
"markdown" -> Just Markdown
91-
"python" -> Just Python
92-
"ruby" -> Just Ruby
93-
"typescript" -> Just TypeScript
94-
"php" -> Just PHP
95-
_ -> Nothing
82+
pure $ textToLanguage l
9683

9784
-- | Predicate failing on 'Unknown' and passing in all other cases.
9885
knownLanguage :: Language -> Bool
9986
knownLanguage = (/= Unknown)
10087

101-
-- | Returns a Language based on the file extension (including the ".").
102-
languageForType :: String -> Language
103-
languageForType mediaType = case mediaType of
104-
".java" -> Java
105-
".json" -> JSON
106-
".hs" -> Haskell
107-
".md" -> Markdown
108-
".rb" -> Ruby
109-
".go" -> Go
110-
".js" -> JavaScript
111-
".mjs" -> JavaScript
112-
".ts" -> TypeScript
113-
".tsx" -> TSX
114-
".jsx" -> JSX
115-
".py" -> Python
116-
".php" -> PHP
117-
".phpt" -> PHP
118-
_ -> Unknown
119-
12088
extensionsForLanguage :: Language -> [String]
121-
extensionsForLanguage language = case language of
122-
Go -> [".go"]
123-
Haskell -> [".hs"]
124-
JavaScript -> [".js", ".mjs"]
125-
PHP -> [".php", ".phpt"]
126-
Python -> [".py"]
127-
Ruby -> [".rb"]
128-
TypeScript -> [".ts"]
129-
TSX -> [".tsx", ".d.tsx"]
130-
JSX -> [".jsx"]
131-
_ -> []
132-
133-
-- | Return a language based on a FilePath's extension, or Nothing if extension is not found or not supported.
89+
-- Look the language up in Lingo's table under its 'languageToText' name and
-- return that entry's extensions as 'String's; no entry means no extensions.
extensionsForLanguage language =
  case Map.lookup (languageToText language) Lingo.languages of
    Just entry -> T.unpack <$> Lingo.languageExtensions entry
    Nothing    -> mempty
90+
91+
-- | Return a language based on a FilePath's extension.
13492
languageForFilePath :: FilePath -> Language
135-
languageForFilePath = languageForType . takeExtension
93+
-- Ask Lingo which language the path belongs to, then map Lingo's language
-- name onto our own 'Language'; paths Lingo cannot classify are 'Unknown'.
languageForFilePath path =
  case Lingo.languageForPath path of
    Nothing    -> Unknown
    Just entry -> textToLanguage (Lingo.languageName entry)
13694

13795
supportedExts :: [String]
138-
supportedExts = [".go", ".py", ".rb", ".js", ".mjs", ".ts", ".php", ".phpt"]
96+
-- The extensions Lingo lists for every language in 'codeNavLanguages',
-- concatenated in that order. Delegating to 'extensionsForLanguage' keeps a
-- single Lingo lookup path and removes the local helper that shadowed
-- 'lookup'. Languages without a Lingo entry contribute nothing. Extensions
-- shared by several languages are kept as duplicates, exactly as before.
supportedExts = foldr ((<>) . extensionsForLanguage) mempty codeNavLanguages
139102

140103
codeNavLanguages :: [Language]
141104
codeNavLanguages = [Go, Ruby, Python, JavaScript, TypeScript, PHP]
142105

143106
pathIsMinified :: FilePath -> Bool
144107
pathIsMinified = isExtensionOf ".min.js"
108+
109+
-- | Render a 'Language' as its canonical display name.
--
-- The resulting text is used as the lookup key into Lingo's language table
-- and as the textual form of a language in the API bridge, so the spelling
-- and capitalisation of each name matter.
languageToText :: Language -> T.Text
languageToText language = case language of
  Unknown    -> "Unknown"
  Go         -> "Go"
  Haskell    -> "Haskell"
  Java       -> "Java"
  JavaScript -> "JavaScript"
  JSON       -> "JSON"
  JSX        -> "JSX"
  Markdown   -> "Markdown"
  Python     -> "Python"
  Ruby       -> "Ruby"
  TypeScript -> "TypeScript"
  TSX        -> "TSX"
  PHP        -> "PHP"
124+
125+
-- | Recover a 'Language' from its canonical display name.
--
-- Matching is exact and case-sensitive: only the spellings produced by
-- 'languageToText' are recognised. Any other text, including the literal
-- @"Unknown"@, yields 'Unknown'.
textToLanguage :: T.Text -> Language
textToLanguage name = case name of
  "Go"         -> Go
  "Haskell"    -> Haskell
  "Java"       -> Java
  "JavaScript" -> JavaScript
  "JSON"       -> JSON
  "JSX"        -> JSX
  "Markdown"   -> Markdown
  "Python"     -> Python
  "Ruby"       -> Ruby
  "TypeScript" -> TypeScript
  "TSX"        -> TSX
  "PHP"        -> PHP
  _            -> Unknown

Diff for: src/Semantic/Api/Bridge.hs

+1-32
Original file line numberDiff line numberDiff line change
@@ -64,38 +64,7 @@ instance APIConvert Legacy.Span Data.Span where
6464
fromAPI Legacy.Span {..} = Data.Span <$> (start >>= preview bridging) <*> (end >>= preview bridging)
6565

6666
instance APIBridge T.Text Data.Language where
67-
bridging = iso apiLanguageToLanguage languageToApiLanguage where
68-
languageToApiLanguage :: Data.Language -> T.Text
69-
languageToApiLanguage = \case
70-
Data.Unknown -> "Unknown"
71-
Data.Go -> "Go"
72-
Data.Haskell -> "Haskell"
73-
Data.Java -> "Java"
74-
Data.JavaScript -> "JavaScript"
75-
Data.JSON -> "JSON"
76-
Data.JSX -> "JSX"
77-
Data.Markdown -> "Markdown"
78-
Data.Python -> "Python"
79-
Data.Ruby -> "Ruby"
80-
Data.TypeScript -> "TypeScript"
81-
Data.TSX -> "TSX"
82-
Data.PHP -> "PHP"
83-
84-
apiLanguageToLanguage :: T.Text -> Data.Language
85-
apiLanguageToLanguage = \case
86-
"Go" -> Data.Go
87-
"Haskell" -> Data.Haskell
88-
"Java" -> Data.Java
89-
"JavaScript" -> Data.JavaScript
90-
"JSON" -> Data.JSON
91-
"JSX" -> Data.JSX
92-
"Markdown" -> Data.Markdown
93-
"Python" -> Data.Python
94-
"Ruby" -> Data.Ruby
95-
"TypeScript" -> Data.TypeScript
96-
"TSX" -> Data.TSX
97-
"PHP" -> Data.PHP
98-
_ -> Data.Unknown
67+
bridging = iso Data.textToLanguage Data.languageToText
9968

10069
instance APIBridge API.Blob Data.Blob where
10170
bridging = iso apiBlobToBlob blobToApiBlob where

Diff for: src/Semantic/CLI.hs

+16-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import Control.Exception as Exc (displayException)
55
import Data.Blob
66
import Data.Blob.IO
77
import Data.Handle
8-
import Data.Language (languageForFilePath, parseLanguage)
8+
import qualified Data.Language as Language
99
import Data.List (intercalate, uncons)
1010
import Data.List.Split (splitWhen)
1111
import Data.Project
@@ -180,8 +180,22 @@ filePathReader = eitherReader parseFilePath
180180
parseFilePath arg = case splitWhen (== ':') arg of
181181
[a, b] | Just lang <- parseLanguage (T.pack b) -> Right (File a lang)
182182
| Just lang <- parseLanguage (T.pack a) -> Right (File b lang)
183-
[path] -> Right (File path (languageForFilePath path))
183+
[path] -> Right (File path (Language.languageForFilePath path))
184184
_ -> Left ("cannot parse `" <> arg <> "`\nexpecting FILE:LANGUAGE or just FILE")
185+
-- Parse a language name supplied on the command line (the LANGUAGE half of a
-- FILE:LANGUAGE argument). The name is lower-cased before matching, so the
-- comparison ignores case. Unrecognised names give 'Nothing', which lets
-- 'parseFilePath' try the other half of the argument or report an error.
parseLanguage :: Text -> Maybe Language.Language
parseLanguage l = case T.toLower l of
  "go"         -> Just Language.Go
  "haskell"    -> Just Language.Haskell
  "java"       -> Just Language.Java
  "javascript" -> Just Language.JavaScript
  "json"       -> Just Language.JSON
  "jsx"        -> Just Language.JSX
  "markdown"   -> Just Language.Markdown
  "python"     -> Just Language.Python
  "ruby"       -> Just Language.Ruby
  "typescript" -> Just Language.TypeScript
  -- TSX is a distinct 'Language.Language' constructor; without this entry a
  -- FILE:tsx argument was rejected as unparseable.
  "tsx"        -> Just Language.TSX
  "php"        -> Just Language.PHP
  _            -> Nothing
185199

186200
options :: Eq a => [(String, a)] -> Mod OptionFields a -> Parser a
187201
options options fields = option (optionsReader options) (fields <> showDefaultWith (findOption options) <> metavar (intercalate "|" (fmap fst options)))

Diff for: test/Data/Language/Spec.hs

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
module Data.Language.Spec (testTree) where
2+
3+
import Data.Language
4+
import Test.Tasty
5+
import Test.Tasty.HUnit
6+
7+
-- | Tests pinning the extension list and language list exposed by
-- "Data.Language".
--
-- The assertions use '@?=' (actual on the left, expected on the right) so
-- that a failure reports the hard-coded list as "expected" and the computed
-- value as "but got".
testTree :: TestTree
testTree = testGroup "Data.Language"
  [ testCase "supportedExts returns expected list" $
      supportedExts @?= [".go",".rb",".builder",".eye",".fcgi",".gemspec",".god",".jbuilder",".mspec",".pluginspec",".podspec",".rabl",".rake",".rbuild",".rbw",".rbx",".ru",".ruby",".spec",".thor",".watchr",".py",".bzl",".cgi",".fcgi",".gyp",".gypi",".lmi",".py3",".pyde",".pyi",".pyp",".pyt",".pyw",".rpy",".spec",".tac",".wsgi",".xpy",".js","._js",".bones",".es",".es6",".frag",".gs",".jake",".jsb",".jscad",".jsfl",".jsm",".jss",".mjs",".njs",".pac",".sjs",".ssjs",".xsjs",".xsjslib",".ts",".php",".aw",".ctp",".fcgi",".inc",".php3",".php4",".php5",".phps",".phpt"]
  , testCase "codeNavLanguages returns expected list" $
      codeNavLanguages @?= [Go, Ruby, Python, JavaScript, TypeScript, PHP]
  ]

Diff for: test/Spec.hs

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import qualified Data.Abstract.Name.Spec
1414
import qualified Data.Abstract.Path.Spec
1515
import qualified Data.Functor.Classes.Generic.Spec
1616
import qualified Data.Graph.Spec
17+
import qualified Data.Language.Spec
1718
import qualified Data.Range.Spec
1819
import qualified Data.Scientific.Spec
1920
import qualified Data.Semigroup.App.Spec
@@ -46,6 +47,7 @@ tests :: (?session :: TaskSession) => [TestTree]
4647
tests =
4748
[ Integration.Spec.testTree
4849
, Semantic.CLI.Spec.testTree
50+
, Data.Language.Spec.testTree
4951
, Data.Source.Spec.testTree
5052
, Semantic.Stat.Spec.testTree
5153
]

0 commit comments

Comments
 (0)