11"""Utilities for manipulating git."""
22
3+ import copy
4+ import dataclasses
35import filecmp
46import fnmatch
57import io
1012import shutil
1113import subprocess
1214import sys
13- from typing import List , Sequence , Union
15+ from typing import Dict , List , Optional , Sequence , Union
1416
1517import git
1618
2628
2729from git_theta import async_utils
2830
31+ # These are the git attributes that git-theta currently uses to manage checked-in
32+ # files. Defined as a variable in case extra functionality ever requires more
33+ # attributes.
34+ THETA_ATTRIBUTES = ("filter" , "merge" , "diff" )
35+
2936
3037def get_git_repo ():
3138 """
@@ -107,7 +114,26 @@ def get_gitattributes_file(repo):
107114 return os .path .join (repo .working_dir , ".gitattributes" )
108115
109116
110- def read_gitattributes (gitattributes_file ):
@dataclasses.dataclass
class GitAttributes:
    """One .gitattributes entry: a path pattern and the attributes it sets.

    Attributes
    ----------
    pattern
        The path pattern the entry applies to.
    attributes
        Mapping from attribute name to its value. The value is None when the
        attribute is listed without an ``=value`` part.
    raw
        The original text of the line, when the entry was parsed from a file.
        If set (and non-empty) it is returned verbatim by ``str()`` so that
        existing lines round-trip without being reformatted.
    """

    pattern: str
    attributes: Dict[str, Optional[str]]
    raw: Optional[str] = None

    def __str__(self):
        # Prefer the text as it was originally written, when we have it.
        if self.raw:
            return self.raw
        # Attributes with a falsy value are emitted as a bare name.
        attrs = " ".join(f"{k}={v}" if v else k for k, v in self.attributes.items())
        return f"{self.pattern} {attrs}"

    def __eq__(self, o):
        # Defer to the other operand for foreign types instead of failing with
        # an AttributeError on e.g. `entry == None`.
        if not isinstance(o, GitAttributes):
            return NotImplemented
        if self.pattern != o.pattern or self.attributes != o.attributes:
            return False
        # The raw text only takes part in the comparison when both sides have
        # it, so a freshly built entry can equal one parsed from a file.
        if self.raw and o.raw:
            return self.raw == o.raw
        return True
135+
def read_gitattributes(gitattributes_file) -> List[GitAttributes]:
    """
    Read contents of this repo's .gitattributes file

    Parameters
    ----------
    gitattributes_file
        Path to the .gitattributes file to read.

    Returns
    -------
    List[GitAttributes]
        One parsed entry per line of the file, in file order. An empty list
        is returned when the file does not exist.
    """
    # A missing file simply means there are no attributes yet.
    if not os.path.exists(gitattributes_file):
        return []
    entries = []
    with open(gitattributes_file, "r") as f:
        for line in f:
            # Drop the line terminator before handing the line to the parser.
            entries.append(parse_gitattributes(line.rstrip("\n")))
    return entries
129155
130156
def parse_gitattributes(gitattributes: str) -> GitAttributes:
    """Parse a single line of a .gitattributes file.

    Parameters
    ----------
    gitattributes
        One line of the file, without its trailing newline.

    Returns
    -------
    GitAttributes
        The parsed entry. The first whitespace-separated field is the pattern
        and the remaining fields are attributes; ``name=value`` fields map
        ``name`` to ``value`` and bare fields map to None. The unmodified
        input line is kept as the entry's ``raw`` text.
    """
    # TODO: Fix for escaped patterns
    # Split on any run of whitespace (spaces or tabs) and ignore leading
    # whitespace, so aligned or indented lines do not yield empty fields. A
    # blank line has no fields at all and gets an empty pattern.
    pattern, *attributes = gitattributes.split() or [""]
    attrs = {}
    # Overwrite as we go to get the LAST attribute behavior
    for attribute in attributes:
        # Only the first "=" separates name from value; the value itself may
        # contain further "=" characters.
        # TODO: Update to handle unsetting attributes like "-diff". Currently
        # they are just copied as keys (e.g. "-diff") so they print back
        # correctly; their semantics are not interpreted.
        key, separator, value = attribute.partition("=")
        attrs[key] = value if separator else None
    return GitAttributes(pattern, attrs, gitattributes)
174+
175+
@file_or_name(gitattributes_file="w")
def write_gitattributes(
    gitattributes_file: Union[str, io.FileIO], attributes: List[GitAttributes]
):
    """
    Write list of attributes to this repo's .gitattributes file

    Parameters
    ----------
    gitattributes_file:
        File to write to. The body only calls ``.write`` on it; presumably
        the ``file_or_name`` decorator opens it in "w" mode when a path is
        given (confirm against the decorator).
    attributes:
        Attributes to write to .gitattributes
    """
    # Each entry is rendered with str(), one entry per line.
    rendered = [str(entry) for entry in attributes]
    gitattributes_file.write("\n".join(rendered))
    # End file with newline.
    gitattributes_file.write("\n")
149194
150195
def add_theta_to_gitattributes(
    gitattributes: List[GitAttributes],
    path: str,
    theta_attributes: Sequence[str] = THETA_ATTRIBUTES,
) -> List[GitAttributes]:
    """Make sure `path` carries the git attributes that git-theta needs.

    The last entry whose pattern matches `path` is treated as the active one.

    * If it already sets every git-theta attribute to "theta", nothing changes.
    * If it sets any git-theta attribute to something else, an error is raised.
    * Otherwise a new entry for exactly `path` is appended; it copies the
      attributes of the active entry (if there is one) and sets every
      git-theta attribute to "theta".

    Parameters
    ----------
    gitattributes: A list of parsed git attribute entries. A new entry is
        appended to this list in place when one is needed.
    path: The path to the model we are adding a filter to.
    theta_attributes: Names of the attributes git-theta sets to "theta".

    Raises
    ------
    ValueError
        `path` is covered by an active git attributes entry that sets one of
        `theta_attributes` to a value other than "theta".

    Returns
    -------
    List[GitAttributes]
        The same list object that was passed in, with a (possibly) new entry
        at the end that sets the git-theta attributes for `path`.
    """
    # Walk backwards so the first hit is the last matching line in the file.
    active = next(
        (
            entry
            for entry in reversed(gitattributes)
            if fnmatch.fnmatchcase(path, entry.pattern)
        ),
        None,
    )
    merged = {}
    if active:
        current = active.attributes
        # Already fully configured for git-theta: leave everything untouched.
        if all(current.get(name) == "theta" for name in theta_attributes):
            return gitattributes
        # Refuse to override an attribute some other tool has claimed.
        for name in theta_attributes:
            if name in current and current[name] != "theta":
                raise ValueError(
                    f"Git Attributes used by git-theta are already set for {path}. "
                    f"Found filter={current.get('filter')}, "
                    f"diff={current.get('diff')}, "
                    f"merge={current.get('merge')}."
                )
        # Carry over unrelated attributes, since the new path-specific entry
        # becomes the active one for this path.
        merged = copy.deepcopy(current)
    for name in theta_attributes:
        merged[name] = "theta"
    gitattributes.append(GitAttributes(path, merged))
    return gitattributes
262+
263+
def get_gitattributes_tracked_patterns(
    gitattributes_file, theta_attributes: Sequence[str] = THETA_ATTRIBUTES
) -> List[str]:
    """Return the patterns in a .gitattributes file that git-theta manages.

    Parameters
    ----------
    gitattributes_file
        Path to the .gitattributes file to inspect.
    theta_attributes
        Names of the attributes that must all be set to "theta" for an entry
        to count as managed by git-theta.

    Returns
    -------
    List[str]
        The pattern of every entry that sets all of `theta_attributes` to
        "theta", in file order. Empty when the file has no such entry.
    """
    gitattributes = read_gitattributes(gitattributes_file)
    # An entry counts only when every git-theta attribute is "theta", the same
    # criterion is_theta_tracked applies to the active entry for a path.
    # TODO: Correctly handle patterns with escaped spaces in them
    return [
        entry.pattern
        for entry in gitattributes
        if all(entry.attributes.get(name) == "theta" for name in theta_attributes)
    ]
198278
199279
def is_theta_tracked(
    path: str,
    gitattributes: List[GitAttributes],
    theta_attributes: Sequence[str] = THETA_ATTRIBUTES,
) -> bool:
    """Report whether `.gitattributes` marks `path` as tracked by git-theta.

    Entries are scanned from last to first because the last matching line is
    the active one. Only that entry is consulted: the result is True when it
    sets every name in `theta_attributes` to "theta", and False when it does
    not or when no entry matches `path` at all.
    """
    for entry in reversed(gitattributes):
        if not fnmatch.fnmatchcase(path, entry.pattern):
            continue
        # This is the active entry; earlier lines are irrelevant.
        for name in theta_attributes:
            if entry.attributes.get(name) != "theta":
                return False
        return True
    return False
295+
296+
200297def add_file (f , repo ):
201298 """
202299 Add file to git staging area
0 commit comments