Refactor `Linguist::Repository` to isolate Rugged usage by vdye · Pull Request #7094 · github-linguist/linguist · GitHub
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions lib/linguist/lazy_blob.rb
31 changes: 18 additions & 13 deletions lib/linguist/repository.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
require 'linguist/lazy_blob'
require 'rugged'
require 'linguist/source/repository'
require 'linguist/source/rugged'

module Linguist
# A Repository is an abstraction of a Grit::Repo or a basic file
Expand All @@ -23,14 +24,19 @@ def self.incremental(repo, commit_oid, old_commit_oid, old_stats, max_tree_size
# Public: Initialize a new Repository to be analyzed for language
# data
#
# repo - a Rugged::Repository object
# repo - a Linguist::Source::Repository object
# commit_oid - the sha1 of the commit that will be analyzed;
# this is usually the master branch
# max_tree_size - the maximum tree size to consider for analysis (default: MAX_TREE_SIZE)
#
# Returns a Repository
def initialize(repo, commit_oid, max_tree_size = MAX_TREE_SIZE)
@repository = repo
@repository = if repo.is_a? Linguist::Source::Repository
repo
else
# Allow this for backward-compatibility purposes
Linguist::Source::RuggedRepository.new(repo)
end
@commit_oid = commit_oid
@max_tree_size = max_tree_size

Expand Down Expand Up @@ -123,26 +129,25 @@ def cache
end

def read_index
attr_index = Rugged::Index.new
attr_index.read_tree(current_tree)
repository.index = attr_index
raise NotImplementedError, "read_index is deprecated" unless repository.is_a? Linguist::Source::RuggedRepository
repository.set_attribute_source(@commit_oid)
end

def current_tree
@tree ||= Rugged::Commit.lookup(repository, @commit_oid).tree
raise NotImplementedError, "current_tree is deprecated" unless repository.is_a? Linguist::Source::RuggedRepository
repository.get_tree(@commit_oid)
end

protected
def compute_stats(old_commit_oid, cache = nil)
return {} if current_tree.count_recursive(@max_tree_size) >= @max_tree_size
return {} if repository.get_tree_size(@commit_oid, @max_tree_size) >= @max_tree_size

old_tree = old_commit_oid && Rugged::Commit.lookup(repository, old_commit_oid).tree
read_index
diff = Rugged::Tree.diff(repository, old_tree, current_tree)
repository.set_attribute_source(@commit_oid)
diff = repository.diff(old_commit_oid, @commit_oid)

# Clear file map and fetch full diff if any .gitattributes files are changed
if cache && diff.each_delta.any? { |delta| File.basename(delta.new_file[:path]) == ".gitattributes" }
diff = Rugged::Tree.diff(repository, old_tree = nil, current_tree)
diff = repository.diff(nil, @commit_oid)
file_map = {}
else
file_map = cache ? cache.dup : {}
Expand All @@ -153,7 +158,7 @@ def compute_stats(old_commit_oid, cache = nil)
new = delta.new_file[:path]

file_map.delete(old)
next if delta.binary
next if delta.binary?

if [:added, :modified].include? delta.status
# Skip submodules and symlinks
Expand Down
72 changes: 72 additions & 0 deletions lib/linguist/source/diff.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
require 'linguist/generated'
require 'cgi'
require 'charlock_holmes'
require 'mini_mime'
require 'yaml'

module Linguist
  module Source
    # Diff is an interface representing a diff between two trees. It is
    # composed of a collection of iterable deltas between before/after states
    # of files.
    #
    # Every method here is abstract: it raises NotImplementedError (with a
    # message naming the receiver's class and the missing method) until a
    # subclass overrides it.
    class Diff
      # A Delta represents a single file's before/after state in a diff.
      class Delta
        # Public: get the status of the file's "after" state as compared to
        # "before". Valid status values include:
        #
        # - :added
        # - :deleted
        # - :modified
        # - :renamed
        # - :copied
        # - :ignored
        # - :untracked
        # - :typechange
        #
        # Returns the status.
        # Raises NotImplementedError unless overridden by a subclass.
        def status
          raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
        end

        # Public: determine whether the file delta is binary.
        #
        # Returns true if the delta is binary, false otherwise.
        # Raises NotImplementedError unless overridden by a subclass.
        def binary?
          raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
        end

        # Public: get the metadata of the "before" file in the delta. The
        # metadata is represented as a Hash with the keys:
        #
        # - :path (string)
        # - :oid (string)
        # - :mode (integer)
        #
        # Returns the entry metadata hash.
        # Raises NotImplementedError unless overridden by a subclass.
        def old_file
          raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
        end

        # Public: get the metadata of the "after" file in the delta. The
        # metadata is represented as a Hash with the keys:
        #
        # - :path (string)
        # - :oid (string)
        # - :mode (integer)
        #
        # Returns the entry metadata hash.
        # Raises NotImplementedError unless overridden by a subclass.
        def new_file
          raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
        end
      end

      # Public: iterate through each delta of the given diff. Yields a single
      # delta to the given block.
      #
      # Returns nothing.
      # Raises NotImplementedError unless overridden by a subclass.
      def each_delta
        raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
      end
    end
  end
end
64 changes: 64 additions & 0 deletions lib/linguist/source/repository.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
module Linguist
  module Source
    # Repository is an interface for providing direct access to functionality
    # in a repository of files whose contents can be scanned for language
    # information.
    #
    # Every method here is abstract: it raises NotImplementedError (with a
    # message naming the receiver's class and the missing method) until a
    # subclass overrides it.
    class Repository
      # Public: get the number of entries in the root tree of the given commit,
      # with an optional maximum value.
      #
      # commit_id - the string unique identifier of the commit to analyze.
      # limit     - (Optional) the integer maximum number of tree entries to
      #             count.
      #
      # Returns the number of entries in the tree or 'limit', whichever is
      # smaller.
      # Raises NotImplementedError unless overridden by a subclass.
      def get_tree_size(commit_id, limit = nil)
        raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
      end

      # Public: set the commit whose .gitattributes file(s) should be used as
      # the source of attribute information in 'load_attributes_for_path'.
      #
      # commit_id - the string unique identifier of the attribute source commit.
      #
      # Returns nothing.
      # Raises NotImplementedError unless overridden by a subclass.
      def set_attribute_source(commit_id)
        raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
      end

      # Public: read the data and size information for the specified file blob.
      #
      # blob_id  - the string unique identifier of the blob to read.
      # max_size - the integer maximum size in bytes to read from the blob.
      #
      # Returns the (possibly truncated) byte string of blob content and
      # the full, untruncated size of the blob.
      # Raises NotImplementedError unless overridden by a subclass.
      def load_blob(blob_id, max_size)
        raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
      end

      # Public: look up the attribute values for a given path.
      #
      # path       - the path for which we want attribute values.
      # attr_names - the attributes to read for the given path.
      #
      # Returns a Hash mapping attribute names to their corresponding values.
      # Raises NotImplementedError unless overridden by a subclass.
      def load_attributes_for_path(path, attr_names)
        raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
      end

      # Public: compute the diff between the given old and new commits.
      #
      # old_commit - the string unique identifier of the "before" state of the
      #              diff, or nil (representing an empty tree).
      # new_commit - the string unique identifier of the "after" state of the
      #              diff, or nil (representing an empty tree).
      #
      # Returns a Source::Diff.
      # Raises NotImplementedError unless overridden by a subclass.
      def diff(old_commit, new_commit)
        raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
      end
    end
  end
end
95 changes: 95 additions & 0 deletions lib/linguist/source/rugged.rb
Loading