diff options
author | evan <evan@evanw.org> | 2018-05-23 10:20:53 -0500 |
---|---|---|
committer | evan <evan@evanw.org> | 2018-05-23 10:21:08 -0500 |
commit | 9f425f990946798767c10078907bbba187707f9f (patch) | |
tree | 91207c1b8a74093caeddf772b7e6e75abb9bb6e1 /vendor/github.com/renstrom/fuzzysearch/fuzzy | |
parent | 55e36ad06a2721cf677b6dffd4d5af611a954f23 (diff) |
switch fuzzysearch to upstream
Diffstat (limited to 'vendor/github.com/renstrom/fuzzysearch/fuzzy')
-rw-r--r-- | vendor/github.com/renstrom/fuzzysearch/fuzzy/fuzzy.go | 176 | ||||
-rw-r--r-- | vendor/github.com/renstrom/fuzzysearch/fuzzy/levenshtein.go | 43 |
2 files changed, 219 insertions, 0 deletions
diff --git a/vendor/github.com/renstrom/fuzzysearch/fuzzy/fuzzy.go b/vendor/github.com/renstrom/fuzzysearch/fuzzy/fuzzy.go new file mode 100644 index 0000000..76b1c6e --- /dev/null +++ b/vendor/github.com/renstrom/fuzzysearch/fuzzy/fuzzy.go @@ -0,0 +1,176 @@ +// Fuzzy searching allows for flexibly matching a string with partial input, +// useful for filtering data very quickly based on lightweight user input. +package fuzzy + +import ( + "unicode" + "unicode/utf8" +) + +var noop = func(r rune) rune { return r } + +// Match returns true if source matches target using a fuzzy-searching +// algorithm. Note that it doesn't implement Levenshtein distance (see +// RankMatch instead), but rather a simplified version where there's no +// approximation. The method will return true only if each character in the +// source can be found in the target and occurs after the preceding matches. +func Match(source, target string) bool { + return match(source, target, noop) +} + +// MatchFold is a case-insensitive version of Match. +func MatchFold(source, target string) bool { + return match(source, target, unicode.ToLower) +} + +func match(source, target string, fn func(rune) rune) bool { + lenDiff := len(target) - len(source) + + if lenDiff < 0 { + return false + } + + if lenDiff == 0 && source == target { + return true + } + +Outer: + for _, r1 := range source { + for i, r2 := range target { + if fn(r1) == fn(r2) { + target = target[i+utf8.RuneLen(r2):] + continue Outer + } + } + return false + } + + return true +} + +// Find will return a list of strings in targets that fuzzy matches source. +func Find(source string, targets []string) []string { + return find(source, targets, noop) +} + +// FindFold is a case-insensitive version of Find. +func FindFold(source string, targets []string) []string { + return find(source, targets, unicode.ToLower) +} + +func find(source string, targets []string, fn func(rune) rune) []string { + var matches []string + + for _, target := range targets { + if match(source, target, fn) { + matches = append(matches, target) + } + } + + return matches +} + +// RankMatch is similar to Match except it will measure the Levenshtein +// distance between the source and the target and return its result. If there +// was no match, it will return -1. +// Given the requirements of match, RankMatch only needs to perform a subset of +// the Levenshtein calculation, only deletions need be considered, required +// additions and substitutions would fail the match test. +func RankMatch(source, target string) int { + return rank(source, target, noop) +} + +// RankMatchFold is a case-insensitive version of RankMatch. +func RankMatchFold(source, target string) int { + return rank(source, target, unicode.ToLower) +} + +func rank(source, target string, fn func(rune) rune) int { + lenDiff := len(target) - len(source) + + if lenDiff < 0 { + return -1 + } + + if lenDiff == 0 && source == target { + return 0 + } + + runeDiff := 0 + +Outer: + for _, r1 := range source { + for i, r2 := range target { + if fn(r1) == fn(r2) { + target = target[i+utf8.RuneLen(r2):] + continue Outer + } else { + runeDiff++ + } + } + return -1 + } + + // Count up remaining char + for len(target) > 0 { + target = target[utf8.RuneLen(rune(target[0])):] + runeDiff++ + } + + return runeDiff +} + +// RankFind is similar to Find, except it will also rank all matches using +// Levenshtein distance. +func RankFind(source string, targets []string) Ranks { + var r Ranks + + for index, target := range targets { + if match(source, target, noop) { + distance := LevenshteinDistance(source, target) + r = append(r, Rank{source, target, distance, index}) + } + } + return r +} + +// RankFindFold is a case-insensitive version of RankFind. +func RankFindFold(source string, targets []string) Ranks { + var r Ranks + + for index, target := range targets { + if match(source, target, unicode.ToLower) { + distance := LevenshteinDistance(source, target) + r = append(r, Rank{source, target, distance, index}) + } + } + return r +} + +type Rank struct { + // Source is used as the source for matching. + Source string + + // Target is the word matched against. + Target string + + // Distance is the Levenshtein distance between Source and Target. + Distance int + + // Location of Target in original list + OriginalIndex int +} + +type Ranks []Rank + +func (r Ranks) Len() int { + return len(r) +} + +func (r Ranks) Swap(i, j int) { + r[i], r[j] = r[j], r[i] +} + +func (r Ranks) Less(i, j int) bool { + return r[i].Distance < r[j].Distance +} diff --git a/vendor/github.com/renstrom/fuzzysearch/fuzzy/levenshtein.go b/vendor/github.com/renstrom/fuzzysearch/fuzzy/levenshtein.go new file mode 100644 index 0000000..237923d --- /dev/null +++ b/vendor/github.com/renstrom/fuzzysearch/fuzzy/levenshtein.go @@ -0,0 +1,43 @@ +package fuzzy + +// LevenshteinDistance measures the difference between two strings. +// The Levenshtein distance between two words is the minimum number of +// single-character edits (i.e. insertions, deletions or substitutions) +// required to change one word into the other. +// +// This implemention is optimized to use O(min(m,n)) space and is based on the +// optimized C version found here: +// http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C +func LevenshteinDistance(s, t string) int { + r1, r2 := []rune(s), []rune(t) + column := make([]int, len(r1)+1) + + for y := 1; y <= len(r1); y++ { + column[y] = y + } + + for x := 1; x <= len(r2); x++ { + column[0] = x + + for y, lastDiag := 1, x-1; y <= len(r1); y++ { + oldDiag := column[y] + cost := 0 + if r1[y-1] != r2[x-1] { + cost = 1 + } + column[y] = min(column[y]+1, column[y-1]+1, lastDiag+cost) + lastDiag = oldDiag + } + } + + return column[len(r1)] +} + +func min(a, b, c int) int { + if a < b && a < c { + return a + } else if b < c { + return b + } + return c +} |