Leetcode 839. Similar String Groups

题目

839. Similar String Groups

  • Difficulty: Hard
  • Total Accepted: 4.7K
  • Total Submissions: 14K

Two strings X and Y are similar if we can swap two letters (in different positions) of X, so that it equals Y.

For example, "tars" and "rats" are similar (swapping at positions 0 and 2), and "rats" and "arts" are similar, but "star" is not similar to "tars", "rats", or "arts".

Together, these form two connected groups by similarity: {"tars", "rats", "arts"} and {"star"}. Notice that "tars" and "arts" are in the same group even though they are not similar. Formally, each group is such that a word is in the group if and only if it is similar to at least one other word in the group.

We are given a list A of strings. Every string in A is an anagram of every other string in A. How many groups are there?

Example 1:

Input: ["tars","rats","arts","star"]
Output: 2

Note:

  1. A.length <= 2000
  2. A[i].length <= 1000
  3. A.length * A[i].length <= 20000
  4. All words in A consist of lowercase letters only.
  5. All words in A have the same length and are anagrams of each other.
  6. The judging time limit has been increased for this question.

解题报告

AC 截图

题目大意

对于任意两个字符串,若其中一个能够由另一个字符串通过交换字符串中两个字符的位置来得到,那么就称二者 相似。我们将这种相似关系视作而两个字符串间有一条边连通。如此以来,我们构建了一个由字符串作为节点,以相似关系作为边的无向图。本题的问题就是该无向图中存在几个连通分量。

解题思路

此类连通性问题一般可以通过两种方法求解。一种是遍历,即是通过 深度优先搜索 或者 广度优先搜索 来发现每一个连通分量。另一种是通过 并查集 来构建连通分量。在这里,我们首先考虑通过遍历求解(我们选取的是深度优先搜索),但是很不幸……

超时题解

结果截图

C++ 代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
static const auto runfirst = []() {
std::ios::sync_with_stdio(false);
std::cin.tie(nullptr);
std::cout.tie(nullptr);
return nullptr;
}();

int group[2000];

class Solution {
public:
int size; // string size

int numSimilarGroups(vector<string>& A) {
if (A.empty()) return 0;
size = A[0].size();
return traverseMethod(A);
}

int traverseMethod(vector<string>& A) {
unordered_set<string> unvisitedStrings(A.begin(), A.end());
queue<string> toVisit;
int result = 0;

while (!unvisitedStrings.empty()) {
// Push the first string into the queue //
++result;
toVisit.push(*unvisitedStrings.begin());
unvisitedStrings.erase(unvisitedStrings.begin());

// Find the connected component //
while (!toVisit.empty()) {
// Get the front string //
string toTest(toVisit.front());
toVisit.pop();

// Traverse all possible results from toVisit.front() //
for (int i = 0; i < size; ++i) {
for (int t = i + 1; t < size; ++t) {
if (toTest[i] != toTest[t]) {
swap(toTest[i], toTest[t]);
if (unvisitedStrings.count(toTest)) {
toVisit.push(toTest);
unvisitedStrings.erase(toTest);
if (unvisitedStrings.empty()) return result;
}
swap(toTest[i], toTest[t]);
}
}
}
}
}

return result;
}

};

结果分析

遍历算法虽然简单,但是其时间复杂度非常高。以上面的实现为例,其时间复杂度高达 O(size * size * length * log(length)),对于像结果截图中那样超长的字符串显然就无能为力地超时了。因而我们考虑使用并查集方法:

题解1

结果截图

C++ 代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
static const auto runfirst = []() {
std::ios::sync_with_stdio(false);
std::cin.tie(nullptr);
std::cout.tie(nullptr);
return nullptr;
}();

int group[2000];

class Solution {
public:

int size; // string size
int length; // vector length

int numSimilarGroups(vector<string>& A) {
if (A.empty()) return 0;
size = A[0].size();
length = A.size();
return unionFindingMethod(A);
}

int findGroup(int index) {
if (group[index] == index) return index;
return group[index] = findGroup(group[index]);
}

bool isSimilar(const string& str1, const string& str2) {
int different = 0;

for (int i = 0; i < size; ++i) {
if (str1[i] != str2[i]) {
if (different > 1)
return false;
++different;
}
}

return true;
}

int unionFindingMethod(vector<string>& A) {
int result = 0;
length = A.size();
// int groupI, groupT;

// Initially, each node belongs to its own group //
for (int i = 0; i < length; ++i) group[i] = i;

for (int i = 0; i < length; ++i) {
for (int t = i + 1; t < length; ++t) {
// groupI = findGroup(i);
// groupT = findGroup(t);

if (isSimilar(A[i], A[t])) {
group[findGroup(i)] = findGroup(t);
}
}
}

for (int i = 0; i < length; ++i)
result += (group[i] == i);

return result;
}

};

结果分析

虽然并查集方法在规定的时间内成功通过了所有的测试样例,但是通过分析我们发现,其时间复杂度实际上是 O(length * length * size),因而相对于遍历方法,并查集方法更适合于较大 size 的情景。注意到题目中的 Note,我们知道 size * length <= 20000,因而对于较大的 size,其 length 则会较小,所以我们可以通过判断 size 的大小来决定使用哪种方法进行我们的查找。

题解2(最优解)

结果截图

C++ 代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
static const auto runfirst = []() {
std::ios::sync_with_stdio(false);
std::cin.tie(nullptr);
std::cout.tie(nullptr);
return nullptr;
}();

int group[2000];

class Solution {
public:

int size; // string size
int length; // vector length

int numSimilarGroups(vector<string>& A) {
if (A.empty()) return 0;
size = A[0].size();
length = A.size();
if (size < 20)
return traverseMethod(A);
else
return unionFindingMethod(A);
}

int findGroup(int index) {
if (group[index] == index) return index;
return group[index] = findGroup(group[index]);
}

bool isSimilar(const string& str1, const string& str2) {
int different = 0;

for (int i = 0; i < size; ++i) {
if (str1[i] != str2[i]) {
if (different > 1)
return false;
++different;
}
}

return true;
}

int unionFindingMethod(vector<string>& A) {
int result = 0;
length = A.size();
// int groupI, groupT;

// Initially, each node belongs to its own group //
for (int i = 0; i < length; ++i) group[i] = i;

for (int i = 0; i < length; ++i) {
for (int t = i + 1; t < length; ++t) {
// groupI = findGroup(i);
// groupT = findGroup(t);

if (isSimilar(A[i], A[t])) {
group[findGroup(i)] = findGroup(t);
}
}
}

for (int i = 0; i < length; ++i)
result += (group[i] == i);

return result;
}

int traverseMethod(vector<string>& A) {
unordered_set<string> unvisitedStrings(A.begin(), A.end());
queue<string> toVisit;
int result = 0;

while (!unvisitedStrings.empty()) {
// Push the first string into the queue //
++result;
toVisit.push(*unvisitedStrings.begin());
unvisitedStrings.erase(unvisitedStrings.begin());

// Find the connected component //
while (!toVisit.empty()) {
// Get the front string //
string toTest(toVisit.front());
toVisit.pop();

// Traverse all possible results from toVisit.front() //
for (int i = 0; i < size; ++i) {
for (int t = i + 1; t < size; ++t) {
if (toTest[i] != toTest[t]) {
swap(toTest[i], toTest[t]);
if (unvisitedStrings.count(toTest)) {
toVisit.push(toTest);
unvisitedStrings.erase(toTest);
if (unvisitedStrings.empty()) return result;
}
swap(toTest[i], toTest[t]);
}
}
}
}
}

return result;
}

};

结果分析

虽然我们对于 size 的判断一定程度上确定了 length 的大小,但是我们不能忽略 sizelength 都较小的情况。因而我们加入对 length 的判断,从而超越了原本的最优解:

Even Better

结果截图

C++ 代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
static const auto runfirst = []() {
std::ios::sync_with_stdio(false);
std::cin.tie(nullptr);
std::cout.tie(nullptr);
return nullptr;
}();

int group[2000];

class Solution {
public:

int size; // string size
int length; // vector length

int numSimilarGroups(vector<string>& A) {
if (A.empty()) return 0;
size = A[0].size();
length = A.size();
if (size > 19 || length < 50)
return unionFindingMethod(A);
else
return traverseMethod(A);
}

int findGroup(int index) {
if (group[index] == index) return index;
return group[index] = findGroup(group[index]);
}

bool isSimilar(const string& str1, const string& str2) {
int different = 0;

for (int i = 0; i < size; ++i) {
if (str1[i] != str2[i]) {
if (different > 1)
return false;
++different;
}
}

return true;
}

int unionFindingMethod(vector<string>& A) {
int result = 0;
length = A.size();

// Initially, each node belongs to its own group //
for (int i = 0; i < length; ++i) group[i] = i;

for (int i = 0; i < length; ++i) {
for (int t = i + 1; t < length; ++t) {
if (isSimilar(A[i], A[t])) {
group[findGroup(t)] = findGroup(i);
}
}
}

for (int i = 0; i < length; ++i)
result += (group[i] == i);

return result;
}

int traverseMethod(vector<string>& A) {
// Time complexity: O(size * size * length * log(length))
unordered_set<string> unvisitedStrings(A.begin(), A.end());
queue<string> toVisit;
int result = 0;

while (!unvisitedStrings.empty()) {
// Push the first string into the queue //
++result;
toVisit.push(*unvisitedStrings.begin());
unvisitedStrings.erase(unvisitedStrings.begin());

// Find the connected component //
while (!toVisit.empty()) {
// Get the front string //
string toTest(toVisit.front());
toVisit.pop();

// Traverse all possible results from toVisit.front() //
for (int i = 0; i < size; ++i) {
for (int t = i + 1; t < size; ++t) {
if (toTest[i] != toTest[t]) {
swap(toTest[i], toTest[t]);
if (unvisitedStrings.count(toTest)) {
toVisit.push(toTest);
unvisitedStrings.erase(toTest);
if (unvisitedStrings.empty()) return result;
}
swap(toTest[i], toTest[t]);
}
}
}
}
}

return result;
}

};
Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×