Coverage for tests / test_duplication / test_core.py: 100%
221 statements
« prev ^ index » next coverage.py v7.13.3, created at 2026-02-08 15:04 -0800
1"""Tests for AST-based code duplication detection (Type 1 & Type 2)."""
3from analyzers.python import analyze_source as analyze_py
4from analyzers.typescript import analyze_source as analyze_ts
5from analyzers.go import analyze_source as analyze_go
6from metrics.duplication.core import find_duplicates
9# ── Python helpers ────────────────────────────────────────────────────
def _py_hashes(code: str):
    """Analyze Python ``code`` and hash the first function found.

    Asserts that the analyzer reported at least one function, then
    returns that function's ``(type1_hash, type2_hash)`` pair.
    """
    functions = analyze_py(code).functions
    assert functions, f"No functions found in:\n{code}"
    first = functions[0]
    return first.type1_hash, first.type2_hash
20# ── Type 1: Exact clones ─────────────────────────────────────────────
def test_py_identical_functions_same_type1():
    """Hashing the same Python source twice gives one stable Type 1 hash."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    first_run = _py_hashes(source)[0]
    second_run = _py_hashes(source)[0]
    assert first_run is not None
    assert first_run == second_run
def test_py_comments_ignored_type1():
    """Adding comments to a Python function must not change its Type 1 hash."""
    plain = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    commented = """
def foo(x, y):
    # this is a comment
    if x > y:
        return x  # inline comment
    return y
"""
    plain_hash, _ = _py_hashes(plain)
    commented_hash, _ = _py_hashes(commented)
    assert plain_hash is not None
    assert plain_hash == commented_hash
def test_py_different_names_different_type1():
    """Renaming the function and its parameters must change the Type 1 hash."""
    original = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    renamed = """
def bar(a, b):
    if a > b:
        return a
    return b
"""
    original_hash, _ = _py_hashes(original)
    renamed_hash, _ = _py_hashes(renamed)
    # Both hashes must exist, otherwise the inequality below proves nothing.
    assert original_hash is not None
    assert renamed_hash is not None
    assert original_hash != renamed_hash
def test_py_docstrings_ignored_type1():
    """A docstring on an otherwise identical function keeps the Type 1 hash."""
    bare = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    documented = """
def foo(x, y):
    \"\"\"Return the larger value.\"\"\"
    if x > y:
        return x
    return y
"""
    bare_hash, _ = _py_hashes(bare)
    documented_hash, _ = _py_hashes(documented)
    assert bare_hash is not None
    assert bare_hash == documented_hash
def test_py_docstrings_ignored_type2():
    """A docstring plus renamed identifiers still yields the same Type 2 hash."""
    bare = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    documented_renamed = """
def bar(a, b):
    \"\"\"Return the larger value.\"\"\"
    if a > b:
        return a
    return b
"""
    _, bare_hash = _py_hashes(bare)
    _, documented_hash = _py_hashes(documented_renamed)
    assert bare_hash is not None
    assert bare_hash == documented_hash
def test_py_comments_ignored_type2():
    """Adding comments to a Python function must not change its Type 2 hash."""
    plain = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    commented = """
def foo(x, y):
    # this is a comment
    if x > y:
        return x  # inline comment
    return y
"""
    _, plain_hash = _py_hashes(plain)
    _, commented_hash = _py_hashes(commented)
    assert plain_hash is not None
    assert plain_hash == commented_hash
135# ── Type 2: Parametric clones ────────────────────────────────────────
def test_py_renamed_identifiers_same_type2():
    """Functions that differ only in identifier names share a Type 2 hash."""
    original = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    renamed = """
def bar(a, b):
    if a > b:
        return a
    return b
"""
    _, original_hash = _py_hashes(original)
    _, renamed_hash = _py_hashes(renamed)
    assert original_hash is not None
    assert renamed_hash is not None
    assert original_hash == renamed_hash
def test_py_different_literals_same_type2():
    """Functions that differ only in literal values share a Type 2 hash."""
    small_numbers = """
def foo(x):
    if x > 10:
        return 20
    return 30
"""
    large_numbers = """
def foo(x):
    if x > 99:
        return 200
    return 300
"""
    _, small_hash = _py_hashes(small_numbers)
    _, large_hash = _py_hashes(large_numbers)
    assert small_hash is not None
    assert large_hash is not None
    assert small_hash == large_hash
def test_py_structurally_different_functions():
    """Functions with different control flow must differ in both hash types.

    One function uses an ``if``/``return`` pair, the other a ``while`` loop
    with an assignment, so neither the Type 1 nor the Type 2 hash may match.
    """
    code_a = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    code_b = """
def foo(x, y, z):
    while x > y:
        x = x - z
    return x
"""
    h1a, h2a = _py_hashes(code_a)
    h1b, h2b = _py_hashes(code_b)
    # Guard against a vacuous pass: `None != "<hash>"` is True, so without
    # these checks the inequalities below would succeed even if hashing
    # silently produced nothing for one of the two functions.
    assert h1a is not None and h1b is not None
    assert h2a is not None and h2b is not None
    assert h1a != h1b
    assert h2a != h2b
195# ── Trivial functions ────────────────────────────────────────────────
def test_py_trivial_function_returns_none():
    """A Python function whose body is only ``pass`` gets no hashes at all."""
    source = """
def foo():
    pass
"""
    type1_hash, type2_hash = _py_hashes(source)
    assert type1_hash is None
    assert type2_hash is None
208# ── TypeScript ────────────────────────────────────────────────────────
def _ts_hashes(code: str):
    """Analyze TypeScript ``code`` and hash the first function found.

    Asserts that the analyzer reported at least one function, then
    returns that function's ``(type1_hash, type2_hash)`` pair.
    """
    analysis = analyze_ts(code)
    assert analysis.functions, f"No functions found in:\n{code}"
    head, *_ = analysis.functions
    return head.type1_hash, head.type2_hash
def test_ts_identical_functions_same_type1():
    """Hashing the same TypeScript source twice gives one stable Type 1 hash."""
    source = """
function max(x: number, y: number): number {
    if (x > y) {
        return x;
    }
    return y;
}
"""
    first_run = _ts_hashes(source)[0]
    second_run = _ts_hashes(source)[0]
    assert first_run is not None
    assert first_run == second_run
def test_ts_renamed_identifiers_same_type2():
    """Renamed TypeScript identifiers change the Type 1 hash but not Type 2."""
    original = """
function max(x: number, y: number): number {
    if (x > y) {
        return x;
    }
    return y;
}
"""
    renamed = """
function biggest(a: number, b: number): number {
    if (a > b) {
        return a;
    }
    return b;
}
"""
    exact_original, shape_original = _ts_hashes(original)
    exact_renamed, shape_renamed = _ts_hashes(renamed)
    assert exact_original != exact_renamed
    assert shape_original is not None
    assert shape_renamed is not None
    assert shape_original == shape_renamed
def test_ts_different_literals_same_type2():
    """TypeScript functions differing only in literals share a Type 2 hash."""
    small_numbers = """
function check(x: number): number {
    if (x > 10) {
        return 20;
    }
    return 30;
}
"""
    large_numbers = """
function check(x: number): number {
    if (x > 99) {
        return 200;
    }
    return 300;
}
"""
    _, small_hash = _ts_hashes(small_numbers)
    _, large_hash = _ts_hashes(large_numbers)
    assert small_hash is not None
    assert large_hash is not None
    assert small_hash == large_hash
def test_ts_trivial_function_returns_none():
    """An empty-bodied TypeScript function gets no hashes at all."""
    source = """
function noop() {}
"""
    type1_hash, type2_hash = _ts_hashes(source)
    assert type1_hash is None
    assert type2_hash is None
290# ── Go ────────────────────────────────────────────────────────────────
def _go_hashes(code: str):
    """Analyze Go ``code`` and hash the first function found.

    Asserts that the analyzer reported at least one function, then
    returns that function's ``(type1_hash, type2_hash)`` pair.
    """
    found = analyze_go(code).functions
    assert found, f"No functions found in:\n{code}"
    leading = found[0]
    return leading.type1_hash, leading.type2_hash
def test_go_identical_functions_same_type1():
    """Hashing the same Go source twice gives one stable Type 1 hash."""
    source = """
package main

func max(x int, y int) int {
    if x > y {
        return x
    }
    return y
}
"""
    first_run = _go_hashes(source)[0]
    second_run = _go_hashes(source)[0]
    assert first_run is not None
    assert first_run == second_run
def test_go_renamed_identifiers_same_type2():
    """Renamed Go identifiers change the Type 1 hash but not Type 2."""
    original = """
package main

func max(x int, y int) int {
    if x > y {
        return x
    }
    return y
}
"""
    renamed = """
package main

func biggest(a int, b int) int {
    if a > b {
        return a
    }
    return b
}
"""
    exact_original, shape_original = _go_hashes(original)
    exact_renamed, shape_renamed = _go_hashes(renamed)
    assert exact_original != exact_renamed
    assert shape_original is not None
    assert shape_renamed is not None
    assert shape_original == shape_renamed
def test_go_different_literals_same_type2():
    """Go functions differing only in literals share a Type 2 hash."""
    small_numbers = """
package main

func check(x int) int {
    if x > 10 {
        return 20
    }
    return 30
}
"""
    large_numbers = """
package main

func check(x int) int {
    if x > 99 {
        return 200
    }
    return 300
}
"""
    _, small_hash = _go_hashes(small_numbers)
    _, large_hash = _go_hashes(large_numbers)
    assert small_hash is not None
    assert large_hash is not None
    assert small_hash == large_hash
def test_go_trivial_function_returns_none():
    """An empty-bodied Go function gets no hashes at all."""
    source = """
package main

func noop() {}
"""
    type1_hash, type2_hash = _go_hashes(source)
    assert type1_hash is None
    assert type2_hash is None
384# ── find_duplicates integration ──────────────────────────────────────
def test_find_duplicates_groups_type1():
    """Two functions with identical bodies form a single Type 1 cluster of two."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y

def bar(x, y):
    if x > y:
        return x
    return y
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    exact = [cluster for cluster in clusters if cluster.dup_type == 1]
    assert len(exact) == 1
    assert len(exact[0].members) == 2
def test_find_duplicates_groups_type2_only():
    """Renamed clones are reported as one Type 2 cluster and no Type 1 cluster."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y

def bar(a, b):
    if a > b:
        return a
    return b
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    exact = [cluster for cluster in clusters if cluster.dup_type == 1]
    parametric = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert not exact
    assert len(parametric) == 1
    assert len(parametric[0].members) == 2
def test_find_duplicates_type1_suppresses_type2():
    """Exact (Type 1) clones must not be reported a second time as Type 2."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y

def bar(x, y):
    if x > y:
        return x
    return y
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    exact = [cluster for cluster in clusters if cluster.dup_type == 1]
    parametric = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(exact) == 1
    assert not parametric
def test_find_duplicates_no_clusters_for_unique():
    """Structurally different functions produce no duplicate clusters."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y

def bar(x, y, z):
    while x > y:
        x = x - z
    return x
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    assert len(clusters) == 0
def test_find_duplicates_type2_excludes_1_liners():
    """Single-statement functions must not be reported as Type 2 duplicates."""
    source = """
def foo(x):
    return x

def bar(x):
    return x
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    parametric = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert not parametric
def test_find_duplicates_type2_detects_2_liners():
    """Functions with two or more statements are still caught as Type 2 clones."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y

def bar(a, b):
    if a > b:
        return a
    return b
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    parametric = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(parametric) == 1
    assert len(parametric[0].members) == 2
def test_find_duplicates_type2_excludes_multi_line_single_statement():
    """One statement wrapped over several lines is still treated as a 1-liner."""
    source = """
def foo(x, y):
    return (
        x + y + x + y + 1
    )

def bar(a, b):
    return (
        a + b + a + b + 1
    )
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    parametric = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert not parametric
def test_go_statement_counting():
    """Two multi-statement Go functions are clustered as one Type 2 duplicate."""
    source = """
package main

func max(x int, y int) int {
    if x > y {
        return x
    }
    return y
}

func min(a int, b int) int {
    if a < b {
        return a
    }
    return b
}
"""
    analysis = analyze_go(source)
    analysis.path = "test.go"
    clusters = find_duplicates([analysis])
    parametric = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(parametric) == 1
    assert len(parametric[0].members) == 2
def test_ts_statement_counting():
    """Two multi-statement TypeScript functions are clustered as one Type 2 duplicate."""
    source = """
function max(x: number, y: number): number {
    if (x > y) {
        return x;
    }
    return y;
}

function min(a: number, b: number): number {
    if (a < b) {
        return a;
    }
    return b;
}
"""
    analysis = analyze_ts(source)
    analysis.path = "test.ts"
    clusters = find_duplicates([analysis])
    parametric = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(parametric) == 1
    assert len(parametric[0].members) == 2
def test_find_duplicates_cross_file():
    """A Type 2 cluster can span functions from two separately analyzed files."""
    first_source = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    second_source = """
def bar(a, b):
    if a > b:
        return a
    return b
"""
    first_analysis = analyze_py(first_source)
    first_analysis.path = "file_a.py"
    second_analysis = analyze_py(second_source)
    second_analysis.path = "file_b.py"
    clusters = find_duplicates([first_analysis, second_analysis])
    parametric = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(parametric) == 1
    # Each member unpacks to a (file path, other) pair; only the path matters here.
    member_paths = {path for path, _ in parametric[0].members}
    assert member_paths == {"file_a.py", "file_b.py"}