Coverage for tests / test_duplication / test_core.py: 100%

221 statements  

« prev     ^ index     » next       coverage.py v7.13.3, created at 2026-02-08 15:04 -0800

1"""Tests for AST-based code duplication detection (Type 1 & Type 2).""" 

2 

3from analyzers.python import analyze_source as analyze_py 

4from analyzers.typescript import analyze_source as analyze_ts 

5from analyzers.go import analyze_source as analyze_go 

6from metrics.duplication.core import find_duplicates 

7 

8 

9# ── Python helpers ──────────────────────────────────────────────────── 

10 

11 

def _py_hashes(code: str):
    """Analyze Python source and return the first function's hash pair.

    The result is ``(type1_hash, type2_hash)`` read from the first entry of
    ``analyze_py(code).functions``; the helper asserts that at least one
    function was found.
    """
    analysis = analyze_py(code)
    assert analysis.functions, f"No functions found in:\n{code}"
    first_function = analysis.functions[0]
    hash_pair = (first_function.type1_hash, first_function.type2_hash)
    return hash_pair

18 

19 

20# ── Type 1: Exact clones ───────────────────────────────────────────── 

21 

22 

def test_py_identical_functions_same_type1():
    """Hashing the same Python source twice gives one stable, non-None Type 1 hash."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    first_run = _py_hashes(source)[0]
    second_run = _py_hashes(source)[0]
    assert first_run is not None
    assert first_run == second_run

34 

35 

def test_py_comments_ignored_type1():
    """A block comment and an inline comment must not change the Type 1 hash."""
    bare_source = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    commented_source = """
def foo(x, y):
    # this is a comment
    if x > y:
        return x  # inline comment
    return y
"""
    bare_hash = _py_hashes(bare_source)[0]
    commented_hash = _py_hashes(commented_source)[0]
    assert bare_hash is not None
    assert bare_hash == commented_hash

54 

55 

def test_py_different_names_different_type1():
    """Renaming the function and its parameters must change the Type 1 hash."""
    original_source = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    renamed_source = """
def bar(a, b):
    if a > b:
        return a
    return b
"""
    original_hash = _py_hashes(original_source)[0]
    renamed_hash = _py_hashes(renamed_source)[0]
    # Both hashes must exist before the inequality means anything.
    assert original_hash is not None
    assert renamed_hash is not None
    assert original_hash != renamed_hash

73 

74 

def test_py_docstrings_ignored_type1():
    """Adding a docstring to an otherwise identical function keeps the Type 1 hash."""
    undocumented_source = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    documented_source = """
def foo(x, y):
    \"\"\"Return the larger value.\"\"\"
    if x > y:
        return x
    return y
"""
    undocumented_hash = _py_hashes(undocumented_source)[0]
    documented_hash = _py_hashes(documented_source)[0]
    assert undocumented_hash is not None
    assert undocumented_hash == documented_hash

93 

94 

def test_py_docstrings_ignored_type2():
    """A docstring plus renamed identifiers still yields the same Type 2 hash."""
    undocumented_source = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    documented_renamed_source = """
def bar(a, b):
    \"\"\"Return the larger value.\"\"\"
    if a > b:
        return a
    return b
"""
    undocumented_hash = _py_hashes(undocumented_source)[1]
    documented_hash = _py_hashes(documented_renamed_source)[1]
    assert undocumented_hash is not None
    assert undocumented_hash == documented_hash

113 

114 

def test_py_comments_ignored_type2():
    """A block comment and an inline comment must not change the Type 2 hash."""
    bare_source = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    commented_source = """
def foo(x, y):
    # this is a comment
    if x > y:
        return x  # inline comment
    return y
"""
    bare_hash = _py_hashes(bare_source)[1]
    commented_hash = _py_hashes(commented_source)[1]
    assert bare_hash is not None
    assert bare_hash == commented_hash

133 

134 

135# ── Type 2: Parametric clones ──────────────────────────────────────── 

136 

137 

def test_py_renamed_identifiers_same_type2():
    """Two functions that differ only in identifier names share a Type 2 hash."""
    original_source = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    renamed_source = """
def bar(a, b):
    if a > b:
        return a
    return b
"""
    original_hash = _py_hashes(original_source)[1]
    renamed_hash = _py_hashes(renamed_source)[1]
    assert original_hash is not None
    assert renamed_hash is not None
    assert original_hash == renamed_hash

155 

156 

def test_py_different_literals_same_type2():
    """Two functions that differ only in integer literals share a Type 2 hash."""
    small_literals_source = """
def foo(x):
    if x > 10:
        return 20
    return 30
"""
    large_literals_source = """
def foo(x):
    if x > 99:
        return 200
    return 300
"""
    small_hash = _py_hashes(small_literals_source)[1]
    large_hash = _py_hashes(large_literals_source)[1]
    assert small_hash is not None
    assert large_hash is not None
    assert small_hash == large_hash

174 

175 

def test_py_structurally_different_functions():
    """Functions with different structure must differ in both Type 1 and Type 2 hashes.

    The first function is an ``if``/``return`` pair; the second takes an
    extra parameter and uses a ``while`` loop with an assignment.
    """
    code_a = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    code_b = """
def foo(x, y, z):
    while x > y:
        x = x - z
    return x
"""
    h1a, h2a = _py_hashes(code_a)
    h1b, h2b = _py_hashes(code_b)
    # Guard against a vacuous pass: ``None != "<hash>"`` is True, so without
    # these checks a missing hash on one side would satisfy the inequalities.
    assert h1a is not None and h1b is not None
    assert h2a is not None and h2b is not None
    assert h1a != h1b
    assert h2a != h2b

193 

194 

195# ── Trivial functions ──────────────────────────────────────────────── 

196 

197 

def test_py_trivial_function_returns_none():
    """A Python function whose body is only ``pass`` gets no Type 1 or Type 2 hash."""
    source = """
def foo():
    pass
"""
    exact_hash, parametric_hash = _py_hashes(source)
    assert exact_hash is None
    assert parametric_hash is None

206 

207 

208# ── TypeScript ──────────────────────────────────────────────────────── 

209 

210 

def _ts_hashes(code: str):
    """Analyze TypeScript source and return the first function's hash pair.

    The result is ``(type1_hash, type2_hash)`` read from the first entry of
    ``analyze_ts(code).functions``; the helper asserts that at least one
    function was found.
    """
    analysis = analyze_ts(code)
    assert analysis.functions, f"No functions found in:\n{code}"
    first_function = analysis.functions[0]
    hash_pair = (first_function.type1_hash, first_function.type2_hash)
    return hash_pair

217 

218 

def test_ts_identical_functions_same_type1():
    """Hashing the same TypeScript source twice gives one stable, non-None Type 1 hash."""
    source = """
function max(x: number, y: number): number {
    if (x > y) {
        return x;
    }
    return y;
}
"""
    first_run = _ts_hashes(source)[0]
    second_run = _ts_hashes(source)[0]
    assert first_run is not None
    assert first_run == second_run

232 

233 

def test_ts_renamed_identifiers_same_type2():
    """Renamed TypeScript identifiers change the Type 1 hash but not the Type 2 hash."""
    code_a = """
function max(x: number, y: number): number {
    if (x > y) {
        return x;
    }
    return y;
}
"""
    code_b = """
function biggest(a: number, b: number): number {
    if (a > b) {
        return a;
    }
    return b;
}
"""
    h1a, h2a = _ts_hashes(code_a)
    h1b, h2b = _ts_hashes(code_b)
    # Guard against a vacuous pass: ``None != "<hash>"`` is True, so a
    # missing Type 1 hash on one side would otherwise satisfy the inequality.
    assert h1a is not None and h1b is not None
    assert h1a != h1b
    assert h2a is not None and h2b is not None
    assert h2a == h2b

256 

257 

def test_ts_different_literals_same_type2():
    """TypeScript functions that differ only in numeric literals share a Type 2 hash."""
    small_literals_source = """
function check(x: number): number {
    if (x > 10) {
        return 20;
    }
    return 30;
}
"""
    large_literals_source = """
function check(x: number): number {
    if (x > 99) {
        return 200;
    }
    return 300;
}
"""
    small_hash = _ts_hashes(small_literals_source)[1]
    large_hash = _ts_hashes(large_literals_source)[1]
    assert small_hash is not None
    assert large_hash is not None
    assert small_hash == large_hash

279 

280 

def test_ts_trivial_function_returns_none():
    """An empty-bodied TypeScript function gets no Type 1 or Type 2 hash."""
    source = """
function noop() {}
"""
    exact_hash, parametric_hash = _ts_hashes(source)
    assert exact_hash is None
    assert parametric_hash is None

288 

289 

290# ── Go ──────────────────────────────────────────────────────────────── 

291 

292 

def _go_hashes(code: str):
    """Analyze Go source and return the first function's hash pair.

    The result is ``(type1_hash, type2_hash)`` read from the first entry of
    ``analyze_go(code).functions``; the helper asserts that at least one
    function was found.
    """
    analysis = analyze_go(code)
    assert analysis.functions, f"No functions found in:\n{code}"
    first_function = analysis.functions[0]
    hash_pair = (first_function.type1_hash, first_function.type2_hash)
    return hash_pair

299 

300 

def test_go_identical_functions_same_type1():
    """Hashing the same Go source twice gives one stable, non-None Type 1 hash."""
    source = """
package main

func max(x int, y int) int {
    if x > y {
        return x
    }
    return y
}
"""
    first_run = _go_hashes(source)[0]
    second_run = _go_hashes(source)[0]
    assert first_run is not None
    assert first_run == second_run

316 

317 

def test_go_renamed_identifiers_same_type2():
    """Renamed Go identifiers change the Type 1 hash but not the Type 2 hash."""
    code_a = """
package main

func max(x int, y int) int {
    if x > y {
        return x
    }
    return y
}
"""
    code_b = """
package main

func biggest(a int, b int) int {
    if a > b {
        return a
    }
    return b
}
"""
    h1a, h2a = _go_hashes(code_a)
    h1b, h2b = _go_hashes(code_b)
    # Guard against a vacuous pass: ``None != "<hash>"`` is True, so a
    # missing Type 1 hash on one side would otherwise satisfy the inequality.
    assert h1a is not None and h1b is not None
    assert h1a != h1b
    assert h2a is not None and h2b is not None
    assert h2a == h2b

344 

345 

def test_go_different_literals_same_type2():
    """Go functions that differ only in integer literals share a Type 2 hash."""
    small_literals_source = """
package main

func check(x int) int {
    if x > 10 {
        return 20
    }
    return 30
}
"""
    large_literals_source = """
package main

func check(x int) int {
    if x > 99 {
        return 200
    }
    return 300
}
"""
    small_hash = _go_hashes(small_literals_source)[1]
    large_hash = _go_hashes(large_literals_source)[1]
    assert small_hash is not None
    assert large_hash is not None
    assert small_hash == large_hash

371 

372 

def test_go_trivial_function_returns_none():
    """An empty-bodied Go function gets no Type 1 or Type 2 hash."""
    source = """
package main

func noop() {}
"""
    exact_hash, parametric_hash = _go_hashes(source)
    assert exact_hash is None
    assert parametric_hash is None

382 

383 

384# ── find_duplicates integration ────────────────────────────────────── 

385 

386 

def test_find_duplicates_groups_type1():
    """Two functions differing only in name form a single two-member Type 1 cluster."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y

def bar(x, y):
    if x > y:
        return x
    return y
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    exact_clusters = [cluster for cluster in clusters if cluster.dup_type == 1]
    assert len(exact_clusters) == 1
    assert len(exact_clusters[0].members) == 2

405 

406 

def test_find_duplicates_groups_type2_only():
    """Functions with renamed identifiers form one Type 2 cluster and no Type 1 cluster."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y

def bar(a, b):
    if a > b:
        return a
    return b
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    exact_clusters = [cluster for cluster in clusters if cluster.dup_type == 1]
    parametric_clusters = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(exact_clusters) == 0
    assert len(parametric_clusters) == 1
    assert len(parametric_clusters[0].members) == 2

427 

428 

def test_find_duplicates_type1_suppresses_type2():
    """Functions reported as a Type 1 cluster must not also be reported as Type 2."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y

def bar(x, y):
    if x > y:
        return x
    return y
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    exact_clusters = [cluster for cluster in clusters if cluster.dup_type == 1]
    parametric_clusters = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(exact_clusters) == 1
    assert len(parametric_clusters) == 0

449 

450 

def test_find_duplicates_no_clusters_for_unique():
    """Two structurally different functions produce no duplicate clusters at all."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y

def bar(x, y, z):
    while x > y:
        x = x - z
    return x
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    assert len(clusters) == 0

467 

468 

def test_find_duplicates_type2_excludes_1_liners():
    """Type 2 duplications should exclude 1-liners from counting as duplicates.

    The two functions use different identifiers so that they are candidates
    for a Type 2 match only. If they differed in name alone they would be
    Type 1 clones, which are suppressed from Type 2 results, and the
    assertion below would hold without the 1-liner filter being exercised.
    """
    code = """
def foo(x):
    return x

def bar(a):
    return a
"""
    result = analyze_py(code)
    result.path = "test.py"
    duplicates = find_duplicates([result])
    type2 = [c for c in duplicates if c.dup_type == 2]
    assert len(type2) == 0

483 

484 

def test_find_duplicates_type2_detects_2_liners():
    """Renamed functions with more than one statement are still reported as Type 2."""
    source = """
def foo(x, y):
    if x > y:
        return x
    return y

def bar(a, b):
    if a > b:
        return a
    return b
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    parametric_clusters = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(parametric_clusters) == 1
    assert len(parametric_clusters[0].members) == 2

504 

505 

def test_find_duplicates_type2_excludes_multi_line_single_statement():
    """A single statement wrapped across several lines is not reported as Type 2."""
    source = """
def foo(x, y):
    return (
        x + y + x + y + 1
    )

def bar(a, b):
    return (
        a + b + a + b + 1
    )
"""
    analysis = analyze_py(source)
    analysis.path = "test.py"
    clusters = find_duplicates([analysis])
    parametric_clusters = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(parametric_clusters) == 0

524 

525 

def test_go_statement_counting():
    """Two multi-statement Go functions with renamed identifiers form one Type 2 cluster."""
    source = """
package main

func max(x int, y int) int {
    if x > y {
        return x
    }
    return y
}

func min(a int, b int) int {
    if a < b {
        return a
    }
    return b
}
"""
    analysis = analyze_go(source)
    analysis.path = "test.go"
    clusters = find_duplicates([analysis])
    parametric_clusters = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(parametric_clusters) == 1
    assert len(parametric_clusters[0].members) == 2

551 

552 

def test_ts_statement_counting():
    """Two multi-statement TypeScript functions with renamed identifiers form one Type 2 cluster."""
    source = """
function max(x: number, y: number): number {
    if (x > y) {
        return x;
    }
    return y;
}

function min(a: number, b: number): number {
    if (a < b) {
        return a;
    }
    return b;
}
"""
    analysis = analyze_ts(source)
    analysis.path = "test.ts"
    clusters = find_duplicates([analysis])
    parametric_clusters = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(parametric_clusters) == 1
    assert len(parametric_clusters[0].members) == 2

576 

577 

def test_find_duplicates_cross_file():
    """A Type 2 cluster can span two analysis results with different paths."""
    first_source = """
def foo(x, y):
    if x > y:
        return x
    return y
"""
    second_source = """
def bar(a, b):
    if a > b:
        return a
    return b
"""
    first_analysis = analyze_py(first_source)
    first_analysis.path = "file_a.py"
    second_analysis = analyze_py(second_source)
    second_analysis.path = "file_b.py"
    clusters = find_duplicates([first_analysis, second_analysis])
    parametric_clusters = [cluster for cluster in clusters if cluster.dup_type == 2]
    assert len(parametric_clusters) == 1
    # Each member is a (path, <second item>) pair; only the path is checked.
    member_paths = {path for path, _ in parametric_clusters[0].members}
    assert member_paths == {"file_a.py", "file_b.py"}

599 assert files == {"file_a.py", "file_b.py"}