Skip to content

Mangle

Utilities for modifying assay CSV files.

This module provides functions for modifying assay CSV files, including reassigning people who performed assays.

mangle_assays(assays_dir, people_file)

Create 'raw' assay files by mangling data of pristine files.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `assays_dir` | `str` | Directory containing assay CSV files | required |
| `people_file` | `str` | Path to the people JSON file | required |

Raises:

| Type | Description |
|---|---|
| `ValueError` | If people data cannot be loaded or no people are found |

Source code in src/snailz/mangle.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def mangle_assays(assays_dir: str, people_file: str) -> None:
    """Create 'raw' assay files by mangling data of pristine files.

    Every file matching ``*_assay.csv`` directly inside ``assays_dir`` is
    read, passed through ``_mangle_assay``, and written to a sibling file
    whose name ends in ``_raw.csv`` instead of ``_assay.csv``.

    Parameters:
        assays_dir: Directory containing assay CSV files
        people_file: Path to the people JSON file

    Raises:
        ValueError: Propagated from ``_load_people`` when the people data
            cannot be loaded
    """
    people = _load_people(people_file)

    for filename in Path(assays_dir).glob("*_assay.csv"):
        # newline="" hands line-ending handling to the csv module, as its
        # documentation requires (quoted newlines, no doubled "\r" on Windows).
        with open(filename, "r", newline="") as stream:
            original = list(csv.reader(stream))

        mangled = _mangle_assay(filename, people, original)

        # Rewrite only the file name, so a parent directory that happens to
        # contain "_assay.csv" in its name is left untouched.
        raw_name = filename.name.removesuffix("_assay.csv") + "_raw.csv"
        output_file = filename.with_name(raw_name)
        with open(output_file, "w", newline="") as stream:
            csv.writer(stream).writerows(mangled)

_load_people(people_file)

Read people and rearrange to {ident: data} dictionary.

Source code in src/snailz/mangle.py
36
37
38
39
40
41
42
43
def _load_people(people_file: str) -> dict[str, dict]:
    """Read people and rearrange to {ident: data} dictionary.

    Parameters:
        people_file: Path to a JSON file whose top-level object has an
            "individuals" list; every entry must have an "ident" key.

    Returns:
        Dictionary mapping each entry's "ident" value to the whole entry.

    Raises:
        ValueError: If the file cannot be read or parsed, does not have the
            expected structure, or lists no people.
    """
    try:
        with open(people_file, "r") as reader:
            people_data = json.load(reader)
        people = {p["ident"]: p for p in people_data["individuals"]}
    except Exception as e:
        # Chain the cause so the underlying I/O, JSON, or key error stays
        # visible in the traceback.
        raise ValueError(f"Error loading people data: {str(e)}") from e

    # Checked outside the try block so this error is not re-wrapped.
    if not people:
        raise ValueError(
            f"Error loading people data: no people found in {people_file}"
        )
    return people

_mangle_assay(filename, people, data)

Mangle a single assay file.

Source code in src/snailz/mangle.py
46
47
48
49
50
51
52
53
54
def _mangle_assay(
    filename: str, people: dict[str, dict], data: list[list]
) -> list[list]:
    """Apply a random selection of manglers to one assay's rows.

    Anywhere from none to all of the manglers are picked, in random order,
    and each one receives the output of the one before it.
    """
    available = [_mangle_id, _mangle_indent, _mangle_person]
    how_many = random.randint(0, len(available))
    chosen = random.sample(available, how_many)

    result = data
    for mangler in chosen:
        result = mangler(filename, result, people)
    return result

_mangle_id(filename, data, people)

Convert ID field to string.

Source code in src/snailz/mangle.py
57
58
59
60
61
62
63
def _mangle_id(filename: str, data: list[list], people: dict[str, dict]) -> list[list]:
    """Wrap the value that follows an "id" cell in single quotes.

    For every row containing a cell equal to "id", the cell immediately after
    the first such cell is replaced by itself wrapped in single quotes. Rows
    are modified in place. A row in which "id" is the last cell has no value
    to quote and is left unchanged.

    Parameters:
        filename: Unused; present so all manglers share one signature.
        data: Rows of the assay file.
        people: Unused; present so all manglers share one signature.

    Returns:
        The same ``data`` list that was passed in.
    """
    for row in data:
        if "id" not in row:
            continue
        value_at = row.index("id") + 1
        # Guard against "id" being the final cell, which has nothing after it.
        if value_at < len(row):
            row[value_at] = f"'{row[value_at]}'"
    return data

_mangle_indent(filename, data, people)

Indent data portion.

Source code in src/snailz/mangle.py
66
67
68
69
70
def _mangle_indent(
    filename: str, data: list[list], people: dict[str, dict]
) -> list[list]:
    """Indent data portion.

    Rows whose first cell consists only of digits get an empty cell prepended;
    every other non-empty row gets an empty cell appended. Empty rows (which
    ``csv.reader`` produces for blank lines) are copied through unchanged.
    The input rows are not modified.

    Parameters:
        filename: Unused; present so all manglers share one signature.
        data: Rows of the assay file.
        people: Unused; present so all manglers share one signature.

    Returns:
        A new list of new row lists.
    """
    indented = []
    for row in data:
        if not row:
            # Nothing to inspect or pad; indexing row[0] would raise.
            indented.append([])
        elif row[0].isdigit():
            indented.append(["", *row])
        else:
            indented.append([*row, ""])
    return indented

_mangle_person(filename, data, people)

Replace person identifier with name.

Source code in src/snailz/mangle.py
73
74
75
76
77
78
79
80
81
def _mangle_person(
    filename: str, data: list[list], people: dict[str, dict]
) -> list[list]:
    """Replace person identifier with name.

    In every row whose first cell is "performed_by", the second cell is used
    as a key into ``people`` and replaced by that person's "personal" and
    "family" values joined with a space. Rows are modified in place. Rows with
    fewer than two cells are skipped.

    Parameters:
        filename: Unused; present so all manglers share one signature.
        data: Rows of the assay file.
        people: Mapping from person identifier to a record with "personal"
            and "family" keys.

    Returns:
        The same ``data`` list that was passed in.

    Raises:
        KeyError: If a "performed_by" row names an identifier that is not in
            ``people``, or the record lacks "personal" or "family".
    """
    for row in data:
        # Blank and one-cell rows cannot hold a key/value pair; indexing them
        # would raise IndexError.
        if len(row) >= 2 and row[0] == "performed_by":
            person = people[row[1]]
            row[1] = f"{person['personal']} {person['family']}"
    return data