Skip to content

Specimens

Generate snail specimens.

Point dataclass

A 2D point with x and y coordinates.

Source code in src/snailz/specimens.py
28
29
30
31
32
33
@dataclass
class Point:
    """A 2D point with x and y coordinates.

    Both coordinates default to None, so a Point can be constructed
    with no arguments and have its coordinates assigned afterwards.
    """

    # x coordinate; None until a value is assigned.
    x: int | None = None
    # y coordinate; None until a value is assigned.
    y: int | None = None

Individual dataclass

A single specimen with genome, mass, site location and unique identifier.

Source code in src/snailz/specimens.py
36
37
38
39
40
41
42
43
@dataclass
class Individual:
    """A single specimen with genome, mass, site location and unique identifier.

    All four fields are required; none of them has a default value.
    """

    # Genome of this specimen, stored as a string of bases.
    genome: str
    # Identifier of this specimen.
    ident: str
    # Mass of this specimen.
    mass: float
    # Location associated with this specimen.
    site: Point

Specimens dataclass

Keep track of generated specimens.

Source code in src/snailz/specimens.py
46
47
48
49
50
51
52
53
54
55
@dataclass
class Specimens:
    """Keep track of generated specimens.

    Bundles the generated individuals with the values that were used
    to produce them.
    """

    # The generated specimens.
    individuals: list[Individual]
    # Genome positions that can be mutated.
    loci: list[int]
    # The parameter dictionary used for generation.
    params: dict[str, object]
    # The reference genome that individual genomes are derived from.
    reference: str
    # Base that marks a specimen as susceptible when found at susceptible_locus.
    susceptible_base: str
    # Genome position that determines susceptibility.
    susceptible_locus: int

specimens_check(params)

Check specimen generation parameters.

Parameters:

Name Type Description Default
params dict[str, object]

Dictionary containing specimen generation parameters

required

Raises:

Type Description
ValueError

If parameters are missing, have wrong types, or have invalid values

Source code in src/snailz/specimens.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def specimens_check(params: dict[str, object]) -> None:
    """Validate the parameters used to generate specimens.

    Keys and value types are checked against SPECIMENS_PARAMS first;
    the numeric constraints are then checked in a fixed order.

    Parameters:
        params: Dictionary containing specimen generation parameters

    Raises:
        ValueError: If parameters are missing, have wrong types, or have invalid values
    """
    utils.check_keys_and_types(SPECIMENS_PARAMS, params)

    # Each of these values must be strictly greater than zero.
    for key in ("length", "min_mass", "mutations", "number"):
        value = params[key]
        utils.require(0 < value, f"{key} must be positive")

    # There cannot be more mutable loci than positions in the genome.
    mutations = params["mutations"]
    genome_length = params["length"]
    mutations_in_range = 0 <= mutations <= genome_length
    utils.require(mutations_in_range, "mutations must be between 0 and length")

    # The mass range must not be empty.
    lightest = params["min_mass"]
    heaviest = params["max_mass"]
    utils.require(lightest < heaviest, "max_mass must be greater than min_mass")

specimens_generate(params)

Generate specimens with random genomes and masses.

Each genome is a string of bases of the same length. One locus is randomly chosen as "significant", and a specific mutation there predisposes the snail to mass changes. Other mutations are added randomly at other loci.

Parameters:

Name Type Description Default
params dict[str, object]

Dictionary containing specimen generation parameters

required

Returns:

Type Description
Specimens

Specimens object containing the generated specimens and parameters

Source code in src/snailz/specimens.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def specimens_generate(params: dict[str, object]) -> Specimens:
    """Generate specimens with random genomes and masses.

    The parameters are validated, then a set of mutable loci and a
    reference genome are created. One of the loci is picked as the
    susceptible locus, and the reference base at that position is
    recorded as the susceptible base. Each individual receives its own
    mutated copy of the reference genome, a random mass, a unique
    identifier and a Point whose coordinates are not yet set.

    Parameters:
        params: Dictionary containing specimen generation parameters

    Returns:
        Specimens object containing the generated specimens and parameters
    """
    specimens_check(params)

    # The helpers below all draw from the shared random generator, so
    # the order of these calls determines the output for a given seed.
    mutable_loci = _make_loci(params)
    ref_genome = _make_reference_genome(params)
    chosen_locus = _choose_one(mutable_loci)
    chosen_base = ref_genome[chosen_locus]

    all_genomes = []
    for _ in range(params["number"]):
        all_genomes.append(_make_genome(ref_genome, mutable_loci))

    all_masses = _make_masses(params, all_genomes, chosen_locus, chosen_base)
    all_idents = _make_idents(params["number"])

    members = []
    for genome, mass, ident in zip(all_genomes, all_masses, all_idents):
        members.append(Individual(genome=genome, mass=mass, site=Point(), ident=ident))

    return Specimens(
        individuals=members,
        loci=mutable_loci,
        params=params,
        reference=ref_genome,
        susceptible_base=chosen_base,
        susceptible_locus=chosen_locus,
    )

specimens_to_csv(specimens, filename)

Write specimens data as CSV.

Parameters:

Name Type Description Default
specimens Specimens

A Specimens object containing specimen data

required
filename str | None

Path to output file, or None to write to standard output

required
Side effects

Either writes to the specified output file or prints to stdout

Source code in src/snailz/specimens.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
def specimens_to_csv(specimens: Specimens, filename: str | None) -> None:
    """Write specimens data as CSV.

    The output has a header row ("ident", "x", "y", "genome", "mass")
    followed by one row per individual.

    Parameters:
        specimens: A Specimens object containing specimen data
        filename: Path to output file, or None to write to standard output

    Side effects:
        Either writes to the specified output file or prints to stdout.
        A file opened by this function is always closed, even if writing
        a row fails; standard output is never closed.
    """
    to_stdout = filename is None
    # newline="" lets the csv module control line endings itself.
    stream = sys.stdout if to_stdout else open(filename, "w", newline="")
    try:
        writer = csv.writer(stream)
        writer.writerow(["ident", "x", "y", "genome", "mass"])
        writer.writerows(
            [indiv.ident, indiv.site.x, indiv.site.y, indiv.genome, indiv.mass]
            for indiv in specimens.individuals
        )
    finally:
        # Only close what this function opened.
        if not to_stdout:
            stream.close()

mutate_masses(grid, specimens, mut_scale, specific_index=None)

Mutate mass based on grid values and genetic susceptibility.

For each specimen, choose a random cell from the grid and modify the mass if the cell's value is non-zero and the genome is susceptible. Records the chosen site coordinates for each specimen regardless of whether mutation occurs.

Parameters:

Name Type Description Default
grid Grid

A Grid object containing pollution values

required
specimens Specimens

A Specimens object with individuals to potentially mutate

required
mut_scale float

Scaling factor for mutation effect

required
specific_index int | None

Optional index to mutate only a specific specimen

None
Side effects

Modifies specimen masses in-place for susceptible individuals. Updates site coordinates for all individuals.

Source code in src/snailz/specimens.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
def mutate_masses(
    grid: Grid,
    specimens: Specimens,
    mut_scale: float,
    specific_index: int | None = None,
) -> None:
    """Mutate mass based on grid values and genetic susceptibility.

    Every processed specimen is assigned a randomly chosen grid cell,
    and that cell's coordinates are stored as its site. The mass is
    changed only when the cell's value is greater than zero and the
    specimen's genome has the susceptible base at the susceptible locus.

    Parameters:
        grid: A Grid object containing pollution values
        specimens: A Specimens object with individuals to potentially mutate
        mut_scale: Scaling factor for mutation effect
        specific_index: Optional index to mutate only a specific specimen

    Side effects:
        Modifies specimen masses in-place for susceptible individuals.
        Updates site coordinates for every processed individual (all of
        them, or only the one at specific_index when it is given).
    """
    cells = grid.grid
    # The same bound is used for both coordinates.
    side = len(cells)
    locus = specimens.susceptible_locus
    base = specimens.susceptible_base

    if specific_index is None:
        targets = specimens.individuals
    else:
        targets = [specimens.individuals[specific_index]]

    for snail in targets:
        # Draw x before y so the random sequence is consumed in a fixed order.
        x = random.randrange(side)
        y = random.randrange(side)
        snail.site.x, snail.site.y = x, y

        value = cells[x][y]
        if value > 0 and snail.genome[locus] == base:
            snail.mass = mutate_mass(snail.mass, mut_scale, value)

mutate_mass(original, mut_scale, cell_value)

Mutate a single mass.

Parameters:

Name Type Description Default
original float

The original mass value

required
mut_scale float

Scaling factor for mutation effect

required
cell_value int

The grid cell value affecting the mutation

required

Returns:

Type Description
float

The mutated mass value, rounded to PRECISION decimal places

Source code in src/snailz/specimens.py
186
187
188
189
190
191
192
193
194
195
196
197
def mutate_mass(original: float, mut_scale: float, cell_value: int) -> float:
    """Mutate a single mass.

    The original mass is multiplied by (1 + mut_scale * cell_value).

    Parameters:
        original: The original mass value
        mut_scale: Scaling factor for mutation effect
        cell_value: The grid cell value affecting the mutation

    Returns:
        The mutated mass value, rounded to PRECISION decimal places
    """
    growth = 1 + (mut_scale * cell_value)
    mutated = original * growth
    return round(mutated, utils.PRECISION)

_choose_one(values)

Choose a single random item from a collection.

Parameters:

Name Type Description Default
values list[int]

A sequence to choose from

required

Returns:

Type Description
int

A randomly selected item from the values sequence

Source code in src/snailz/specimens.py
200
201
202
203
204
205
206
207
208
209
def _choose_one(values: list[int]) -> int:
    """Pick one item at random from a sequence.

    Parameters:
        values: A sequence to choose from

    Returns:
        A randomly selected item from the values sequence
    """
    # random.choices returns a one-element list here; unpack it.
    (picked,) = random.choices(values, k=1)
    return picked

_choose_other(values, exclude)

Choose a value at random except for the excluded values.

Parameters:

Name Type Description Default
values str

A collection to choose from

required
exclude str

Value or collection of values to exclude from the choice

required

Returns:

Type Description
str

A randomly selected item from values that isn't in exclude

Source code in src/snailz/specimens.py
212
213
214
215
216
217
218
219
220
221
222
223
def _choose_other(values: str, exclude: str) -> str:
    """Pick a random value that is not one of the excluded values.

    Parameters:
        values: A collection to choose from
        exclude: Value or collection of values to exclude from the choice

    Returns:
        A randomly selected item from values that isn't in exclude
    """
    excluded = set(exclude)
    # Sort the distinct remaining values so the choice does not depend
    # on set iteration order.
    candidates = sorted({item for item in values if item not in excluded})
    position = random.randrange(len(candidates))
    return candidates[position]

_make_genome(reference, loci)

Make an individual genome by mutating the reference genome.

Parameters:

Name Type Description Default
reference str

Reference genome string to base the new genome on

required
loci list[int]

List of positions that can be mutated

required

Returns:

Type Description
str

A new genome string with random mutations at some loci

Source code in src/snailz/specimens.py
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
def _make_genome(reference: str, loci: list[int]) -> str:
    """Make an individual genome by mutating the reference genome.

    Between 1 and len(loci) of the positions listed in loci are chosen
    at random, and the base at each chosen position is replaced with a
    different base drawn from BASES. Positions not listed in loci are
    never changed.

    Parameters:
        reference: Reference genome string to base the new genome on
        loci: List of positions that can be mutated

    Returns:
        A new genome string with random mutations at some loci, or the
        reference unchanged when loci is empty
    """
    # With no mutable positions there is nothing to mutate.
    if not loci:
        return reference

    result = list(reference)
    num_mutations = random.randint(1, len(loci))
    # Sample the mutable positions themselves, not indices into loci:
    # sampling range(len(loci)) would mutate positions 0..len(loci)-1.
    for loc in random.sample(loci, num_mutations):
        result[loc] = _choose_other(BASES, reference[loc])
    return "".join(result)

_make_idents(count)

Create unique specimen identifiers.

Each identifier is a 6-character string: the first two characters are the same uppercase letters for all specimens, and the remaining four characters are random uppercase letters and digits, unique for each specimen.

Parameters:

Name Type Description Default
count int

Number of identifiers to generate

required

Returns:

Type Description
list[str]

List of unique specimen identifiers

Source code in src/snailz/specimens.py
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
def _make_idents(count: int) -> list[str]:
    """Create unique specimen identifiers.

    Each identifier is a 6-character string where:
    - First two chars are the same uppercase letters for all specimens
    - Remaining four chars are random uppercase letters and digits, unique for each specimen

    Parameters:
        count: Number of identifiers to generate

    Returns:
        List of unique specimen identifiers
    """
    letters = string.ascii_uppercase
    # The prefix is drawn once and shared by every identifier.
    prefix = "".join(random.choices(letters, k=2))
    alphabet = letters + string.digits

    def _candidate() -> str:
        """Build one candidate identifier from the shared prefix."""
        suffix = "".join(random.choices(alphabet, k=4))
        return f"{prefix}{suffix}"

    generator = utils.UniqueIdGenerator("specimens", _candidate)
    idents = []
    for _ in range(count):
        idents.append(generator.next())
    return idents

_make_loci(params)

Make a list of mutable loci positions.

Parameters:

Name Type Description Default
params dict[str, object]

Dictionary with 'length' (genome length) and 'mutations' (number of mutable positions)

required

Returns:

Type Description
list[int]

A list of randomly selected positions that can be mutated

Source code in src/snailz/specimens.py
264
265
266
267
268
269
270
271
272
273
def _make_loci(params: dict[str, object]) -> list[int]:
    """Pick the genome positions that are allowed to mutate.

    Parameters:
        params: Dictionary with 'length' (genome length) and 'mutations' (number of mutable positions)

    Returns:
        A list of randomly selected positions that can be mutated
    """
    positions = range(params["length"])
    how_many = params["mutations"]
    # Sampling without replacement yields distinct positions.
    return random.sample(positions, how_many)

_make_masses(params, genomes, susceptible_locus, susceptible_base)

Generate random masses for specimens.

Parameters:

Name Type Description Default
params dict[str, object]

Dictionary with 'min_mass' and 'max_mass' parameters

required
genomes list[str]

List of genome strings

required
susceptible_locus int

Position that determines susceptibility

required
susceptible_base str

Base that makes a specimen susceptible

required

Returns:

Type Description
list[float]

List of randomly generated mass values between min_mass and max_mass, rounded to PRECISION decimal places

Source code in src/snailz/specimens.py
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
def _make_masses(
    params: dict[str, object],
    genomes: list[str],
    susceptible_locus: int,
    susceptible_base: str,
) -> list[float]:
    """Generate random masses for specimens.

    One mass is drawn per genome. Only the number of genomes matters:
    the genome contents, susceptible_locus and susceptible_base are
    accepted but not used by this implementation.

    Parameters:
        params: Dictionary with 'min_mass' and 'max_mass' parameters
        genomes: List of genome strings
        susceptible_locus: Position that determines susceptibility
        susceptible_base: Base that makes a specimen susceptible

    Returns:
        List of randomly generated mass values between min_mass and max_mass,
        rounded to PRECISION decimal places
    """
    low, high = params["min_mass"], params["max_mass"]
    masses = []
    for _ in genomes:
        drawn = random.uniform(low, high)
        masses.append(round(drawn, utils.PRECISION))
    return masses

_make_reference_genome(params)

Make a random reference genome.

Parameters:

Name Type Description Default
params dict[str, object]

Dictionary with 'length' parameter for the genome length

required

Returns:

Type Description
str

A randomly generated genome string of the specified length

Source code in src/snailz/specimens.py
299
300
301
302
303
304
305
306
307
308
def _make_reference_genome(params: dict[str, object]) -> str:
    """Build a random reference genome.

    Parameters:
        params: Dictionary with 'length' parameter for the genome length

    Returns:
        A randomly generated genome string of the specified length
    """
    length = params["length"]
    # Each position is drawn independently from BASES, with replacement.
    bases = random.choices(BASES, k=length)
    return "".join(bases)