version 1.0;

extension tract_registry tract_core;
extension tract_registry tract_pulse;

# Loop body named "scan_body_0"; a tract_core_scan call in this file selects it
# through body = "scan_body_0" and binds every parameter/result by its string name.
#
# Inputs, as bound by that call:
#   - `fastlstm1.c`, `fastlstm1.r`: state tensors, fed back from the
#     `fastlstm1.c_new` / `fastlstm1.r_new` results of the previous iteration.
#   - the four `fastlstm1.c_final.extracted...` tensors: listed under `scan`.
#   - every remaining parameter: listed under `full` and backed by a `variable`.
# Results: the two next-state tensors plus `fastlstm1.m` with an extra leading axis.
#
# NOTE(review): names and structure look like one step of a peephole LSTM cell
# with an output projection (four gate slices 0..256 / 256..512 / 512..768 /
# 768..1024) - nothing in this file states that explicitly, confirm with the exporter.
fragment scan_body_0(
    i"fastlstm1.c": tensor<scalar>,
    i"fastlstm1.r": tensor<scalar>,
    i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256": tensor<scalar>,
    i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768": tensor<scalar>,
    i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512": tensor<scalar>,
    i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024": tensor<scalar>,
    i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice": tensor<scalar>,
    i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice": tensor<scalar>,
    i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice": tensor<scalar>,
    i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice": tensor<scalar>,
    i"fastlstm1.four_parts.split-1-over-1.0..256.slice": tensor<scalar>,
    i"fastlstm1.four_parts.split-1-over-1.256..512.slice": tensor<scalar>,
    i"fastlstm1.four_parts.split-1-over-1.512..768.slice": tensor<scalar>,
    i"fastlstm1.four_parts.split-1-over-1.768..1024.slice": tensor<scalar>,
    i"fastlstm1.h_new.W.split-1-over-1.0..128.slice": tensor<scalar>,
    i"fastlstm1.h_new.split-1-over-1.0..128.slice": tensor<scalar>,
    i"fastlstm1.peephole0.mul.fix-rank-0-1": tensor<scalar>,
    i"fastlstm1.peephole1.mul.fix-rank-0-1": tensor<scalar>,
    i"fastlstm1.peephole2.mul.fix-rank-0-1": tensor<scalar>
) -> (i"fastlstm1.c_new": tensor<scalar>, i"fastlstm1.r_new": tensor<scalar>, i"fastlstm1.h_new.W.split-over-1.128..256.prop_axis.a.input_0": tensor<scalar>)
{
  # --- Slice 0..256: sigmoid(peephole0 * c + scanned input + r.W + bias) ---
  # Unlike the three other slices, the scanned input is added as-is here
  # (no squeeze on axis 0 first).
  # The einsum contracts over `k`, the only label shared by both operands;
  # `m`/`a` occur only in the first operand and `b` only in the result
  # (presumably unit-sized axes - TODO confirm).
  i"fastlstm1.peephole0.mul" = mul(i"fastlstm1.peephole0.mul.fix-rank-0-1", i"fastlstm1.c");
  i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-over-1.0..256" = tract_core_einsum([i"fastlstm1.r", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice"], expr = "mka,kn->bn", acc = "f32", output = "");
  i"fastlstm1.four_parts.W.concat-einsum-k.add-1.split-over-1.0..256" = add(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-over-1.0..256");
  i"fastlstm1.four_parts.split-over-1.0..256" = add(i"fastlstm1.four_parts.W.concat-einsum-k.add-1.split-over-1.0..256", i"fastlstm1.four_parts.split-1-over-1.0..256.slice");
  i"fastlstm1.peephole0.output" = add(i"fastlstm1.peephole0.mul", i"fastlstm1.four_parts.split-over-1.0..256");
  i"fastlstm1.peephole0.output.nolin" = sigmoid(i"fastlstm1.peephole0.output");
  # --- Slice 512..768: tanh(squeeze(scanned input) + r.W + bias), no peephole term ---
  i"fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.prop_axis.a.output" = squeeze(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768", axes = [0]);
  i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-over-1.512..768" = tract_core_einsum([i"fastlstm1.r", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice"], expr = "mka,kn->bn", acc = "f32", output = "");
  i"fastlstm1.four_parts.W.concat-einsum-k.add-1.split-over-1.512..768" = add(i"fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.prop_axis.a.output", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-over-1.512..768");
  i"fastlstm1.four_parts.split-over-1.512..768" = add(i"fastlstm1.four_parts.W.concat-einsum-k.add-1.split-over-1.512..768", i"fastlstm1.four_parts.split-1-over-1.512..768.slice");
  i"fastlstm1.four_parts.j.nolin" = tanh(i"fastlstm1.four_parts.split-over-1.512..768");
  # c_update = sigmoid(slice 0..256) * tanh(slice 512..768)
  i"fastlstm1.c_update" = mul(i"fastlstm1.peephole0.output.nolin", i"fastlstm1.four_parts.j.nolin");
  # --- Slice 256..512: sigmoid(peephole1 * c + squeeze(scanned input) + r.W + bias) ---
  i"fastlstm1.peephole1.mul" = mul(i"fastlstm1.peephole1.mul.fix-rank-0-1", i"fastlstm1.c");
  i"fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.prop_axis.a.output" = squeeze(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512", axes = [0]);
  i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-over-1.256..512" = tract_core_einsum([i"fastlstm1.r", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice"], expr = "mka,kn->bn", acc = "f32", output = "");
  i"fastlstm1.four_parts.W.concat-einsum-k.add-1.split-over-1.256..512" = add(i"fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.prop_axis.a.output", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-over-1.256..512");
  i"fastlstm1.four_parts.split-over-1.256..512" = add(i"fastlstm1.four_parts.W.concat-einsum-k.add-1.split-over-1.256..512", i"fastlstm1.four_parts.split-1-over-1.256..512.slice");
  i"fastlstm1.peephole1.output" = add(i"fastlstm1.peephole1.mul", i"fastlstm1.four_parts.split-over-1.256..512");
  i"fastlstm1.peephole1.output.nolin" = sigmoid(i"fastlstm1.peephole1.output");
  # c_prop = sigmoid(slice 256..512) * previous c; next state c_new = c_update + c_prop
  i"fastlstm1.c_prop" = mul(i"fastlstm1.peephole1.output.nolin", i"fastlstm1.c");
  i"fastlstm1.c_new" = add(i"fastlstm1.c_update", i"fastlstm1.c_prop");
  i"fastlstm1.tanh_c" = tanh(i"fastlstm1.c_new");
  # --- Slice 768..1024: same pattern, but the peephole term multiplies c_new, not c ---
  i"fastlstm1.peephole2.mul" = mul(i"fastlstm1.peephole2.mul.fix-rank-0-1", i"fastlstm1.c_new");
  i"fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.prop_axis.a.output" = squeeze(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024", axes = [0]);
  i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-over-1.768..1024" = tract_core_einsum([i"fastlstm1.r", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice"], expr = "mka,kn->bn", acc = "f32", output = "");
  i"fastlstm1.four_parts.W.concat-einsum-k.add-1.split-over-1.768..1024" = add(i"fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.prop_axis.a.output", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-over-1.768..1024");
  i"fastlstm1.four_parts.split-over-1.768..1024" = add(i"fastlstm1.four_parts.W.concat-einsum-k.add-1.split-over-1.768..1024", i"fastlstm1.four_parts.split-1-over-1.768..1024.slice");
  i"fastlstm1.peephole2.output" = add(i"fastlstm1.peephole2.mul", i"fastlstm1.four_parts.split-over-1.768..1024");
  i"fastlstm1.peephole2.output.nolin" = sigmoid(i"fastlstm1.peephole2.output");
  # m = tanh(c_new) * sigmoid(slice 768..1024)
  i"fastlstm1.m" = mul(i"fastlstm1.tanh_c", i"fastlstm1.peephole2.output.nolin");
  # Only the 0..128 part of h_new is computed inside the loop (einsum over `k`
  # plus bias); it becomes the next `r` state. The 128..256 part is not computed
  # here: `m` is exported with a new leading axis instead.
  i"fastlstm1.h_new.W.split-over-1.0..128" = tract_core_einsum([i"fastlstm1.m", i"fastlstm1.h_new.W.split-1-over-1.0..128.slice"], expr = "bk,kn->mna", acc = "f32", output = "");
  i"fastlstm1.h_new.split-over-1.0..128" = add(i"fastlstm1.h_new.W.split-over-1.0..128", i"fastlstm1.h_new.split-1-over-1.0..128.slice");
  i"fastlstm1.h_new.W.split-over-1.128..256.prop_axis.a.input_0" = unsqueeze(i"fastlstm1.m", axes = [0]);
  i"fastlstm1.r_new" = i"fastlstm1.h_new.split-over-1.0..128";
}

# Loop body for the `fastlstm2` tensors. Statement for statement it mirrors
# `scan_body_0`, with every `fastlstm1.` identifier replaced by `fastlstm2.`.
#
# Results: `fastlstm2.c_new`, `fastlstm2.r_new`, and `fastlstm2.m` with an extra
# leading axis.
#
# NOTE(review): presumably selected by a tract_core_scan call with
# body = "scan_body_1", with `fastlstm2.c` / `fastlstm2.r` as fed-back state -
# that call is not visible in this part of the file, confirm.
# NOTE(review): names and structure look like one step of a peephole LSTM cell
# with an output projection - not stated anywhere in this file, confirm.
fragment scan_body_1(
    i"fastlstm2.c": tensor<scalar>,
    i"fastlstm2.r": tensor<scalar>,
    i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256": tensor<scalar>,
    i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768": tensor<scalar>,
    i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512": tensor<scalar>,
    i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024": tensor<scalar>,
    i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice": tensor<scalar>,
    i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice": tensor<scalar>,
    i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice": tensor<scalar>,
    i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice": tensor<scalar>,
    i"fastlstm2.four_parts.split-1-over-1.0..256.slice": tensor<scalar>,
    i"fastlstm2.four_parts.split-1-over-1.256..512.slice": tensor<scalar>,
    i"fastlstm2.four_parts.split-1-over-1.512..768.slice": tensor<scalar>,
    i"fastlstm2.four_parts.split-1-over-1.768..1024.slice": tensor<scalar>,
    i"fastlstm2.h_new.W.split-1-over-1.0..128.slice": tensor<scalar>,
    i"fastlstm2.h_new.split-1-over-1.0..128.slice": tensor<scalar>,
    i"fastlstm2.peephole0.mul.fix-rank-0-1": tensor<scalar>,
    i"fastlstm2.peephole1.mul.fix-rank-0-1": tensor<scalar>,
    i"fastlstm2.peephole2.mul.fix-rank-0-1": tensor<scalar>
) -> (i"fastlstm2.c_new": tensor<scalar>, i"fastlstm2.r_new": tensor<scalar>, i"fastlstm2.h_new.W.split-over-1.128..256.prop_axis.a.input_0": tensor<scalar>)
{
  # --- Slice 0..256: sigmoid(peephole0 * c + input slice + r.W + bias) ---
  # Unlike the three other slices, the input slice is added as-is here
  # (no squeeze on axis 0 first).
  # The einsum contracts over `k`, the only label shared by both operands;
  # `m`/`a` occur only in the first operand and `b` only in the result
  # (presumably unit-sized axes - TODO confirm).
  i"fastlstm2.peephole0.mul" = mul(i"fastlstm2.peephole0.mul.fix-rank-0-1", i"fastlstm2.c");
  i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-over-1.0..256" = tract_core_einsum([i"fastlstm2.r", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice"], expr = "mka,kn->bn", acc = "f32", output = "");
  i"fastlstm2.four_parts.W.concat-einsum-k.add-1.split-over-1.0..256" = add(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-over-1.0..256");
  i"fastlstm2.four_parts.split-over-1.0..256" = add(i"fastlstm2.four_parts.W.concat-einsum-k.add-1.split-over-1.0..256", i"fastlstm2.four_parts.split-1-over-1.0..256.slice");
  i"fastlstm2.peephole0.output" = add(i"fastlstm2.peephole0.mul", i"fastlstm2.four_parts.split-over-1.0..256");
  i"fastlstm2.peephole0.output.nolin" = sigmoid(i"fastlstm2.peephole0.output");
  # --- Slice 512..768: tanh(squeeze(input slice) + r.W + bias), no peephole term ---
  i"fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.prop_axis.a.output" = squeeze(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768", axes = [0]);
  i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-over-1.512..768" = tract_core_einsum([i"fastlstm2.r", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice"], expr = "mka,kn->bn", acc = "f32", output = "");
  i"fastlstm2.four_parts.W.concat-einsum-k.add-1.split-over-1.512..768" = add(i"fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.prop_axis.a.output", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-over-1.512..768");
  i"fastlstm2.four_parts.split-over-1.512..768" = add(i"fastlstm2.four_parts.W.concat-einsum-k.add-1.split-over-1.512..768", i"fastlstm2.four_parts.split-1-over-1.512..768.slice");
  i"fastlstm2.four_parts.j.nolin" = tanh(i"fastlstm2.four_parts.split-over-1.512..768");
  # c_update = sigmoid(slice 0..256) * tanh(slice 512..768)
  i"fastlstm2.c_update" = mul(i"fastlstm2.peephole0.output.nolin", i"fastlstm2.four_parts.j.nolin");
  # --- Slice 256..512: sigmoid(peephole1 * c + squeeze(input slice) + r.W + bias) ---
  i"fastlstm2.peephole1.mul" = mul(i"fastlstm2.peephole1.mul.fix-rank-0-1", i"fastlstm2.c");
  i"fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.prop_axis.a.output" = squeeze(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512", axes = [0]);
  i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-over-1.256..512" = tract_core_einsum([i"fastlstm2.r", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice"], expr = "mka,kn->bn", acc = "f32", output = "");
  i"fastlstm2.four_parts.W.concat-einsum-k.add-1.split-over-1.256..512" = add(i"fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.prop_axis.a.output", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-over-1.256..512");
  i"fastlstm2.four_parts.split-over-1.256..512" = add(i"fastlstm2.four_parts.W.concat-einsum-k.add-1.split-over-1.256..512", i"fastlstm2.four_parts.split-1-over-1.256..512.slice");
  i"fastlstm2.peephole1.output" = add(i"fastlstm2.peephole1.mul", i"fastlstm2.four_parts.split-over-1.256..512");
  i"fastlstm2.peephole1.output.nolin" = sigmoid(i"fastlstm2.peephole1.output");
  # c_prop = sigmoid(slice 256..512) * incoming c; c_new = c_update + c_prop
  i"fastlstm2.c_prop" = mul(i"fastlstm2.peephole1.output.nolin", i"fastlstm2.c");
  i"fastlstm2.c_new" = add(i"fastlstm2.c_update", i"fastlstm2.c_prop");
  i"fastlstm2.tanh_c" = tanh(i"fastlstm2.c_new");
  # --- Slice 768..1024: same pattern, but the peephole term multiplies c_new, not c ---
  i"fastlstm2.peephole2.mul" = mul(i"fastlstm2.peephole2.mul.fix-rank-0-1", i"fastlstm2.c_new");
  i"fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.prop_axis.a.output" = squeeze(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024", axes = [0]);
  i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-over-1.768..1024" = tract_core_einsum([i"fastlstm2.r", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice"], expr = "mka,kn->bn", acc = "f32", output = "");
  i"fastlstm2.four_parts.W.concat-einsum-k.add-1.split-over-1.768..1024" = add(i"fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.prop_axis.a.output", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-over-1.768..1024");
  i"fastlstm2.four_parts.split-over-1.768..1024" = add(i"fastlstm2.four_parts.W.concat-einsum-k.add-1.split-over-1.768..1024", i"fastlstm2.four_parts.split-1-over-1.768..1024.slice");
  i"fastlstm2.peephole2.output" = add(i"fastlstm2.peephole2.mul", i"fastlstm2.four_parts.split-over-1.768..1024");
  i"fastlstm2.peephole2.output.nolin" = sigmoid(i"fastlstm2.peephole2.output");
  # m = tanh(c_new) * sigmoid(slice 768..1024)
  i"fastlstm2.m" = mul(i"fastlstm2.tanh_c", i"fastlstm2.peephole2.output.nolin");
  # Only the 0..128 part of h_new is computed in this fragment (einsum over `k`
  # plus bias) and returned as `r_new`. The 128..256 part is not computed here:
  # `m` is exported with a new leading axis instead.
  i"fastlstm2.h_new.W.split-over-1.0..128" = tract_core_einsum([i"fastlstm2.m", i"fastlstm2.h_new.W.split-1-over-1.0..128.slice"], expr = "bk,kn->mna", acc = "f32", output = "");
  i"fastlstm2.h_new.split-over-1.0..128" = add(i"fastlstm2.h_new.W.split-over-1.0..128", i"fastlstm2.h_new.split-1-over-1.0..128.slice");
  i"fastlstm2.h_new.W.split-over-1.128..256.prop_axis.a.input_0" = unsqueeze(i"fastlstm2.m", axes = [0]);
  i"fastlstm2.r_new" = i"fastlstm2.h_new.split-over-1.0..128";
}

# Model-level metadata: takes no inputs and returns a list of (name, value) pairs.
#   - "pulse.delay", "pulse.input_axes", "pulse.output_axes": integer lists
#     ([6], [0], [0]) cast to i64.
#   - "tract_nnef_ser_version", "tract_nnef_format_version": plain strings.
# NOTE(review): the "pulse.*" entries are presumably consumed by the tract_pulse
# extension this file declares - their exact meaning is not shown here, confirm.
fragment tract_core_properties(
) -> (properties: (string, tensor<scalar>)[])
{
  properties = [("pulse.delay", tract_core_cast([6], to = "i64")), ("pulse.input_axes", tract_core_cast([0], to = "i64")), ("pulse.output_axes", tract_core_cast([0], to = "i64")), ("tract_nnef_ser_version", "0.19.3-pre"), ("tract_nnef_format_version", "beta1")];
}

graph network(input) -> (output) {
  input = external(shape = [24, 40]);
  i"lda.output.delay" = tract_pulse_delay(input, axis = 0, delay = 0, overlap = 4);
  i"lda.output.add_n" = unsqueeze(i"lda.output.delay", axes = [0]);
  i"lda.kernel.0" = variable<scalar>(label = "lda.kernel.0", shape = [200, 40, 5]);
  i"lda.bias.0" = variable<scalar>(label = "lda.bias.0", shape = [200]);
  i"lda.output_input" = transpose(i"lda.output.add_n", axes = [0, 2, 1]);
  i"lda.output_conv" = conv(i"lda.output_input", i"lda.kernel.0", i"lda.bias.0", dilation = [1], stride = [1], border = "constant", groups = 1, padding = [(0, 0)]);
  i"lda.output" = transpose(i"lda.output_conv", axes = [0, 2, 1]);
  i"lda.output.rm_n" = squeeze(i"lda.output", axes = [0]);
  i"tdnn1.affine.output.einsum.fix_a.0" = unsqueeze(i"lda.output.rm_n", axes = [0]);
  i"tdnn1.affine.output.einsum.fix_b.0" = variable<scalar>(label = "tdnn1.affine.output.einsum.fix_b.0", shape = [1, 256, 200]);
  i"tdnn1.affine.output.einsum" = matmul(i"tdnn1.affine.output.einsum.fix_b.0", i"tdnn1.affine.output.einsum.fix_a.0", transposeA = false, transposeB = true);
  i"tdnn1.affine.output.bias.reshape" = variable<scalar>(label = "tdnn1.affine.output.bias.reshape", shape = [1, 256, 1]);
  i"tdnn1.affine.output" = add(i"tdnn1.affine.output.einsum", i"tdnn1.affine.output.bias.reshape");
  i"tdnn1.relu.output.low.cst" = [[[0.0]]];
  i"tdnn1.relu.output.low" = max(i"tdnn1.affine.output", i"tdnn1.relu.output.low.cst");
  i"tdnn1.renorm.reduced.sum.sqr" = square(i"tdnn1.relu.output.low");
  i"tdnn1.renorm.reduced.sum.sum" = sum_reduce(i"tdnn1.renorm.reduced.sum.sqr", axes = [1]);
  i"tdnn1.renorm.reduced.sum.card.fix-rank-1-2" = [[[0.00390625]]];
  i"tdnn1.renorm.reduced.sum.card" = mul(i"tdnn1.renorm.reduced.sum.sum", i"tdnn1.renorm.reduced.sum.card.fix-rank-1-2");
  i"tdnn1.renorm.output-recip" = rsqrt(i"tdnn1.renorm.reduced.sum.card");
  i"tdnn1.renorm.output" = mul(i"tdnn1.relu.output.low", i"tdnn1.renorm.output-recip");
  i"tdnn2.affine.output.delay" = tract_pulse_delay(i"tdnn1.renorm.output", axis = 2, delay = 0, overlap = 2);
  i"tdnn2.affine.kernel.0" = variable<scalar>(label = "tdnn2.affine.kernel.0", shape = [256, 256, 3]);
  i"tdnn2.affine.bias.0" = variable<scalar>(label = "tdnn2.affine.bias.0", shape = [256]);
  i"tdnn2.affine.output_conv" = conv(i"tdnn2.affine.output.delay", i"tdnn2.affine.kernel.0", i"tdnn2.affine.bias.0", dilation = [1], stride = [1], border = "constant", groups = 1, padding = [(0, 0)]);
  i"tdnn2.affine.output" = i"tdnn2.affine.output_conv";
  i"tdnn2.relu.output.low" = max(i"tdnn2.affine.output", i"tdnn1.relu.output.low.cst");
  i"tdnn2.renorm.reduced.sum.sqr" = square(i"tdnn2.relu.output.low");
  i"tdnn2.renorm.reduced.sum.sum" = sum_reduce(i"tdnn2.renorm.reduced.sum.sqr", axes = [1]);
  i"tdnn2.renorm.reduced.sum.card" = mul(i"tdnn2.renorm.reduced.sum.sum", i"tdnn1.renorm.reduced.sum.card.fix-rank-1-2");
  i"tdnn2.renorm.output-recip" = rsqrt(i"tdnn2.renorm.reduced.sum.card");
  i"tdnn2.renorm.output" = mul(i"tdnn2.relu.output.low", i"tdnn2.renorm.output-recip");
  i"tdnn3.affine.kernel.0" = variable<scalar>(label = "tdnn3.affine.kernel.0", shape = [256, 256, 3]);
  i"tdnn3.affine.bias.0" = variable<scalar>(label = "tdnn3.affine.bias.0", shape = [256]);
  i"tdnn3.affine.output_conv" = conv(i"tdnn2.renorm.output", i"tdnn3.affine.kernel.0", i"tdnn3.affine.bias.0", dilation = [1], stride = [3], border = "constant", groups = 1, padding = [(0, 0)]);
  i"tdnn3.affine.output" = i"tdnn3.affine.output_conv";
  i"tdnn3.relu.output.low" = max(i"tdnn3.affine.output", i"tdnn1.relu.output.low.cst");
  i"tdnn3.renorm.reduced.sum.sqr" = square(i"tdnn3.relu.output.low");
  i"tdnn3.renorm.reduced.sum.sum" = sum_reduce(i"tdnn3.renorm.reduced.sum.sqr", axes = [1]);
  i"tdnn3.renorm.reduced.sum.card" = mul(i"tdnn3.renorm.reduced.sum.sum", i"tdnn1.renorm.reduced.sum.card.fix-rank-1-2");
  i"tdnn3.renorm.output-recip" = rsqrt(i"tdnn3.renorm.reduced.sum.card");
  i"tdnn3.renorm.output" = mul(i"tdnn3.relu.output.low", i"tdnn3.renorm.output-recip");
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256.fix_b.0" = variable<scalar>(label = "fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256.fix_b.0", shape = [1, 256, 256]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256" = matmul(i"tdnn3.renorm.output", i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256.fix_b.0", transposeA = true, transposeB = false);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256.fix_c.0" = squeeze(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256", axes = [0]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_a.0" = unsqueeze(i"tdnn3.renorm.output", axes = [0]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_b.1" = variable<scalar>(label = "fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_b.1", shape = [1, 1, 256, 256]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768" = matmul(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_a.0", i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_b.1", transposeA = true, transposeB = false);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_c.0" = squeeze(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768", axes = [1]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_c.1" = transpose(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_c.0", axes = [1, 0, 2]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_a.0" = unsqueeze(i"tdnn3.renorm.output", axes = [0]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_b.1" = variable<scalar>(label = "fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_b.1", shape = [1, 1, 256, 256]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512" = matmul(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_a.0", i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_b.1", transposeA = true, transposeB = false);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_c.0" = squeeze(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512", axes = [1]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_c.1" = transpose(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_c.0", axes = [1, 0, 2]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_a.0" = unsqueeze(i"tdnn3.renorm.output", axes = [0]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_b.1" = variable<scalar>(label = "fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_b.1", shape = [1, 1, 256, 256]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024" = matmul(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_a.0", i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_b.1", transposeA = true, transposeB = false);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_c.0" = squeeze(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024", axes = [1]);
  i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_c.1" = transpose(i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_c.0", axes = [1, 0, 2]);
  i"tap.tap.fastlstm1.c_init.0-35/0-82/0" = variable<scalar>(label = "tap.tap.fastlstm1.c_init.0-35/0-82/0", shape = [1, 256]);
  i"tap.fastlstm1.r_init.0-36/0" = variable<scalar>(label = "tap.fastlstm1.r_init.0-36/0", shape = [1, 128, 1]);
  i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice" = variable<scalar>(label = "fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice", shape = [128, 256]);
  i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice" = variable<scalar>(label = "fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice", shape = [128, 256]);
  i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice" = variable<scalar>(label = "fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice", shape = [128, 256]);
  i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice" = variable<scalar>(label = "fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice", shape = [128, 256]);
  i"fastlstm1.four_parts.split-1-over-1.0..256.slice" = variable<scalar>(label = "fastlstm1.four_parts.split-1-over-1.0..256.slice", shape = [1, 256]);
  i"fastlstm1.four_parts.split-1-over-1.256..512.slice" = variable<scalar>(label = "fastlstm1.four_parts.split-1-over-1.256..512.slice", shape = [1, 256]);
  i"fastlstm1.four_parts.split-1-over-1.512..768.slice" = variable<scalar>(label = "fastlstm1.four_parts.split-1-over-1.512..768.slice", shape = [1, 256]);
  i"fastlstm1.four_parts.split-1-over-1.768..1024.slice" = variable<scalar>(label = "fastlstm1.four_parts.split-1-over-1.768..1024.slice", shape = [1, 256]);
  i"fastlstm1.h_new.W.split-1-over-1.0..128.slice" = variable<scalar>(label = "fastlstm1.h_new.W.split-1-over-1.0..128.slice", shape = [256, 128]);
  i"fastlstm1.h_new.split-1-over-1.0..128.slice" = variable<scalar>(label = "fastlstm1.h_new.split-1-over-1.0..128.slice", shape = [1, 128, 1]);
  i"fastlstm1.peephole0.mul.fix-rank-0-1" = variable<scalar>(label = "fastlstm1.peephole0.mul.fix-rank-0-1", shape = [1, 256]);
  i"fastlstm1.peephole1.mul.fix-rank-0-1" = variable<scalar>(label = "fastlstm1.peephole1.mul.fix-rank-0-1", shape = [1, 256]);
  i"fastlstm1.peephole2.mul.fix-rank-0-1" = variable<scalar>(label = "fastlstm1.peephole2.mul.fix-rank-0-1", shape = [1, 256]);
  ( i"fastlstm1.c_final", i"fastlstm1.c_final_1" ) = tract_core_scan(body = "scan_body_0", scan = [("fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256", i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256.fix_c.0", 0, 1), ("fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768", i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_c.1", 0, 1), ("fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512", i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_c.1", 0, 1), ("fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024", i"fastlstm1.c_final.extracted.fastlstm1.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_c.1", 0, 1)], full = [("fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice"), ("fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice"), ("fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice"), ("fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice", i"fastlstm1.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice"), ("fastlstm1.four_parts.split-1-over-1.0..256.slice", i"fastlstm1.four_parts.split-1-over-1.0..256.slice"), ("fastlstm1.four_parts.split-1-over-1.256..512.slice", i"fastlstm1.four_parts.split-1-over-1.256..512.slice"), ("fastlstm1.four_parts.split-1-over-1.512..768.slice", i"fastlstm1.four_parts.split-1-over-1.512..768.slice"), 
("fastlstm1.four_parts.split-1-over-1.768..1024.slice", i"fastlstm1.four_parts.split-1-over-1.768..1024.slice"), ("fastlstm1.h_new.W.split-1-over-1.0..128.slice", i"fastlstm1.h_new.W.split-1-over-1.0..128.slice"), ("fastlstm1.h_new.split-1-over-1.0..128.slice", i"fastlstm1.h_new.split-1-over-1.0..128.slice"), ("fastlstm1.peephole0.mul.fix-rank-0-1", i"fastlstm1.peephole0.mul.fix-rank-0-1"), ("fastlstm1.peephole1.mul.fix-rank-0-1", i"fastlstm1.peephole1.mul.fix-rank-0-1"), ("fastlstm1.peephole2.mul.fix-rank-0-1", i"fastlstm1.peephole2.mul.fix-rank-0-1")], state = [("fastlstm1.c", i"tap.tap.fastlstm1.c_init.0-35/0-82/0", "fastlstm1.c_new"), ("fastlstm1.r", i"tap.fastlstm1.r_init.0-36/0", "fastlstm1.r_new")], output = [("fastlstm1.r_new", "full", 2, 1), ("fastlstm1.h_new.W.split-over-1.128..256.prop_axis.a.input_0", "full", 0, 1)], skip = 2, reset_every_turn = false);
  i"fastlstm1.h_new.W.split-over-1.128..256.fix_a.0" = transpose(i"fastlstm1.c_final_1", axes = [1, 0, 2]);
  i"fastlstm1.h_new.W.split-over-1.128..256.fix_a.1" = unsqueeze(i"fastlstm1.h_new.W.split-over-1.128..256.fix_a.0", axes = [0]);
  i"fastlstm1.h_new.W.split-over-1.128..256.fix_b.1" = variable<scalar>(label = "fastlstm1.h_new.W.split-over-1.128..256.fix_b.1", shape = [1, 1, 256, 128]);
  i"fastlstm1.h_new.W.split-over-1.128..256" = matmul(i"fastlstm1.h_new.W.split-over-1.128..256.fix_b.1", i"fastlstm1.h_new.W.split-over-1.128..256.fix_a.1", transposeA = true, transposeB = true);
  i"fastlstm1.h_new.W.split-over-1.128..256.fix_c.0" = squeeze(i"fastlstm1.h_new.W.split-over-1.128..256", axes = [1]);
  i"fastlstm1.c_final.fastlstm1.h_new.split-1-over-1.128..256.slice" = variable<scalar>(label = "fastlstm1.c_final.fastlstm1.h_new.split-1-over-1.128..256.slice", shape = [1, 128, 1]);
  i"fastlstm1.h_new.split-over-1.128..256" = add(i"fastlstm1.h_new.W.split-over-1.128..256.fix_c.0", i"fastlstm1.c_final.fastlstm1.h_new.split-1-over-1.128..256.slice");
  i"fastlstm1.h_new.concat-1" = concat([i"fastlstm1.c_final", i"fastlstm1.h_new.split-over-1.128..256"], axis = 1);
  i"tdnn4.affine.output.delay" = tract_pulse_delay(i"fastlstm1.h_new.concat-1", axis = 2, delay = 0, overlap = 2);
  i"tdnn4.affine.kernel.0" = variable<scalar>(label = "tdnn4.affine.kernel.0", shape = [256, 256, 3]);
  i"tdnn4.affine.bias.0" = variable<scalar>(label = "tdnn4.affine.bias.0", shape = [256]);
  i"tdnn4.affine.output_conv" = conv(i"tdnn4.affine.output.delay", i"tdnn4.affine.kernel.0", i"tdnn4.affine.bias.0", dilation = [1], stride = [1], border = "constant", groups = 1, padding = [(0, 0)]);
  i"tdnn4.affine.output" = i"tdnn4.affine.output_conv";
  i"tdnn4.relu.output.low" = max(i"tdnn4.affine.output", i"tdnn1.relu.output.low.cst");
  i"tdnn4.renorm.reduced.sum.sqr" = square(i"tdnn4.relu.output.low");
  i"tdnn4.renorm.reduced.sum.sum" = sum_reduce(i"tdnn4.renorm.reduced.sum.sqr", axes = [1]);
  i"tdnn4.renorm.reduced.sum.card" = mul(i"tdnn4.renorm.reduced.sum.sum", i"tdnn1.renorm.reduced.sum.card.fix-rank-1-2");
  i"tdnn4.renorm.output-recip" = rsqrt(i"tdnn4.renorm.reduced.sum.card");
  i"tdnn4.renorm.output" = mul(i"tdnn4.relu.output.low", i"tdnn4.renorm.output-recip");
  i"tdnn5.affine.output.delay" = tract_pulse_delay(i"tdnn4.renorm.output", axis = 2, delay = 0, overlap = 2);
  i"tdnn5.affine.kernel.0" = variable<scalar>(label = "tdnn5.affine.kernel.0", shape = [256, 256, 3]);
  i"tdnn5.affine.bias.0" = variable<scalar>(label = "tdnn5.affine.bias.0", shape = [256]);
  i"tdnn5.affine.output_conv" = conv(i"tdnn5.affine.output.delay", i"tdnn5.affine.kernel.0", i"tdnn5.affine.bias.0", dilation = [1], stride = [1], border = "constant", groups = 1, padding = [(0, 0)]);
  i"tdnn5.affine.output" = i"tdnn5.affine.output_conv";
  i"tdnn5.relu.output.low" = max(i"tdnn5.affine.output", i"tdnn1.relu.output.low.cst");
  i"tdnn5.renorm.reduced.sum.sqr" = square(i"tdnn5.relu.output.low");
  i"tdnn5.renorm.reduced.sum.sum" = sum_reduce(i"tdnn5.renorm.reduced.sum.sqr", axes = [1]);
  i"tdnn5.renorm.reduced.sum.card" = mul(i"tdnn5.renorm.reduced.sum.sum", i"tdnn1.renorm.reduced.sum.card.fix-rank-1-2");
  i"tdnn5.renorm.output-recip" = rsqrt(i"tdnn5.renorm.reduced.sum.card");
  i"tdnn5.renorm.output" = mul(i"tdnn5.relu.output.low", i"tdnn5.renorm.output-recip");
  # fastlstm2 input projections: "tdnn5.renorm.output" is multiplied by four
  # separate weight tensors (suffixes 0..256, 512..768, 256..512, 768..1024, in
  # that order). The four results are the per-step ("scan") inputs of the
  # tract_core_scan call further down.
  #
  # Projection 0..256: rank-3 weight [1, 256, 256], matmul with A transposed,
  # then axis 0 is squeezed away.
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256.fix_b.0" = variable<scalar>(label = "fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256.fix_b.0", shape = [1, 256, 256]);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256" = matmul(i"tdnn5.renorm.output", i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256.fix_b.0", transposeA = true, transposeB = false);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256.fix_c.0" = squeeze(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256", axes = [0]);
  # Projection 512..768: the input gets a leading axis (unsqueeze axis 0), is
  # multiplied by a rank-4 weight [1, 1, 256, 256] with A transposed, axis 1 is
  # squeezed, and the first two remaining axes are swapped (axes = [1, 0, 2]).
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_a.0" = unsqueeze(i"tdnn5.renorm.output", axes = [0]);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_b.1" = variable<scalar>(label = "fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_b.1", shape = [1, 1, 256, 256]);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768" = matmul(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_a.0", i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_b.1", transposeA = true, transposeB = false);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_c.0" = squeeze(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768", axes = [1]);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_c.1" = transpose(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_c.0", axes = [1, 0, 2]);
  # Projection 256..512: same unsqueeze / matmul / squeeze / transpose sequence
  # as the 512..768 projection, with its own weight tensor.
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_a.0" = unsqueeze(i"tdnn5.renorm.output", axes = [0]);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_b.1" = variable<scalar>(label = "fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_b.1", shape = [1, 1, 256, 256]);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512" = matmul(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_a.0", i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_b.1", transposeA = true, transposeB = false);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_c.0" = squeeze(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512", axes = [1]);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_c.1" = transpose(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_c.0", axes = [1, 0, 2]);
  # Projection 768..1024: same sequence again, with its own weight tensor.
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_a.0" = unsqueeze(i"tdnn5.renorm.output", axes = [0]);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_b.1" = variable<scalar>(label = "fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_b.1", shape = [1, 1, 256, 256]);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024" = matmul(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_a.0", i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_b.1", transposeA = true, transposeB = false);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_c.0" = squeeze(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024", axes = [1]);
  i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_c.1" = transpose(i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_c.0", axes = [1, 0, 2]);
  # fastlstm2 parameters handed unchanged to the scan body: all thirteen tensors
  # below appear in the `full = [...]` list of the tract_core_scan call that
  # follows. Each is loaded by its label.
  #
  # Four [128, 256] weight slices (suffixes 0..256, 256..512, 512..768, 768..1024).
  # NOTE(review): the visible scan_body_0 uses the matching fastlstm1 tensor in an
  # einsum with the recurrent state "r"; presumably scan_body_1 does the same for
  # fastlstm2 -- that body is outside this view.
  i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice" = variable<scalar>(label = "fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice", shape = [128, 256]);
  i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice" = variable<scalar>(label = "fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice", shape = [128, 256]);
  i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice" = variable<scalar>(label = "fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice", shape = [128, 256]);
  i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice" = variable<scalar>(label = "fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice", shape = [128, 256]);
  # Four [1, 256] tensors, one per slice of "four_parts".
  # NOTE(review): presumably the per-gate bias terms -- confirm in scan_body_1.
  i"fastlstm2.four_parts.split-1-over-1.0..256.slice" = variable<scalar>(label = "fastlstm2.four_parts.split-1-over-1.0..256.slice", shape = [1, 256]);
  i"fastlstm2.four_parts.split-1-over-1.256..512.slice" = variable<scalar>(label = "fastlstm2.four_parts.split-1-over-1.256..512.slice", shape = [1, 256]);
  i"fastlstm2.four_parts.split-1-over-1.512..768.slice" = variable<scalar>(label = "fastlstm2.four_parts.split-1-over-1.512..768.slice", shape = [1, 256]);
  i"fastlstm2.four_parts.split-1-over-1.768..1024.slice" = variable<scalar>(label = "fastlstm2.four_parts.split-1-over-1.768..1024.slice", shape = [1, 256]);
  # First slice (0..128) of the "h_new" weight [256, 128] and its companion
  # tensor [1, 128, 1]. The 128..256 slice is applied after the scan instead.
  i"fastlstm2.h_new.W.split-1-over-1.0..128.slice" = variable<scalar>(label = "fastlstm2.h_new.W.split-1-over-1.0..128.slice", shape = [256, 128]);
  i"fastlstm2.h_new.split-1-over-1.0..128.slice" = variable<scalar>(label = "fastlstm2.h_new.split-1-over-1.0..128.slice", shape = [1, 128, 1]);
  # Three [1, 256] "peephole" tensors; the visible scan_body_0 multiplies its
  # peephole0 counterpart element-wise with the cell state "c".
  i"fastlstm2.peephole0.mul.fix-rank-0-1" = variable<scalar>(label = "fastlstm2.peephole0.mul.fix-rank-0-1", shape = [1, 256]);
  i"fastlstm2.peephole1.mul.fix-rank-0-1" = variable<scalar>(label = "fastlstm2.peephole1.mul.fix-rank-0-1", shape = [1, 256]);
  i"fastlstm2.peephole2.mul.fix-rank-0-1" = variable<scalar>(label = "fastlstm2.peephole2.mul.fix-rank-0-1", shape = [1, 256]);
  # Run the fastlstm2 recurrence with fragment "scan_body_1" (one statement,
  # physically split over two lines).
  #   scan   : the four input projections computed above, each tagged (0, 1).
  #            NOTE(review): presumably (axis, chunk) -- confirm in the tract docs.
  #   full   : the thirteen parameter tensors declared above, passed as-is.
  #   state  : "fastlstm2.c" is updated from "fastlstm2.c_new" and "fastlstm2.r"
  #            from "fastlstm2.r_new"; both are initialised from "tap.*" tensors
  #            defined outside this view.
  #            NOTE(review): the initialisers carry fastlstm1 names
  #            ("fastlstm1.c_init", "fastlstm1.r_init"); confirm that sharing
  #            them between the two LSTM layers is intended.
  #   output : two "full" outputs, "fastlstm2.r_new" tagged (2, 1) and
  #            "fastlstm2.h_new.W.split-over-1.128..256.prop_axis.a.input_0"
  #            tagged (0, 1); presumably bound in that order to
  #            "fastlstm2.c_final" and "fastlstm2.c_final_1" -- confirm.
  #   skip = 6, reset_every_turn = false.
  ( i"fastlstm2.c_final", i"fastlstm2.c_final_1" ) = tract_core_scan(body = "scan_body_1", scan = [("fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256", i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.0..256.fix_c.0", 0, 1), ("fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768", i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.512..768.fix_c.1", 0, 1), ("fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512", i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.256..512.fix_c.1", 0, 1), ("fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024", i"fastlstm2.c_final.extracted.fastlstm2.four_parts.W.concat-einsum-k.0..256.split-over-1.768..1024.fix_c.1", 0, 1)], full = [("fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.0..256.slice"), ("fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.256..512.slice"), ("fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.512..768.slice"), ("fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice", i"fastlstm2.four_parts.W.concat-einsum-k.256..384.split-1-over-1.768..1024.slice"), ("fastlstm2.four_parts.split-1-over-1.0..256.slice", i"fastlstm2.four_parts.split-1-over-1.0..256.slice"), ("fastlstm2.four_parts.split-1-over-1.256..512.slice", i"fastlstm2.four_parts.split-1-over-1.256..512.slice"), ("fastlstm2.four_parts.split-1-over-1.512..768.slice", i"fastlstm2.four_parts.split-1-over-1.512..768.slice"), 
("fastlstm2.four_parts.split-1-over-1.768..1024.slice", i"fastlstm2.four_parts.split-1-over-1.768..1024.slice"), ("fastlstm2.h_new.W.split-1-over-1.0..128.slice", i"fastlstm2.h_new.W.split-1-over-1.0..128.slice"), ("fastlstm2.h_new.split-1-over-1.0..128.slice", i"fastlstm2.h_new.split-1-over-1.0..128.slice"), ("fastlstm2.peephole0.mul.fix-rank-0-1", i"fastlstm2.peephole0.mul.fix-rank-0-1"), ("fastlstm2.peephole1.mul.fix-rank-0-1", i"fastlstm2.peephole1.mul.fix-rank-0-1"), ("fastlstm2.peephole2.mul.fix-rank-0-1", i"fastlstm2.peephole2.mul.fix-rank-0-1")], state = [("fastlstm2.c", i"tap.tap.fastlstm1.c_init.0-35/0-82/0", "fastlstm2.c_new"), ("fastlstm2.r", i"tap.fastlstm1.r_init.0-36/0", "fastlstm2.r_new")], output = [("fastlstm2.r_new", "full", 2, 1), ("fastlstm2.h_new.W.split-over-1.128..256.prop_axis.a.input_0", "full", 0, 1)], skip = 6, reset_every_turn = false);
  # Output affine layer, computed as the sum of two partial matrix products plus
  # a bias. NOTE(review): the tensor names ("concat-einsum-k.0..128" and
  # "concat-einsum-k.128..256") suggest the output weight was split in two along
  # its contraction axis -- confirm against the original model.
  #
  # Partial product 1: first scan output times a [1, 1690, 128] weight, with both
  # operands transposed; the leading axis 0 is then squeezed away.
  i"output.affine.output.W.concat-einsum-k.0..128.fix_a.0" = variable<scalar>(label = "output.affine.output.W.concat-einsum-k.0..128.fix_a.0", shape = [1, 1690, 128]);
  i"output.affine.output.W.concat-einsum-k.0..128" = matmul(i"fastlstm2.c_final", i"output.affine.output.W.concat-einsum-k.0..128.fix_a.0", transposeA = true, transposeB = true);
  i"output.affine.output.W.concat-einsum-k.0..128.fix_c.0" = squeeze(i"output.affine.output.W.concat-einsum-k.0..128", axes = [0]);
  # Second slice (128..256) of the fastlstm2 "h_new" projection, applied outside
  # the scan: swap the first two axes of the second scan output, multiply by the
  # [1, 256, 128] weight (both operands transposed), squeeze axis 0 and add the
  # [128, 1] tensor.
  i"fastlstm2.h_new.W.split-over-1.128..256.fix_a.0" = transpose(i"fastlstm2.c_final_1", axes = [1, 0, 2]);
  i"fastlstm2.h_new.W.split-over-1.128..256.fix_b.0" = variable<scalar>(label = "fastlstm2.h_new.W.split-over-1.128..256.fix_b.0", shape = [1, 256, 128]);
  i"fastlstm2.h_new.W.split-over-1.128..256" = matmul(i"fastlstm2.h_new.W.split-over-1.128..256.fix_b.0", i"fastlstm2.h_new.W.split-over-1.128..256.fix_a.0", transposeA = true, transposeB = true);
  i"fastlstm2.h_new.W.split-over-1.128..256.fix_c.0" = squeeze(i"fastlstm2.h_new.W.split-over-1.128..256", axes = [0]);
  i"fastlstm2.c_final.fastlstm2.h_new.split-1-over-1.128..256.slice" = variable<scalar>(label = "fastlstm2.c_final.fastlstm2.h_new.split-1-over-1.128..256.slice", shape = [128, 1]);
  i"fastlstm2.h_new.split-over-1.128..256" = add(i"fastlstm2.h_new.W.split-over-1.128..256.fix_c.0", i"fastlstm2.c_final.fastlstm2.h_new.split-1-over-1.128..256.slice");
  # Partial product 2: the h_new slice above times a [1690, 128] weight, both
  # operands transposed.
  i"output.affine.output.W.concat-einsum-slice-k.0.128..256" = variable<scalar>(label = "output.affine.output.W.concat-einsum-slice-k.0.128..256", shape = [1690, 128]);
  i"output.affine.output.W.concat-einsum-k.128..256" = matmul(i"fastlstm2.h_new.split-over-1.128..256", i"output.affine.output.W.concat-einsum-slice-k.0.128..256", transposeA = true, transposeB = true);
  # Sum the two partial products, add the [1, 1690] bias and expose the result
  # as the graph output.
  i"output.affine.output.W.concat-einsum-k.add-1" = add(i"output.affine.output.W.concat-einsum-k.0..128.fix_c.0", i"output.affine.output.W.concat-einsum-k.128..256");
  i"output.affine.bias.0" = variable<scalar>(label = "output.affine.bias.0", shape = [1, 1690]);
  i"output.affine.output" = add(i"output.affine.output.W.concat-einsum-k.add-1", i"output.affine.bias.0");
  output = i"output.affine.output";
}
