<?xml version="1.0" encoding="UTF-8"?>
<!-- Source: sassoftware/esp-studio-examples, EndToEndExamples/onnx_voice_transcription/model.xml (main branch). -->
<project heartbeat-interval="1" index="pi_EMPTY" name="voice_transcription_with_onnx" pubsub="auto" threads="4">
  <metadata>
    <!-- Canvas layout: JSON holding x/y coordinates for each window of continuous query "cq", keyed by window name. -->
    <meta id="layout">{"cq":{"w_audio":{"x":170,"y":50},"w_postproc":{"x":170,"y":450},"w_preproc":{"x":170,"y":175},"w_reader":{"x":410,"y":175},"w_score":{"x":170,"y":295}}}</meta>
    <!-- Upload/modification audit fields. The numeric values look like epoch milliseconds (TODO confirm). -->
    <meta id="studioUploadedBy">anonymous_user</meta>
    <meta id="studioUploaded">1767009157385</meta>
    <meta id="studioModifiedBy">anonymous_user</meta>
    <meta id="studioModified">1767009237405</meta>
    <meta id="studioTags">Example</meta>
    <!-- Resource hints as JSON. The units of minCpu and minMem are not stated in this file. -->
    <meta id="projectComputationalRequirements">{"minCpu":"2", "minMem":"4"}</meta>
  </metadata>
  <contqueries>
    <contquery name="cq" timing-threshold="1000">
      <windows>
        <!-- w_audio: Source window that receives audio blocks and feeds them to w_preproc. -->
        <window-source name="w_audio"
                       autogen-key="true"
                       insert-only="true"
                       pubsub="true"
                       queue-height="32">
          <schema>
            <fields>
              <field key="true" name="id" type="int64"/>
              <!-- The audio payload is binary data, so it is carried as a blob; an int64 cannot hold an audio block. -->
              <field name="audio" type="blob"/>
            </fields>
          </schema>
          <connectors>
            <!-- Inactive publisher: captures from device hw:0,0 at a 16000 sample rate. -->
            <connector class="audio" name="audio" active="false">
              <properties>
                <property name="type">pub</property>
                <property name="devicename">hw:0,0</property>
                <property name="blocksize">160000</property>
                <property name="samplerate">16000</property>
                <property name="wavfilename">espwav</property>
              </properties>
            </connector>
            <!-- Active publisher: publish() returns the next item from the generator created by
                 publish_fun_ac(), which is imported from files/esp_whisper_funs.py (not shown here).
                 NOTE(review): the generator is named image_gen although this project handles audio;
                 the embedded code is kept byte-identical. -->
            <connector class="python" name="python">
              <properties>
                <property name="type">pub</property>
                <property name="blocksize">160000</property>
                <property name="code"><![CDATA[#python code
from files.esp_whisper_funs import publish_fun_ac
image_gen = publish_fun_ac()
def publish():
  return next(image_gen)
  #python code]]></property>
              </properties>
            </connector>
            <!-- Inactive subscriber: writes this window's events as CSV to audio.csv in the project output directory. -->
            <connector class="fs" name="audio_Connector" active="false">
              <properties>
                <property name="type">sub</property>
                <property name="fstype">csv</property>
                <property name="fsname">@ESP_PROJECT_OUTPUT@/audio.csv</property>
                <property name="snapshot">true</property>
              </properties>
            </connector>
          </connectors>
        </window-source>
        <!-- w_preproc: Python window between w_audio and w_score. Its code lives in
             files/esp_whisper_funs.py; events="preproc" presumably names the handler
             function in that file (TODO confirm). -->
        <window-python name="w_preproc"
                       events="preproc"
                       index="pi_EMPTY"
                       output-insert-only="true"
                       process-blocks="true">
          <schema>
            <fields>
              <field name="id" type="int64" key="true"/>
              <!-- One blob field per model input mapped in w_score. -->
              <field name="audio_pcm" type="blob"/>
              <field name="max_length" type="blob"/>
              <field name="min_length" type="blob"/>
              <field name="num_beams" type="blob"/>
              <field name="num_return_sequences" type="blob"/>
              <field name="length_penalty" type="blob"/>
              <field name="repetition_penalty" type="blob"/>
            </fields>
          </schema>
          <code file="@ESP_PROJECT_HOME@/files/esp_whisper_funs.py"/>
        </window-python>
        <!-- w_reader: loads the ONNX model from the analytics directory and supplies it to w_score. -->
        <window-model-reader name="w_reader" model-type="onnx">
          <description>w_reader is a Model Reader window. This window reads the ONNX model and passes it to the w_score window. Warm-up tensors for the model inputs are also specified in this window.</description>
          <!-- Two warm-up runs. Every model input listed below reads its tensor from the same safetensors file.
               NOTE(review): warmup-steps type is "random" while every tensor is type "file"; confirm this is intended. -->
          <warmup-steps count="2" type="random">
            <tensors>
              <tensor onnx-field="audio_pcm" tensor-file="@ESP_PROJECT_HOME@/files/warmup-whisper.safetensors" type="file"/>
              <tensor onnx-field="max_length" tensor-file="@ESP_PROJECT_HOME@/files/warmup-whisper.safetensors" type="file"/>
              <tensor onnx-field="min_length" tensor-file="@ESP_PROJECT_HOME@/files/warmup-whisper.safetensors" type="file"/>
              <tensor onnx-field="num_beams" tensor-file="@ESP_PROJECT_HOME@/files/warmup-whisper.safetensors" type="file"/>
              <tensor onnx-field="num_return_sequences" tensor-file="@ESP_PROJECT_HOME@/files/warmup-whisper.safetensors" type="file"/>
              <tensor onnx-field="length_penalty" tensor-file="@ESP_PROJECT_HOME@/files/warmup-whisper.safetensors" type="file"/>
              <tensor onnx-field="repetition_penalty" tensor-file="@ESP_PROJECT_HOME@/files/warmup-whisper.safetensors" type="file"/>
            </tensors>
          </warmup-steps>
          <parameters>
            <properties>
              <!-- Path of the ONNX model file to load. -->
              <property name="reference">@ESP_PROJECT_HOME@/analytics/whisper_sm_int8_cpu.onnx</property>
              <property name="loggingLevel">warning</property>
              <property name="execProvider">cpu</property>
            </properties>
          </parameters>
        </window-model-reader>
        <!-- w_score: Score window. It takes data events from w_preproc and the model from w_reader,
             and emits the id key plus the generated_ids blob. -->
        <window-score name="w_score">
          <description>w_score is a Score window. This window executes the ONNX model's code when data passes through the window.</description>
          <schema>
            <fields>
              <field name="id" type="int64" key="true"/>
              <field name="generated_ids" type="blob"/>
            </fields>
          </schema>
          <models>
            <offline model-type="onnx">
              <!-- Each property pairs an identically named model input and event field. -->
              <input-map>
                <properties>
                  <property name="audio_pcm">audio_pcm</property>
                  <property name="max_length">max_length</property>
                  <property name="min_length">min_length</property>
                  <property name="num_beams">num_beams</property>
                  <property name="num_return_sequences">num_return_sequences</property>
                  <property name="length_penalty">length_penalty</property>
                  <property name="repetition_penalty">repetition_penalty</property>
                </properties>
              </input-map>
              <!-- The single mapped output lands in the generated_ids schema field. -->
              <output-map>
                <properties>
                  <property name="generated_ids">generated_ids</property>
                </properties>
              </output-map>
            </offline>
          </models>
        </window-score>
        <!-- w_postproc: Python window fed by w_score. Its code lives in files/esp_whisper_funs.py;
             events="postproc" presumably names the handler function in that file (TODO confirm).
             Output events carry the id key plus two string fields, words and timestamp. -->
        <window-python name="w_postproc"
                       events="postproc"
                       output-insert-only="true">
          <schema>
            <fields>
              <field name="id" type="int64" key="true"/>
              <field name="words" type="string"/>
              <field name="timestamp" type="string"/>
            </fields>
          </schema>
          <!-- Presumably carries the id field through from the incoming event (TODO confirm). -->
          <copy>id</copy>
          <code file="@ESP_PROJECT_HOME@/files/esp_whisper_funs.py"/>
          <connectors>
            <!-- Subscriber: writes this window's events as CSV to out_score.csv in the project output directory. -->
            <connector class="fs" name="out_score_Connector">
              <properties>
                <property name="type">sub</property>
                <property name="fstype">csv</property>
                <property name="fsname">@ESP_PROJECT_OUTPUT@/out_score.csv</property>
                <property name="snapshot">true</property>
              </properties>
            </connector>
          </connectors>
        </window-python>
      </windows>
      <edges>
        <!-- Data path: w_audio, then w_preproc, then w_score, then w_postproc. -->
        <!-- Model path: w_reader supplies the ONNX model to w_score. -->
        <edge source="w_audio" target="w_preproc" role="data"/>
        <edge source="w_reader" target="w_score" role="model"/>
        <edge source="w_preproc" target="w_score" role="data"/>
        <edge source="w_score" target="w_postproc" role="data"/>
      </edges>
    </contquery>
  </contqueries>
</project>