15 changes: 15 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- import `conllu`: Allow `newpar` and `newdoc` keywords

### Changed

- `enumerate`: Sorting as numeric now uses floats internally (lower maximum value, but time values can now be sorted)
- `time`: More generic functionality
- `time`: Existing time annotations are now overwritten

### Fixed

- `enumerate`: Fixed tracking of `by`-values
- `time`: No more misleading error messages

## [0.46.0] - 2026-01-13

### Added
5 changes: 4 additions & 1 deletion src/importer/conllu/conllu.pest
@@ -3,11 +3,14 @@ whitespace = _{ "\t" }
initial_padding = _{ SOI ~ (NEWLINE*)? }
final_padding = _{ (NEWLINE*)? ~ EOI }
conllu = ${ initial_padding ~ sentence+ ~ final_padding }
sentence = ${ ( s_anno | s_comment )* ~ (multi_token | empty_node | token)+ ~ NEWLINE? }
sentence = ${ ( text_structure | s_anno | s_comment )* ~ (multi_token | empty_node | token)+ ~ NEWLINE? }
s_anno = ${ hash ~ space* ~ name ~ space* ~ equals ~ space* ~ s_anno_value ~ NEWLINE }
s_anno_value = @{ (char | WHITESPACE )+ }
s_comment = ${ hash ~ space* ~ comment ~ NEWLINE }
comment = @{ comment_char+ }
text_structure = { hash ~ space* ~ (newdoc | newpar) ~ NEWLINE }
newdoc = { "newdoc" ~ (char | space)* }
newpar = { "newpar" }

token = ${ id ~ whitespace
~ ( no_value | form ) ~ whitespace
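For illustration, the new `text_structure` rule lets a sentence block start with `# newdoc …` and `# newpar` comment lines in addition to the existing `# name = value` annotations. A minimal CoNLL-U fragment of the kind the extended grammar should now accept (the sentence content is made up; token columns are tab-separated):

```
# newdoc id = example-document
# newpar
# sent_id = 1
# text = Hello world
1	Hello	hello	INTJ	_	_	0	root	_	_
2	world	world	NOUN	_	_	1	vocative	_	_
```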
26 changes: 15 additions & 11 deletions src/manipulator/enumerate.rs
@@ -13,6 +13,7 @@ use graphannis_core::{
graph::{ANNIS_NS, NODE_NAME_KEY, NODE_TYPE_KEY},
};
use itertools::Itertools;
use ordered_float::OrderedFloat;
use serde::Serialize;
use serde_derive::Deserialize;

@@ -126,15 +127,17 @@ impl<'a> SortByNode {
fn sortable_value(&self, value: Cow<'a, str>) -> Result<SortValue<'a>, anyhow::Error> {
Ok(match self {
SortByNode::AsString(_) => SortValue::StringValue(value),
SortByNode::AsInteger { .. } => SortValue::NumericValue(value.parse::<usize>()?),
SortByNode::AsInteger { .. } => {
SortValue::NumericValue(value.parse::<OrderedFloat<f64>>()?)
}
})
}
}

#[derive(PartialEq, PartialOrd, Eq, Ord)]
enum SortValue<'a> {
StringValue(Cow<'a, str>),
NumericValue(usize),
NumericValue(OrderedFloat<f64>),
}
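For context on the `OrderedFloat` change above: `f64` by itself only implements `PartialOrd` (because of `NaN`), so it cannot back an enum that derives `Eq`/`Ord`; wrapping it in `ordered_float::OrderedFloat` supplies the total order while still parsing from the annotation string. A minimal standalone sketch of the idea (not the module's code):

```rust
use ordered_float::OrderedFloat;

fn main() -> Result<(), std::num::ParseFloatError> {
    // Time-like annotation values that a usize parse would reject.
    let raw = ["2.25", "0.75", "1.5"];

    // OrderedFloat<f64> parses via f64 and provides Ord/Eq,
    // so the values can be sorted just like the old integer variant.
    let mut values: Vec<OrderedFloat<f64>> = raw
        .iter()
        .map(|v| v.parse::<OrderedFloat<f64>>())
        .collect::<Result<_, _>>()?;
    values.sort();

    assert_eq!(
        values,
        vec![OrderedFloat(0.75), OrderedFloat(1.5), OrderedFloat(2.25)]
    );
    Ok(())
}
```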

impl Default for EnumerateMatches {
@@ -174,11 +177,9 @@ impl Manipulator for EnumerateMatches {

for query_s in &self.queries {
let query = aql::parse(query_s, false)?;
let mut search_results: Vec<_> = Vec::new();
for m in aql::execute_query_on_graph(graph, &query, true, None)? {
let m = m?;
search_results.push(m);
}
let mut search_results = aql::execute_query_on_graph(graph, &query, true, None)?
.flatten()
.collect_vec();
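A side note on the refactored query loop: `Result` implements `IntoIterator` (an `Ok` yields its value, an `Err` yields nothing), so `.flatten()` collects only the successful matches and drops `Err` items rather than propagating them. A tiny standalone illustration:

```rust
fn main() {
    let items: Vec<Result<i32, String>> = vec![Ok(1), Err("broken".into()), Ok(3)];

    // Flattening an iterator of Results keeps only the Ok values.
    let ok_only: Vec<i32> = items.into_iter().flatten().collect();
    assert_eq!(ok_only, vec![1, 3]);
}
```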
// Sort results with the default ANNIS sort order
search_results.sort_by(|m1, m2| {
sort_cache
@@ -230,9 +231,10 @@ let mut offset = 0;
let mut offset = 0;
let mut i_correction = 0;
let mut visited = BTreeSet::new();
let mut by_values = Vec::with_capacity(self.by.len());
let mut by_values = vec![String::with_capacity(0); self.by.len()];
let mut reset_count;
for (i, mut m) in search_results.into_iter().enumerate() {
let mut reset_count = false;
reset_count = false;
let matching_nodes: Result<Vec<String>, GraphAnnisCoreError> = m
.iter()
.map(|m| {
@@ -257,17 +259,19 @@ impl Manipulator for EnumerateMatches {
.get_value_for_item(&internal_id, &coord_anno_key)?
.unwrap_or_default()
.to_string();
if let Some(previous_value) = by_values.get(bi)
if let Some(previous_value) = by_values.get_mut(bi)
&& &next_value != previous_value
{
// reset count
reset_count = true;
previous_value.clear();
previous_value.push_str(&next_value);
}
by_values.insert(bi, next_value);
}
}
if reset_count {
i_correction = i;
offset = 0;
}
if let Some(value_i) = self.value {
if value_i <= m.len() {
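On the `by_values` handling above: the counter reset now compares against a slot that is updated in place (`get_mut` plus `clear`/`push_str`), whereas `Vec::insert` shifts existing elements to the right instead of replacing the slot at `bi`, so repeated inserts can leave a stale "previous" value at the compared index. A small standalone illustration of the difference, with made-up values:

```rust
fn main() {
    // Updating a pre-sized slot in place keeps indices stable.
    let mut in_place = vec![String::new(); 2];
    in_place[0] = "doc1".to_string();
    in_place[0] = "doc2".to_string();
    assert_eq!(in_place.len(), 2); // slot 0 now holds "doc2", nothing shifted

    // Vec::insert grows the vector and shifts the old value to the right.
    let mut shifted: Vec<String> = Vec::with_capacity(2);
    shifted.insert(0, "doc1".to_string());
    shifted.insert(0, "doc2".to_string());
    // "doc1" moved to index 1 instead of being overwritten.
    assert_eq!(shifted, vec!["doc2".to_string(), "doc1".to_string()]);
}
```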
@@ -0,0 +1,111 @@
---
source: src/manipulator/time.rs
expression: "format!(\"{data_output}\\n{messages}\")"
---
---- corpus.graphml:
<?xml version="1.0" encoding="UTF-8"?>
<graphml>
<key id="k0" for="graph" attr.name="configuration" attr.type="string"/>
<key id="k1" for="node" attr.name="annis::node_type" attr.type="string"/>
<key id="k2" for="node" attr.name="annis::time" attr.type="string"/>
<key id="k3" for="node" attr.name="annis::tok" attr.type="string"/>
<graph edgedefault="directed" parse.order="nodesfirst" parse.nodeids="free" parse.edgeids="canonical">
<data key="k0"><![CDATA[
# configure visualizations here
]]></data>
<node id="corpus">
<data key="k1">corpus</data>
</node>
<node id="corpus/doc">
<data key="k1">corpus</data>
</node>
<node id="corpus/doc#t0">
<data key="k1">node</data>
<data key="k2">0.0000000000000000-0.7500000000000000</data>
<data key="k3">This</data>
</node>
<node id="corpus/doc#t1">
<data key="k1">node</data>
<data key="k2">0.7500000000000000-1.5000000000000000</data>
<data key="k3">is</data>
</node>
<node id="corpus/doc#t2">
<data key="k1">node</data>
<data key="k2">1.5000000000000000-2.2500000000000000</data>
<data key="k3">a</data>
</node>
<node id="corpus/doc#t3">
<data key="k1">node</data>
<data key="k2">2.2500000000000000-3.0000000000000000</data>
<data key="k3">test</data>
</node>
<node id="corpus/doc#t4">
<data key="k1">node</data>
<data key="k2">4.0000000000000000-5.3333333333333330</data>
<data key="k3">What</data>
</node>
<node id="corpus/doc#t5">
<data key="k1">node</data>
<data key="k2">5.3333333333333330-6.6666666666666661</data>
<data key="k3">will</data>
</node>
<node id="corpus/doc#t6">
<data key="k1">node</data>
<data key="k2">6.6666666666666661-8.0000000000000000</data>
<data key="k3">happen</data>
</node>
<node id="corpus/doc#t7">
<data key="k1">node</data>
<data key="k2">20.0000000000000000-22.0000000000000000</data>
<data key="k3">nothing</data>
</node>
<node id="corpus/doc#t8">
<data key="k1">node</data>
<data key="k2">27.0000000000000000-28.5000000000000000</data>
<data key="k3">sure</data>
</node>
<node id="corpus/doc#t9">
<data key="k1">node</data>
<data key="k2">28.5000000000000000-30.0000000000000000</data>
<data key="k3">thing</data>
</node>
<edge id="e0" source="corpus/doc#t0" target="corpus/doc#t1" label="Ordering/annis/">
</edge>
<edge id="e1" source="corpus/doc#t1" target="corpus/doc#t2" label="Ordering/annis/">
</edge>
<edge id="e2" source="corpus/doc#t2" target="corpus/doc#t3" label="Ordering/annis/">
</edge>
<edge id="e3" source="corpus/doc#t3" target="corpus/doc#t4" label="Ordering/annis/">
</edge>
<edge id="e4" source="corpus/doc#t4" target="corpus/doc#t5" label="Ordering/annis/">
</edge>
<edge id="e5" source="corpus/doc#t5" target="corpus/doc#t6" label="Ordering/annis/">
</edge>
<edge id="e6" source="corpus/doc#t6" target="corpus/doc#t7" label="Ordering/annis/">
</edge>
<edge id="e7" source="corpus/doc#t7" target="corpus/doc#t8" label="Ordering/annis/">
</edge>
<edge id="e8" source="corpus/doc#t8" target="corpus/doc#t9" label="Ordering/annis/">
</edge>
<edge id="e9" source="corpus/doc#t0" target="corpus/doc" label="PartOf/annis/">
</edge>
<edge id="e10" source="corpus/doc#t1" target="corpus/doc" label="PartOf/annis/">
</edge>
<edge id="e11" source="corpus/doc#t2" target="corpus/doc" label="PartOf/annis/">
</edge>
<edge id="e12" source="corpus/doc#t3" target="corpus/doc" label="PartOf/annis/">
</edge>
<edge id="e13" source="corpus/doc#t4" target="corpus/doc" label="PartOf/annis/">
</edge>
<edge id="e14" source="corpus/doc#t5" target="corpus/doc" label="PartOf/annis/">
</edge>
<edge id="e15" source="corpus/doc#t6" target="corpus/doc" label="PartOf/annis/">
</edge>
<edge id="e16" source="corpus/doc#t7" target="corpus/doc" label="PartOf/annis/">
</edge>
<edge id="e17" source="corpus/doc#t8" target="corpus/doc" label="PartOf/annis/">
</edge>
<edge id="e18" source="corpus/doc#t9" target="corpus/doc" label="PartOf/annis/">
</edge>
</graph>
</graphml>
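Reading the snapshot above, the token intervals appear to be spread evenly between a few anchor times: four tokens over 0–3 in steps of 0.75, three tokens over 4–8 in steps of 4/3, two tokens over 27–30 in steps of 1.5. Assuming that is what the `time` manipulator computes here, a minimal sketch of such an even split (the function name and signature are hypothetical, not the module's API):

```rust
/// Hypothetical helper: split the span [start, end] evenly over `n` tokens
/// and return (start, end) pairs, as the snapshot values suggest.
fn interpolate(start: f64, end: f64, n: usize) -> Vec<(f64, f64)> {
    let step = (end - start) / n as f64;
    (0..n)
        .map(|i| (start + step * i as f64, start + step * (i + 1) as f64))
        .collect()
}

fn main() {
    // Matches corpus/doc#t0..t3 in the snapshot: 0.75-wide intervals over 0..3.
    assert_eq!(
        interpolate(0.0, 3.0, 4),
        vec![(0.0, 0.75), (0.75, 1.5), (1.5, 2.25), (2.25, 3.0)]
    );
}
```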
@@ -109,15 +109,15 @@ expression: actual.unwrap()
<node id="valid-sparse-timevalues_2/import/exmaralda/test_doc#t_dipl_T3-T4">
<data key="k11">dipl</data>
<data key="k13">node</data>
<data key="k19">4.44444-5.55555</data>
<data key="k19">4.4444400000000002-5.5555500000000002</data>
<data key="k20">York</data>
<data key="k5">York</data>
</node>
<node id="valid-sparse-timevalues_2/import/exmaralda/test_doc#t_norm_T286-T0">
<data key="k11">norm</data>
<data key="k13">node</data>
<data key="k14">I</data>
<data key="k19">0-1</data>
<data key="k19">0.0000000000000000-1.0000000000000000</data>
<data key="k20">I</data>
</node>
<node id="valid-sparse-timevalues_2/import/exmaralda/test_doc#t_norm_T0-T1">
@@ -145,14 +145,14 @@
<data key="k11">dipl</data>
<data key="k13">node</data>
<data key="k18">1</data>
<data key="k19">0-5.55555</data>
<data key="k19">0.0000000000000000-5.5555500000000002</data>
</node>
<node id="valid-sparse-timevalues_2/import/exmaralda/test_doc#a_norm_T286-T0">
<data key="k11">norm</data>
<data key="k12">I</data>
<data key="k13">node</data>
<data key="k17">PRON</data>
<data key="k19">0-1</data>
<data key="k19">0.0000000000000000-1.0000000000000000</data>
</node>
<node id="valid-sparse-timevalues_2/import/exmaralda/test_doc#a_norm_T0-T1">
<data key="k11">norm</data>
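The changed values in this snapshot are the same times as before, now printed from `f64` with a fixed number of fractional digits, which exposes the nearest binary representation of inputs such as 4.44444. Assuming a 16-digit format (the exact formatting call is not part of this diff), the effect can be reproduced like this:

```rust
fn main() {
    // Integer-valued times gain 16 fractional zeros.
    println!("{:.16}-{:.16}", 0.0_f64, 1.0_f64);
    // prints: 0.0000000000000000-1.0000000000000000

    // The nearest f64 to 4.44444 printed with 16 fractional digits
    // shows the trailing ...0002 seen in the snapshot.
    println!("{:.16}-{:.16}", 4.44444_f64, 5.55555_f64);
    // prints: 4.4444400000000002-5.5555500000000002
}
```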