22
33use std:: collections:: HashSet ;
44
5+ use eure_document:: document:: { EureDocument , NodeId } ;
56use eure_document:: map:: Map ;
67use regex:: Regex ;
78use std:: sync:: LazyLock ;
@@ -44,7 +45,7 @@ fn collect_section_keys(sections: &Map<String, Section>, keys: &mut HashSet<Stri
4445 }
4546}
4647
47- /// Check all references in a document
48+ /// Check all references in a document (basic version without spans)
4849pub fn check_references ( doc : & EumdDocument ) -> CheckResult {
4950 let mut result = CheckResult :: default ( ) ;
5051
@@ -61,7 +62,7 @@ pub fn check_references(doc: &EumdDocument) -> CheckResult {
6162 collect_section_keys ( & doc. sections , & mut section_keys) ;
6263
6364 // Check references in all markdown content
64- check_content (
65+ check_content_simple (
6566 doc. description . as_deref ( ) ,
6667 "in description" ,
6768 & cite_keys,
@@ -70,7 +71,7 @@ pub fn check_references(doc: &EumdDocument) -> CheckResult {
7071 & mut result,
7172 ) ;
7273
73- check_content (
74+ check_content_simple (
7475 doc. intro . as_deref ( ) ,
7576 "in intro" ,
7677 & cite_keys,
@@ -80,7 +81,7 @@ pub fn check_references(doc: &EumdDocument) -> CheckResult {
8081 ) ;
8182
8283 // Check sections recursively
83- check_sections (
84+ check_sections_simple (
8485 & doc. sections ,
8586 "" ,
8687 & cite_keys,
@@ -91,7 +92,7 @@ pub fn check_references(doc: &EumdDocument) -> CheckResult {
9192
9293 // Check footnote content
9394 for ( key, footnote) in doc. footnotes . iter ( ) {
94- check_content (
95+ check_content_simple (
9596 Some ( & footnote. content ) ,
9697 & format ! ( "in footnote '{key}'" ) ,
9798 & cite_keys,
@@ -104,7 +105,7 @@ pub fn check_references(doc: &EumdDocument) -> CheckResult {
104105 result
105106}
106107
107- fn check_content (
108+ fn check_content_simple (
108109 content : Option < & str > ,
109110 location : & str ,
110111 cite_keys : & HashSet < String > ,
@@ -122,16 +123,16 @@ fn check_content(
122123 } ;
123124
124125 if !is_valid {
125- result. errors . push ( ReferenceError {
126- ref_type : reference. ref_type ,
127- key : reference. key ,
128- location : location . to_string ( ) ,
129- } ) ;
126+ result. errors . push ( ReferenceError :: new (
127+ reference. ref_type ,
128+ reference. key ,
129+ location. to_string ( ) ,
130+ ) ) ;
130131 }
131132 }
132133}
133134
134- fn check_sections (
135+ fn check_sections_simple (
135136 sections : & Map < String , Section > ,
136137 path : & str ,
137138 cite_keys : & HashSet < String > ,
@@ -147,7 +148,7 @@ fn check_sections(
147148 } ;
148149
149150 // Check header if present
150- check_content (
151+ check_content_simple (
151152 section. header . as_deref ( ) ,
152153 & format ! ( "in section '{current_path}' header" ) ,
153154 cite_keys,
@@ -157,7 +158,7 @@ fn check_sections(
157158 ) ;
158159
159160 // Check body
160- check_content (
161+ check_content_simple (
161162 section. body . as_deref ( ) ,
162163 & format ! ( "in section '{current_path}'" ) ,
163164 cite_keys,
@@ -167,7 +168,7 @@ fn check_sections(
167168 ) ;
168169
169170 // Recurse into nested sections
170- check_sections (
171+ check_sections_simple (
171172 & section. sections ,
172173 & current_path,
173174 cite_keys,
@@ -178,6 +179,182 @@ fn check_sections(
178179 }
179180}
180181
182+ // ============================================================================
183+ // Advanced checking with span information
184+ // ============================================================================
185+
186+ /// Context for checking with span information
187+ struct CheckContext < ' a > {
188+ raw_doc : & ' a EureDocument ,
189+ cite_keys : HashSet < String > ,
190+ footnote_keys : HashSet < String > ,
191+ section_keys : HashSet < String > ,
192+ result : CheckResult ,
193+ }
194+
195+ impl < ' a > CheckContext < ' a > {
196+ fn new ( eumd_doc : & EumdDocument , raw_doc : & ' a EureDocument ) -> Self {
197+ let cite_keys: HashSet < String > = eumd_doc
198+ . cites
199+ . as_ref ( )
200+ . map ( |c| extract_bibtex_keys ( c) )
201+ . unwrap_or_default ( ) ;
202+
203+ let footnote_keys: HashSet < String > =
204+ eumd_doc. footnotes . iter ( ) . map ( |( k, _) | k. clone ( ) ) . collect ( ) ;
205+
206+ let mut section_keys = HashSet :: new ( ) ;
207+ collect_section_keys ( & eumd_doc. sections , & mut section_keys) ;
208+
209+ CheckContext {
210+ raw_doc,
211+ cite_keys,
212+ footnote_keys,
213+ section_keys,
214+ result : CheckResult :: default ( ) ,
215+ }
216+ }
217+
218+ fn check_content ( & mut self , content : & str , location : & str , node_id : NodeId ) {
219+ // Get the actual text content offset within the code block
220+ let content_offset = get_code_block_content_offset ( self . raw_doc , node_id) ;
221+
222+ for reference in extract_references ( content) {
223+ let is_valid = match reference. ref_type {
224+ ReferenceType :: Cite => self . cite_keys . contains ( & reference. key ) ,
225+ ReferenceType :: Footnote => self . footnote_keys . contains ( & reference. key ) ,
226+ ReferenceType :: Section => self . section_keys . contains ( & reference. key ) ,
227+ } ;
228+
229+ if !is_valid {
230+ self . result . errors . push ( ReferenceError :: with_span (
231+ reference. ref_type ,
232+ reference. key ,
233+ location. to_string ( ) ,
234+ node_id,
235+ content_offset + reference. offset ,
236+ reference. len ,
237+ ) ) ;
238+ }
239+ }
240+ }
241+
242+ fn check_sections (
243+ & mut self ,
244+ sections : & Map < String , Section > ,
245+ path : & str ,
246+ sections_node_id : NodeId ,
247+ ) {
248+ let sections_node = self . raw_doc . node ( sections_node_id) ;
249+ let Some ( sections_map) = sections_node. as_map ( ) else {
250+ return ;
251+ } ;
252+
253+ for ( key, section) in sections. iter ( ) {
254+ let current_path = if path. is_empty ( ) {
255+ key. clone ( )
256+ } else {
257+ format ! ( "{path}.{key}" )
258+ } ;
259+
260+ let Some ( section_node_id) = sections_map. get_node_id ( & key. clone ( ) . into ( ) ) else {
261+ continue ;
262+ } ;
263+
264+ let section_node = self . raw_doc . node ( section_node_id) ;
265+ let Some ( section_map) = section_node. as_map ( ) else {
266+ continue ;
267+ } ;
268+
269+ // Check header if present
270+ if let Some ( ref header) = section. header
271+ && let Some ( header_node_id) = section_map. get_node_id ( & "header" . into ( ) )
272+ {
273+ self . check_content (
274+ header,
275+ & format ! ( "in section '{current_path}' header" ) ,
276+ header_node_id,
277+ ) ;
278+ }
279+
280+ // Check body
281+ if let Some ( ref body) = section. body
282+ && let Some ( body_node_id) = section_map. get_node_id ( & "body" . into ( ) )
283+ {
284+ self . check_content ( body, & format ! ( "in section '{current_path}'" ) , body_node_id) ;
285+ }
286+
287+ // Recurse into nested sections
288+ if let Some ( nested_sections_id) = section_map. get_node_id ( & "sections" . into ( ) ) {
289+ self . check_sections ( & section. sections , & current_path, nested_sections_id) ;
290+ }
291+ }
292+ }
293+ }
294+
295+ /// Get the byte offset of the code block content start within the node
296+ fn get_code_block_content_offset ( _raw_doc : & EureDocument , _node_id : NodeId ) -> u32 {
297+ // For code blocks, we need to account for the opening ``` and language tag
298+ // However, since we're using the node's span which points to the content,
299+ // we can return 0 here. The actual offset calculation happens in report.rs
300+ // when we compute the final span using OriginMap.
301+ 0
302+ }
303+
304+ /// Check references with span information for better error reporting
305+ pub fn check_references_with_spans ( eumd_doc : & EumdDocument , raw_doc : & EureDocument ) -> CheckResult {
306+ let mut ctx = CheckContext :: new ( eumd_doc, raw_doc) ;
307+
308+ let root_id = raw_doc. get_root_id ( ) ;
309+ let root = raw_doc. node ( root_id) ;
310+
311+ let Some ( map) = root. as_map ( ) else {
312+ return ctx. result ;
313+ } ;
314+
315+ // Check description
316+ if let Some ( ref content) = eumd_doc. description
317+ && let Some ( node_id) = map. get_node_id ( & "description" . into ( ) )
318+ {
319+ ctx. check_content ( content, "in description" , node_id) ;
320+ }
321+
322+ // Check intro
323+ if let Some ( ref content) = eumd_doc. intro
324+ && let Some ( node_id) = map. get_node_id ( & "intro" . into ( ) )
325+ {
326+ ctx. check_content ( content, "in intro" , node_id) ;
327+ }
328+
329+ // Check sections recursively
330+ if let Some ( sections_node_id) = map. get_node_id ( & "sections" . into ( ) ) {
331+ ctx. check_sections ( & eumd_doc. sections , "" , sections_node_id) ;
332+ }
333+
334+ // Check footnotes
335+ if let Some ( footnotes_node_id) = map. get_node_id ( & "footnotes" . into ( ) ) {
336+ let footnotes_node = raw_doc. node ( footnotes_node_id) ;
337+ if let Some ( footnotes_map) = footnotes_node. as_map ( ) {
338+ for ( key, footnote) in eumd_doc. footnotes . iter ( ) {
339+ if let Some ( footnote_node_id) = footnotes_map. get_node_id ( & key. clone ( ) . into ( ) )
340+ && let Some ( content_node_id) = raw_doc
341+ . node ( footnote_node_id)
342+ . as_map ( )
343+ . and_then ( |m| m. get_node_id ( & "content" . into ( ) ) )
344+ {
345+ ctx. check_content (
346+ & footnote. content ,
347+ & format ! ( "in footnote '{key}'" ) ,
348+ content_node_id,
349+ ) ;
350+ }
351+ }
352+ }
353+ }
354+
355+ ctx. result
356+ }
357+
181358#[ cfg( test) ]
182359mod tests {
183360 use super :: * ;
0 commit comments