33
44#include < algorithm> // std::erase_if
55#include < cassert> // assert
6- #include < set > // std::set
6+ #include < functional > // std::hash
77#include < sstream> // std::ostringstream
88#include < tuple> // std::tuple
99#include < unordered_set> // std::unordered_set
1010#include < utility> // std::move, std::pair
11+ #include < vector> // std::vector
1112
1213namespace {
1314
15+ struct ProcessedRuleHasher {
16+ auto
17+ operator ()(const std::tuple<const sourcemeta::core::JSON *, std::string_view,
18+ std::uint64_t > &value) const noexcept
19+ -> std::size_t {
20+ return std::hash<const void *>{}(std::get<0 >(value)) ^
21+ (std::hash<std::string_view>{}(std::get<1 >(value)) << 1 ) ^
22+ (std::hash<std::uint64_t >{}(std::get<2 >(value)) << 2 );
23+ }
24+ };
25+
1426auto calculate_health_percentage (const std::size_t subschemas,
1527 const std::size_t failed_subschemas)
1628 -> std::uint8_t {
@@ -56,40 +68,6 @@ auto SchemaTransformRule::rereference(const std::string_view reference,
5668 " The reference broke after transformation" );
5769}
5870
59- auto SchemaTransformRule::apply (JSON &schema, const JSON &root,
60- const Vocabularies &vocabularies,
61- const SchemaWalker &walker,
62- const SchemaResolver &resolver,
63- const SchemaFrame &frame,
64- const SchemaFrame::Location &location) const
65- -> std::pair<bool, Result> {
66- auto outcome{this ->condition (schema, root, vocabularies, frame, location,
67- walker, resolver)};
68- if (!outcome.applies ) {
69- return {true , std::move (outcome)};
70- }
71-
72- try {
73- this ->transform (schema, outcome);
74- } catch (const SchemaAbortError &) {
75- return {false , std::move (outcome)};
76- }
77-
78- // The condition must always be false after applying the
79- // transformation in order to avoid infinite loops
80- if (this ->condition (schema, root, vocabularies, frame, location, walker,
81- resolver)
82- .applies ) {
83- // TODO: Throw a better custom error that also highlights the schema
84- // location
85- std::ostringstream error;
86- error << " Rule condition holds after application: " << this ->name ();
87- throw std::runtime_error (error.str ());
88- }
89-
90- return {true , std::move (outcome)};
91- }
92-
9371auto SchemaTransformRule::check (const JSON &schema, const JSON &root,
9472 const Vocabularies &vocabularies,
9573 const SchemaWalker &walker,
@@ -163,22 +141,37 @@ auto SchemaTransformer::apply(JSON &schema, const SchemaWalker &walker,
163141 std::string_view default_dialect,
164142 std::string_view default_id) const
165143 -> std::pair<bool, std::uint8_t> {
166- // There is no point in applying an empty bundle
167144 assert (!this ->rules .empty ());
168- std::set<std::tuple<const JSON *, std::string_view, std::uint64_t >>
145+ std::unordered_set<std::tuple<const JSON *, std::string_view, std::uint64_t >,
146+ ProcessedRuleHasher>
169147 processed_rules;
170148
171149 bool result{true };
172150 std::size_t subschema_count{0 };
173151 std::size_t subschema_failures{0 };
152+
153+ SchemaFrame frame{SchemaFrame::Mode::References};
154+
155+ struct PotentiallyBrokenReference {
156+ Pointer origin;
157+ JSON::String original;
158+ JSON::String destination;
159+ Pointer target_pointer;
160+ std::size_t target_relative_pointer;
161+ };
162+
163+ std::vector<PotentiallyBrokenReference> potentially_broken_references;
164+
174165 while (true ) {
175- SchemaFrame frame{SchemaFrame::Mode::References};
176- frame.analyse (schema, walker, resolver, default_dialect, default_id);
177- std::unordered_set<Pointer> visited;
166+ if ( frame. empty ()) {
167+ frame.analyse (schema, walker, resolver, default_dialect, default_id);
168+ }
178169
170+ std::unordered_set<Pointer> visited;
179171 bool applied{false };
180172 subschema_count = 0 ;
181173 subschema_failures = 0 ;
174+
182175 for (const auto &entry : frame.locations ()) {
183176 if (entry.second .type != SchemaFrame::LocationType::Resource &&
184177 entry.second .type != SchemaFrame::LocationType::Subschema) {
@@ -199,71 +192,116 @@ auto SchemaTransformer::apply(JSON &schema, const SchemaWalker &walker,
199192
200193 bool subschema_failed{false };
201194 for (const auto &rule : this ->rules ) {
202- const auto subresult{rule->apply (current, schema, current_vocabularies,
203- walker, resolver, frame,
204- entry.second )};
205- // This means the rule is fixable
206- if (subresult.first ) {
207- applied = subresult.second .applies || applied;
208- } else {
209- result = false ;
210- subschema_failed = true ;
211- callback (entry.second .pointer , rule->name (), rule->message (),
212- subresult.second );
213- }
195+ auto outcome{rule->condition (current, schema, current_vocabularies,
196+ frame, entry.second , walker, resolver)};
214197
215- if (!applied ) {
198+ if (!outcome. applies ) {
216199 continue ;
217200 }
218201
219- std::tuple<const JSON *, std::string_view, std::uint64_t > mark{
220- ¤t, rule->name (),
221- // Allow applying the same rule to the same location if the schema
222- // has changed, which means we are still "making progress". The
223- // hashing is not perfect, but its enough
224- current.fast_hash ()};
225- if (processed_rules.contains (mark)) {
226- throw SchemaTransformRuleProcessedTwiceError (rule->name (),
227- entry.second .pointer );
228- }
202+ // Store data we need before invalidating the frame
203+ const auto transformed_pointer{entry.second .pointer };
204+ const auto transformed_relative_pointer{entry.second .relative_pointer };
229205
230- // Identify and try to address broken references, if any
206+ // Collect reference information BEFORE invalidating the frame.
207+ // We need to save this data because after the transform, the old
208+ // frame's views may point to invalid memory, and a new frame won't
209+ // have location entries for paths that no longer exist.
210+ potentially_broken_references.clear ();
231211 for (const auto &reference : frame.references ()) {
232212 const auto destination{frame.traverse (reference.second .destination )};
233213 if (!destination.has_value () ||
234- // We only care about references with JSON Pointer fragments,
235- // as these are the only cases, by definition, where the target
236- // is location-dependent.
237214 !reference.second .fragment .has_value () ||
238215 !reference.second .fragment .value ().starts_with (' /' )) {
239216 continue ;
240217 }
241218
242219 const auto &target{destination.value ().get ()};
220+ potentially_broken_references.push_back (
221+ {reference.first .second , JSON::String{reference.second .original },
222+ reference.second .destination , target.pointer ,
223+ target.relative_pointer });
224+ }
225+
226+ try {
227+ rule->transform (current, outcome);
228+ } catch (const SchemaAbortError &) {
229+ result = false ;
230+ subschema_failed = true ;
231+ callback (transformed_pointer, rule->name (), rule->message (), outcome);
232+ continue ;
233+ }
234+
235+ applied = true ;
236+
237+ frame.analyse (schema, walker, resolver, default_dialect, default_id);
238+
239+ const auto new_location{frame.traverse (transformed_pointer)};
240+ // The location should still exist after transform
241+ assert (new_location.has_value ());
242+
243+ // Get vocabularies from the new frame
244+ const auto new_vocabularies{
245+ frame.vocabularies (new_location.value ().get (), resolver)};
246+
247+ // The condition must always be false after applying the
248+ // transformation in order to avoid infinite loops
249+ if (rule->condition (current, schema, new_vocabularies, frame,
250+ new_location.value ().get (), walker, resolver)
251+ .applies ) {
252+ std::ostringstream error;
253+ error << " Rule condition holds after application: " << rule->name ();
254+ throw std::runtime_error (error.str ());
255+ }
256+
257+ // Identify and fix broken references using the saved data
258+ bool references_fixed{false };
259+ for (const auto &saved_reference : potentially_broken_references) {
243260 // The destination still exists, so we don't have to do anything
244- if (try_get (schema, target. pointer )) {
261+ if (try_get (schema, saved_reference. target_pointer )) {
245262 continue ;
246263 }
247264
248265 // If the source no longer exists, we don't need to fix the reference
249- if (!try_get (schema, reference. first . second .initial ())) {
266+ if (!try_get (schema, saved_reference. origin .initial ())) {
250267 continue ;
251268 }
252269
253270 const auto new_fragment{rule->rereference (
254- reference.second .destination , reference.first .second ,
255- frame.relative_instance_location (target),
256- frame.relative_instance_location (entry.second ))};
271+ saved_reference.destination , saved_reference.origin ,
272+ saved_reference.target_pointer .slice (
273+ saved_reference.target_relative_pointer ),
274+ transformed_pointer.slice (transformed_relative_pointer))};
257275
258276 // Note we use the base from the original reference before any
259277 // canonicalisation takes place so that we don't overly change
260278 // user's references when only fixing up their pointer fragments
261- URI original{reference. second .original };
279+ URI original{saved_reference .original };
262280 original.fragment (to_string (new_fragment));
263- set (schema, reference.first .second , JSON{original.recompose ()});
281+ set (schema, saved_reference.origin , JSON{original.recompose ()});
282+ references_fixed = true ;
283+ }
284+
285+ std::tuple<const JSON *, std::string_view, std::uint64_t > mark{
286+ ¤t, rule->name (),
287+ // Allow applying the same rule to the same location if the schema
288+ // has changed, which means we are still "making progress". The
289+ // hashing is not perfect, but its enough
290+ current.fast_hash ()};
291+ if (processed_rules.contains (mark)) {
292+ throw SchemaTransformRuleProcessedTwiceError (rule->name (),
293+ transformed_pointer);
264294 }
265295
266296 processed_rules.emplace (std::move (mark));
297+
298+ // If we fixed references, the schema changed again, so we need to
299+ // invalidate the frame. Otherwise, we can reuse it for the next
300+ // iteration.
301+ if (references_fixed) {
302+ frame.reset ();
303+ }
304+
267305 goto core_transformer_start_again;
268306 }
269307
0 commit comments