Skip to content

Commit b74941e

Browse files
committed
Optimise schema framing when dealing with big schemas
Signed-off-by: Juan Cruz Viotti <[email protected]>
1 parent 36c0734 commit b74941e

File tree

1 file changed

+105
-65
lines changed

1 file changed

+105
-65
lines changed

src/core/jsonschema/frame.cc

Lines changed: 105 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -125,11 +125,9 @@ auto find_anchors(const sourcemeta::core::JSON &schema,
125125
return result;
126126
}
127127

128-
template <typename StringType>
129-
auto find_nearest_bases_ref(
130-
const std::unordered_map<sourcemeta::core::WeakPointer,
131-
std::vector<StringType>> &bases,
132-
const sourcemeta::core::WeakPointer &pointer)
128+
template <typename StringType, typename MapType>
129+
auto find_nearest_bases_ref(const MapType &bases,
130+
const sourcemeta::core::WeakPointer &pointer)
133131
-> std::optional<
134132
std::pair<std::reference_wrapper<const std::vector<StringType>>,
135133
sourcemeta::core::WeakPointer>> {
@@ -150,14 +148,12 @@ auto find_nearest_bases_ref(
150148
return std::nullopt;
151149
}
152150

153-
template <typename StringType>
154-
auto find_nearest_bases(
155-
const std::unordered_map<sourcemeta::core::WeakPointer,
156-
std::vector<StringType>> &bases,
157-
const sourcemeta::core::WeakPointer &pointer,
158-
const std::optional<std::string_view> &default_base)
151+
template <typename StringType, typename MapType>
152+
auto find_nearest_bases(const MapType &bases,
153+
const sourcemeta::core::WeakPointer &pointer,
154+
const std::optional<std::string_view> &default_base)
159155
-> std::pair<std::vector<StringType>, sourcemeta::core::WeakPointer> {
160-
const auto result{find_nearest_bases_ref(bases, pointer)};
156+
const auto result{find_nearest_bases_ref<StringType>(bases, pointer)};
161157
if (result.has_value()) {
162158
return {result->first.get(), result->second};
163159
}
@@ -170,21 +166,37 @@ auto find_nearest_bases(
170166
return {{}, sourcemeta::core::empty_weak_pointer};
171167
}
172168

173-
auto find_every_base(
174-
const std::unordered_map<sourcemeta::core::WeakPointer,
175-
std::vector<sourcemeta::core::JSON::String>>
176-
&bases,
177-
const sourcemeta::core::WeakPointer &pointer)
178-
-> std::vector<std::pair<std::string_view, sourcemeta::core::WeakPointer>> {
169+
template <typename DialectStringType> struct CombinedWalkResult {
170+
std::optional<
171+
std::pair<std::reference_wrapper<const std::vector<DialectStringType>>,
172+
sourcemeta::core::WeakPointer>>
173+
dialect_match;
179174
std::vector<std::pair<std::string_view, sourcemeta::core::WeakPointer>>
180-
result;
175+
every_base;
176+
};
177+
178+
template <typename DialectStringType, typename DialectMapType,
179+
typename BaseMapType>
180+
auto find_dialect_and_all_bases(const DialectMapType &base_dialects,
181+
const BaseMapType &base_uris,
182+
const sourcemeta::core::WeakPointer &pointer)
183+
-> CombinedWalkResult<DialectStringType> {
184+
CombinedWalkResult<DialectStringType> result;
181185

182186
auto current_pointer{pointer};
183187
while (true) {
184-
const auto match{bases.find(current_pointer)};
185-
if (match != bases.cend()) {
186-
for (const auto &base : match->second) {
187-
result.emplace_back(std::string_view{base}, current_pointer);
188+
if (!result.dialect_match.has_value()) {
189+
const auto dialect_it{base_dialects.find(current_pointer)};
190+
if (dialect_it != base_dialects.cend()) {
191+
result.dialect_match =
192+
std::make_pair(std::cref(dialect_it->second), current_pointer);
193+
}
194+
}
195+
196+
const auto base_it{base_uris.find(current_pointer)};
197+
if (base_it != base_uris.cend()) {
198+
for (const auto &base : base_it->second) {
199+
result.every_base.emplace_back(std::string_view{base}, current_pointer);
188200
}
189201
}
190202

@@ -195,10 +207,10 @@ auto find_every_base(
195207
current_pointer = current_pointer.initial();
196208
}
197209

198-
if (result.empty() ||
199-
result.back().second != sourcemeta::core::empty_weak_pointer) {
200-
result.emplace_back(std::string_view{},
201-
sourcemeta::core::empty_weak_pointer);
210+
if (result.every_base.empty() ||
211+
result.every_base.back().second != sourcemeta::core::empty_weak_pointer) {
212+
result.every_base.emplace_back(std::string_view{},
213+
sourcemeta::core::empty_weak_pointer);
202214
}
203215

204216
return result;
@@ -440,8 +452,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker,
440452
paths.size());
441453
std::vector<InternalEntry> subschema_entries;
442454
std::unordered_map<WeakPointer, CacheSubschema> subschemas;
443-
std::unordered_map<WeakPointer, std::vector<JSON::String>> base_uris;
444-
std::unordered_map<WeakPointer, std::vector<std::string_view>> base_dialects;
455+
std::map<WeakPointer, std::vector<JSON::String>> base_uris;
456+
std::map<WeakPointer, std::vector<std::string_view>> base_dialects;
445457

446458
for (const auto &path : paths) {
447459
// Passing paths that overlap is undefined behavior. No path should
@@ -547,7 +559,7 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker,
547559
// identifier, we ignore it as a traditional identifier and take
548560
// care of it as an anchor
549561
!is_pre_2019_09_location_independent_identifier) {
550-
const auto bases{find_nearest_bases(
562+
const auto bases{find_nearest_bases<JSON::String>(
551563
base_uris, common_pointer_weak,
552564
entry.id ? std::optional<std::string_view>{*entry.id}
553565
: std::nullopt)};
@@ -613,7 +625,7 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker,
613625
sourcemeta::core::dialect(entry.common.subschema.get())};
614626
if (!maybe_metaschema.empty()) {
615627
sourcemeta::core::URI metaschema{maybe_metaschema};
616-
const auto nearest_bases{find_nearest_bases(
628+
const auto nearest_bases{find_nearest_bases<JSON::String>(
617629
base_uris, common_pointer_weak,
618630
entry.id ? std::optional<std::string_view>{*entry.id}
619631
: std::nullopt)};
@@ -639,7 +651,7 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker,
639651
// Handle schema anchors
640652
for (const auto &[name, type] : find_anchors(entry.common.subschema.get(),
641653
entry.common.vocabularies)) {
642-
const auto bases{find_nearest_bases(
654+
const auto bases{find_nearest_bases<JSON::String>(
643655
base_uris, common_pointer_weak,
644656
entry.id ? std::optional<std::string_view>{*entry.id}
645657
: std::nullopt)};
@@ -742,13 +754,41 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker,
742754
for (const auto &relative_pointer : pointers) {
743755
const auto pointer_weak{path.concat(relative_pointer)};
744756

745-
const auto dialect_match{
746-
find_nearest_bases_ref(base_dialects, pointer_weak)};
747-
const auto &dialect_for_pointer{dialect_match.has_value()
748-
? dialect_match->first.get().front()
749-
: root_dialect};
757+
const auto combined{find_dialect_and_all_bases<std::string_view>(
758+
base_dialects, base_uris, pointer_weak)};
759+
const auto &dialect_for_pointer{
760+
combined.dialect_match.has_value()
761+
? combined.dialect_match->first.get().front()
762+
: root_dialect};
763+
const auto &every_base_result{combined.every_base};
764+
765+
std::optional<std::pair<std::string_view, WeakPointer>> nearest_base_info;
766+
for (const auto &entry : every_base_result) {
767+
if (!entry.first.empty()) {
768+
nearest_base_info = entry;
769+
break;
770+
}
771+
}
750772

751-
auto every_base_result = find_every_base(base_uris, pointer_weak);
773+
const auto subschema_it{subschemas.find(pointer_weak)};
774+
const bool is_subschema{subschema_it != subschemas.cend()};
775+
const auto nearest_base_depth =
776+
nearest_base_info.has_value() ? nearest_base_info->second.size() : 0;
777+
778+
std::string_view hoisted_base_view{};
779+
sourcemeta::core::SchemaBaseDialect hoisted_base_dialect{};
780+
if (nearest_base_info.has_value()) {
781+
const JSON::String nearest_base_str{nearest_base_info->first};
782+
const auto base_entry{this->locations_.find(
783+
{SchemaReferenceType::Static, nearest_base_str})};
784+
if (base_entry != this->locations_.cend()) {
785+
hoisted_base_view = base_entry->first.second;
786+
hoisted_base_dialect = base_entry->second.base_dialect;
787+
} else {
788+
hoisted_base_view = nearest_base_info->first;
789+
hoisted_base_dialect = root_base_dialect.value();
790+
}
791+
}
752792

753793
WeakPointer cached_base{};
754794
for (const auto &base : every_base_result) {
@@ -769,39 +809,39 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker,
769809
this->locations_.contains({SchemaReferenceType::Static, result});
770810

771811
if (!contains) {
772-
const auto nearest_bases{
773-
find_nearest_bases(base_uris, pointer_weak,
774-
std::optional<std::string_view>{base.first})};
775-
assert(!nearest_bases.first.empty());
776-
const auto &current_base{nearest_bases.first.front()};
777-
778-
const auto base_entry{this->locations_.find(
779-
{SchemaReferenceType::Static, current_base})};
780-
781-
const std::string_view base_view{
782-
base_entry != this->locations_.cend()
783-
? std::string_view{base_entry->first.second}
784-
: std::string_view{current_base}};
785-
786-
const sourcemeta::core::SchemaBaseDialect current_base_dialect{
787-
base_entry != this->locations_.cend()
788-
? base_entry->second.base_dialect
789-
: root_base_dialect.value()};
790-
791-
const auto subschema{subschemas.find(pointer_weak)};
792-
if (subschema != subschemas.cend()) {
812+
std::string_view base_view;
813+
sourcemeta::core::SchemaBaseDialect current_base_dialect;
814+
815+
if (nearest_base_info.has_value()) {
816+
base_view = hoisted_base_view;
817+
current_base_dialect = hoisted_base_dialect;
818+
} else {
819+
const JSON::String current_base{base.first};
820+
const auto base_entry{this->locations_.find(
821+
{SchemaReferenceType::Static, current_base})};
822+
if (base_entry != this->locations_.cend()) {
823+
base_view = base_entry->first.second;
824+
current_base_dialect = base_entry->second.base_dialect;
825+
} else {
826+
base_view = base.first;
827+
current_base_dialect = root_base_dialect.value();
828+
}
829+
}
830+
831+
if (is_subschema) {
793832
store(this->locations_, SchemaReferenceType::Static,
794833
SchemaFrame::LocationType::Subschema, std::move(result),
795-
base_view, pointer_weak, nearest_bases.second.size(),
834+
base_view, pointer_weak, nearest_base_depth,
796835
dialect_for_pointer, current_base_dialect,
797-
subschema->second.parent, false, true);
836+
subschema_it->second.parent, false, true);
798837
} else {
799838
store(this->locations_, SchemaReferenceType::Static,
800839
SchemaFrame::LocationType::Pointer, std::move(result),
801-
base_view, pointer_weak, nearest_bases.second.size(),
840+
base_view, pointer_weak, nearest_base_depth,
802841
dialect_for_pointer, current_base_dialect,
803-
dialect_match.has_value() ? dialect_match->second
804-
: empty_weak_pointer,
842+
combined.dialect_match.has_value()
843+
? combined.dialect_match->second
844+
: empty_weak_pointer,
805845
false, true);
806846
}
807847
}
@@ -817,7 +857,7 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker,
817857
for (const auto &entry : subschema_entries) {
818858
const auto &common_pointer_weak{entry.common.pointer};
819859
if (entry.common.subschema.get().is_object()) {
820-
const auto nearest_bases{find_nearest_bases(
860+
const auto nearest_bases{find_nearest_bases<JSON::String>(
821861
base_uris, common_pointer_weak,
822862
entry.id ? std::optional<std::string_view>{*entry.id}
823863
: std::nullopt)};

0 commit comments

Comments
 (0)