1 change: 1 addition & 0 deletions src/vunnel/cli/config.py
@@ -60,6 +60,7 @@ class Providers:
oracle: providers.oracle.Config = field(default_factory=providers.oracle.Config)
rhel: providers.rhel.Config = field(default_factory=providers.rhel.Config)
rocky: providers.rocky.Config = field(default_factory=providers.rocky.Config)
rootio: providers.rootio.Config = field(default_factory=providers.rootio.Config)
sles: providers.sles.Config = field(default_factory=providers.sles.Config)
ubuntu: providers.ubuntu.Config = field(default_factory=providers.ubuntu.Config)
wolfi: providers.wolfi.Config = field(default_factory=providers.wolfi.Config)
2 changes: 2 additions & 0 deletions src/vunnel/providers/__init__.py
@@ -22,6 +22,7 @@
oracle,
rhel,
rocky,
rootio,
sles,
ubuntu,
wolfi,
@@ -46,6 +47,7 @@
oracle.Provider.name(): oracle.Provider,
rhel.Provider.name(): rhel.Provider,
rocky.Provider.name(): rocky.Provider,
rootio.Provider.name(): rootio.Provider,
sles.Provider.name(): sles.Provider,
ubuntu.Provider.name(): ubuntu.Provider,
wolfi.Provider.name(): wolfi.Provider,
82 changes: 82 additions & 0 deletions src/vunnel/providers/rootio/__init__.py
@@ -0,0 +1,82 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import TYPE_CHECKING

from vunnel import provider, result, schema
from vunnel.utils import timer

from .parser import Parser

if TYPE_CHECKING:
import datetime


@dataclass
class Config:
runtime: provider.RuntimeConfig = field(
default_factory=lambda: provider.RuntimeConfig(
result_store=result.StoreStrategy.SQLITE,
existing_results=result.ResultStatePolicy.DELETE_BEFORE_WRITE,
),
)
request_timeout: int = 125
api_base_url: str = "https://api.root.io/external/osv"


class Provider(provider.Provider):
__schema__ = schema.OSVSchema(version="1.6.1")
__distribution_version__ = int(__schema__.major_version)

def __init__(self, root: str, config: Config | None = None):
if not config:
config = Config()

super().__init__(root, runtime_cfg=config.runtime)
self.config = config
self.logger.debug(f"config: {config}")

self.parser = Parser(
ws=self.workspace,
api_base_url=config.api_base_url,
download_timeout=config.request_timeout,
logger=self.logger,
)

# This provider requires the previous state from former runs
provider.disallow_existing_input_policy(config.runtime)

@classmethod
def name(cls) -> str:
return "rootio"

@classmethod
def tags(cls) -> list[str]:
return ["vulnerability", "os", "language"]

@classmethod
def compatible_schema(cls, schema_version: str) -> schema.Schema | None:
candidate = schema.OSVSchema(schema_version)
if candidate.major_version == cls.__schema__.major_version:
return candidate
return None

def update(self, last_updated: datetime.datetime | None) -> tuple[list[str], int]:
with timer(self.name(), self.logger):
# TODO: use last_updated for incremental updates if Root IO API supports it
with self.results_writer() as writer, self.parser:
for vuln_id, vuln_schema_version, record in self.parser.get():
vuln_schema = self.compatible_schema(vuln_schema_version)
if not vuln_schema:
                        self.logger.warning(
                            f"skipping vulnerability {vuln_id} with schema version {vuln_schema_version} "
                            f"as it is incompatible with provider schema version {self.__schema__.version}",
                        )
continue
writer.write(
identifier=vuln_id.lower(),
schema=vuln_schema,
payload=record,
)

return self.parser.urls, len(writer)
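
For orientation, a hedged sketch of driving the new provider programmatically; the usual entry point is the vunnel CLI (`vunnel run rootio`), and the workspace root below is an arbitrary example path, not anything from this PR.

```python
# Hedged sketch, assuming the Provider base class handles workspace
# setup and invokes update() via run(); otherwise use the vunnel CLI.
from vunnel.providers.rootio import Config, Provider

provider = Provider(root="/tmp/vunnel-root", config=Config())
provider.run()  # fetches from api.root.io and writes OSV records to the workspace
```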
144 changes: 144 additions & 0 deletions src/vunnel/providers/rootio/parser.py
@@ -0,0 +1,144 @@
from __future__ import annotations

import logging
import os
from typing import TYPE_CHECKING, Any

import orjson

from vunnel.tool import fixdate
from vunnel.utils import http_wrapper as http

if TYPE_CHECKING:
from collections.abc import Generator
from types import TracebackType

from vunnel.workspace import Workspace


namespace = "rootio"


class Parser:
_api_base_url_ = "https://api.root.io/external/osv"

def __init__(
self,
ws: Workspace,
api_base_url: str | None = None,
download_timeout: int = 125,
fixdater: fixdate.Finder | None = None,
logger: logging.Logger | None = None,
):
if not fixdater:
fixdater = fixdate.default_finder(ws)
self.fixdater = fixdater
self.workspace = ws
self.api_base_url = api_base_url or self._api_base_url_
self.download_timeout = download_timeout
self.urls = [self.api_base_url]
if not logger:
logger = logging.getLogger(self.__class__.__name__)
self.logger = logger

def __enter__(self) -> Parser:
self.fixdater.__enter__()
return self

def __exit__(self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None) -> None:
self.fixdater.__exit__(exc_type, exc_val, exc_tb)

def _fetch_osv_ids(self) -> list[str]:
"""Fetch the list of OSV record IDs from the Root IO API."""
self.logger.info("fetching list of OSV IDs from Root IO")
url = f"{self.api_base_url}/all.json"
response = http.get(url, self.logger, timeout=self.download_timeout)

# Parse the response - it's an array of objects with "id" and "modified" fields
id_objects = response.json()

# Extract just the ID strings from each object
id_list = [obj["id"] for obj in id_objects]

# Save the full response to workspace for debugging/reproducibility
os.makedirs(self.workspace.input_path, exist_ok=True)
ids_file = os.path.join(self.workspace.input_path, "osv_ids.json")
with open(ids_file, "wb") as f:
f.write(orjson.dumps(id_objects))

self.logger.info(f"found {len(id_list)} OSV records")
return id_list

def _fetch_osv_record(self, osv_id: str) -> dict[str, Any]:
"""Fetch an individual OSV record from the Root IO API."""
self.logger.debug(f"fetching OSV record: {osv_id}")
url = f"{self.api_base_url}/{osv_id}.json"
response = http.get(url, self.logger, timeout=self.download_timeout)

record = response.json()

# Save the record to workspace for reproducibility
record_dir = os.path.join(self.workspace.input_path, "osv")
os.makedirs(record_dir, exist_ok=True)
record_file = os.path.join(record_dir, f"{osv_id}.json")
with open(record_file, "wb") as f:
f.write(orjson.dumps(record))

return record

def _normalize(self, vuln_entry: dict[str, Any]) -> tuple[str, str, dict[str, Any]]:
"""Normalize a vulnerability entry into the expected tuple format."""
self.logger.trace("normalizing vulnerability data") # type: ignore[attr-defined]

# Extract the OSV record as-is (using OSV schema)
# Transformation to Grype-specific schema happens in grype-db
vuln_id = vuln_entry["id"]
vuln_schema = vuln_entry["schema_version"]

# Transform ecosystem format: Root IO API returns "Root:Alpine:3.18" format,
# but grype-db expects "Alpine:3.18" (without "Root:" prefix)
for affected in vuln_entry.get("affected", []):
package = affected.get("package", {})
ecosystem = package.get("ecosystem", "")
if ecosystem.startswith("Root:"):
package["ecosystem"] = ecosystem[5:] # Strip "Root:" prefix
self.logger.debug(f"normalized ecosystem: {ecosystem} -> {package['ecosystem']}")

# Set database_specific metadata to mark as advisory for grype-db
# This is critical for grype-db to emit unaffectedPackageHandles for the NAK pattern
if "database_specific" not in vuln_entry:
vuln_entry["database_specific"] = {}
if "anchore" not in vuln_entry["database_specific"]:
vuln_entry["database_specific"]["anchore"] = {}
vuln_entry["database_specific"]["anchore"]["record_type"] = "advisory"

return vuln_id, vuln_schema, vuln_entry

def get(self) -> Generator[tuple[str, str, dict[str, Any]]]:
"""
        Fetch and yield OSV records from the Root IO API.

Yields:
Tuples of (vulnerability_id, schema_version, record_dict)
"""
# Fetch the list of OSV IDs
osv_ids = self._fetch_osv_ids()

# Download fixdate information if needed
# TEMPORARILY DISABLED: self.fixdater.download()
# Fix date patching is optional and requires authentication

# Fetch and process each OSV record
for osv_id in osv_ids:
Contributor:
Please make this concurrent in some way. Right now this provider does ~9K sequential, blocking HTTP GETs, which makes it very slow for a relatively small amount of data. Many of the other providers use concurrent.futures.ThreadPoolExecutor, with a config option that controls the concurrency (and a default higher than 1). Please imitate that pattern here.

Contributor:
It's probably fine to enter a concurrent section that pulls down all the OSV docs and then processes them sequentially; that is likely easier than trying to get the entire record normalized and processed concurrently.

Author:
Added concurrency.

try:
vuln_entry = self._fetch_osv_record(osv_id)

# Apply fix date patching for published/modified dates
# TEMPORARILY DISABLED: osv.patch_fix_date(vuln_entry, self.fixdater)
# Fix date patching is optional and requires authentication

# Normalize and yield the record
yield self._normalize(vuln_entry)
except Exception as e:
self.logger.error(f"failed to process OSV record {osv_id}: {e}")
continue
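
A minimal sketch of the ThreadPoolExecutor pattern suggested in the review thread above: pull down all the OSV documents concurrently, then normalize them sequentially. The `max_workers` attribute and its config plumbing are illustrative assumptions, not the code that was actually merged.

```python
# Hedged sketch of a concurrent Parser.get: download in parallel, normalize in order.
from concurrent.futures import ThreadPoolExecutor, as_completed

def get(self):  # on Parser; yields (vuln_id, schema_version, record) tuples
    osv_ids = self._fetch_osv_ids()
    records = []
    # max_workers stands in for a hypothetical config-driven value (default > 1)
    with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
        futures = {executor.submit(self._fetch_osv_record, osv_id): osv_id for osv_id in osv_ids}
        for future in as_completed(futures):
            osv_id = futures[future]
            try:
                records.append(future.result())
            except Exception as e:
                self.logger.error(f"failed to fetch OSV record {osv_id}: {e}")
    # processing stays sequential, as suggested in the review thread
    for record in records:
        yield self._normalize(record)
```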
32 changes: 32 additions & 0 deletions tests/quality/config.yaml
@@ -374,3 +374,35 @@ tests:
- <<: *default-validations
max_year: 2022
candidate_tool_label: custom-db

- provider: rootio
# Root IO provides patched packages for multiple ecosystems
# Test images contain Root IO patched versions (rootio- prefix, _rootio_ version suffix)
additional_providers:
- name: nvd
use_cache: true
- name: alpine
use_cache: true
- name: debian
use_cache: true
- name: ubuntu
use_cache: true
- name: github
use_cache: true
images:
- cr.root.io/cassandra@sha256:b3cc918a6a364af0a6b0a45becef0d0979db7e604751fad627ec2a94945b4e03
Contributor:
I think you changed this image to be on a different repo?

Author:
Fixed.

expected_namespaces:
# Root IO namespaces (per grype-db implementation)
- rootio:distro:ubuntu:22.04
# Upstream provider namespaces (for NAK pattern verification)
- ubuntu:distro:ubuntu:22.04
- github:language:java
- github:language:go
- nvd:cpe
validations:
- <<: *default-validations
max_year: 2021 # Root IO is a new provider - limiting to older CVEs for initial validation
max_new_false_negatives: 5 # Allow some FNs for initial provider PR
max_unlabeled_percent: 90 # Relaxed for initial PR with limited test image
        max_f1_regression: 0.30 # Allow F1 as low as 0.70 (vs the 1.00 reference)
candidate_tool_label: custom-db
19 changes: 19 additions & 0 deletions tests/unit/cli/test_cli.py
@@ -519,6 +519,25 @@ def test_config(monkeypatch) -> None:
skip_download: false
skip_newer_archive_check: false
user_agent: null
rootio:
api_base_url: https://api.root.io/external/osv
request_timeout: 125
runtime:
existing_input: keep
existing_results: delete-before-write
import_results_enabled: false
import_results_host: ''
import_results_path: providers/{provider_name}/listing.json
on_error:
action: fail
input: keep
results: keep
retry_count: 3
retry_delay: 5
result_store: sqlite
skip_download: false
skip_newer_archive_check: false
user_agent: null
sles:
allow_versions:
- '11'
22 changes: 22 additions & 0 deletions tests/unit/providers/rootio/test-fixtures/all.json
@@ -0,0 +1,22 @@
[
{
"id": "ROOT-OS-ALPINE-318-CVE-2000-0548",
"modified": "2024-11-20T16:00:00Z"
},
{
"id": "ROOT-OS-DEBIAN-bookworm-CVE-2025-53014",
"modified": "2024-12-01T10:30:00Z"
},
{
"id": "ROOT-OS-UBUNTU-2004-CVE-2024-12345",
"modified": "2024-12-10T08:15:00Z"
},
{
"id": "ROOT-APP-NPM-CVE-2022-25883",
"modified": "2024-11-22T14:20:00Z"
},
{
"id": "ROOT-APP-PYPI-CVE-2025-30473",
"modified": "2024-12-05T09:45:00Z"
}
]
@@ -0,0 +1,52 @@
{
"schema_version": "1.6.1",
"id": "ROOT-APP-NPM-CVE-2022-25883",
"modified": "2024-12-01T10:00:00Z",
"published": "2024-11-01T08:00:00Z",
"aliases": [
"CVE-2022-25883",
"GHSA-c2qf-rxjj-qqgw"
],
"summary": "semver vulnerable to Regular Expression Denial of Service",
"details": "Versions of the package semver before 7.5.2 are vulnerable to Regular Expression Denial of Service (ReDoS) via the function new Range, when untrusted user data is provided as a range.",
"affected": [
{
"package": {
"ecosystem": "npm",
"name": "@rootio/semver"
},
"ranges": [
{
"type": "ECOSYSTEM",
"events": [
{
"introduced": "0"
},
{
"fixed": "7.5.2-root.io.1"
}
]
}
]
}
],
"references": [
{
"type": "ADVISORY",
"url": "https://nvd.nist.gov/vuln/detail/CVE-2022-25883"
},
{
"type": "WEB",
"url": "https://github.com/npm/node-semver/pull/564"
}
],
"severity": [
{
"type": "CVSS_V3",
"score": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H"
}
],
"database_specific": {
"source": "Root"
}
}