1 change: 1 addition & 0 deletions src/vunnel/cli/config.py
@@ -60,6 +60,7 @@ class Providers:
    oracle: providers.oracle.Config = field(default_factory=providers.oracle.Config)
    rhel: providers.rhel.Config = field(default_factory=providers.rhel.Config)
    rocky: providers.rocky.Config = field(default_factory=providers.rocky.Config)
    rootio: providers.rootio.Config = field(default_factory=providers.rootio.Config)
    sles: providers.sles.Config = field(default_factory=providers.sles.Config)
    ubuntu: providers.ubuntu.Config = field(default_factory=providers.ubuntu.Config)
    wolfi: providers.wolfi.Config = field(default_factory=providers.wolfi.Config)
2 changes: 2 additions & 0 deletions src/vunnel/providers/__init__.py
@@ -22,6 +22,7 @@
    oracle,
    rhel,
    rocky,
    rootio,
    sles,
    ubuntu,
    wolfi,
@@ -46,6 +47,7 @@
    oracle.Provider.name(): oracle.Provider,
    rhel.Provider.name(): rhel.Provider,
    rocky.Provider.name(): rocky.Provider,
    rootio.Provider.name(): rootio.Provider,
    sles.Provider.name(): sles.Provider,
    ubuntu.Provider.name(): ubuntu.Provider,
    wolfi.Provider.name(): wolfi.Provider,
84 changes: 84 additions & 0 deletions src/vunnel/providers/rootio/__init__.py
@@ -0,0 +1,84 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import TYPE_CHECKING

from vunnel import provider, result, schema
from vunnel.utils import timer

from .parser import Parser

if TYPE_CHECKING:
    import datetime


@dataclass
class Config:
    runtime: provider.RuntimeConfig = field(
        default_factory=lambda: provider.RuntimeConfig(
            result_store=result.StoreStrategy.SQLITE,
            existing_results=result.ResultStatePolicy.DELETE_BEFORE_WRITE,
        ),
    )
    request_timeout: int = 125
    api_base_url: str = "https://api.root.io/external/osv"
    parallelism: int = 10  # concurrent downloads for improved performance


class Provider(provider.Provider):
    __schema__ = schema.OSVSchema(version="1.6.1")
    __distribution_version__ = int(__schema__.major_version)

    def __init__(self, root: str, config: Config | None = None):
        if not config:
            config = Config()

        super().__init__(root, runtime_cfg=config.runtime)
        self.config = config
        self.logger.debug(f"config: {config}")

        self.parser = Parser(
            ws=self.workspace,
            api_base_url=config.api_base_url,
            download_timeout=config.request_timeout,
            parallelism=config.parallelism,
            logger=self.logger,
        )

        # This provider requires the previous state from former runs
        provider.disallow_existing_input_policy(config.runtime)

    @classmethod
    def name(cls) -> str:
        return "rootio"

    @classmethod
    def tags(cls) -> list[str]:
        return ["vulnerability", "os", "language"]

    @classmethod
    def compatible_schema(cls, schema_version: str) -> schema.Schema | None:
        candidate = schema.OSVSchema(schema_version)
        if candidate.major_version == cls.__schema__.major_version:
            return candidate
        return None

    def update(self, last_updated: datetime.datetime | None) -> tuple[list[str], int]:
        with timer(self.name(), self.logger):
            # TODO: use last_updated for incremental updates if Root IO API supports it
            with self.results_writer() as writer, self.parser:
                for vuln_id, vuln_schema_version, record in self.parser.get():
                    vuln_schema = self.compatible_schema(vuln_schema_version)
                    if not vuln_schema:
                        self.logger.warning(
                            f"skipping vulnerability {vuln_id} with schema version {vuln_schema_version} "
                            f"as it is incompatible with provider schema version {self.__schema__.version}",
                        )
                        continue
                    writer.write(
                        identifier=vuln_id.lower(),
                        schema=vuln_schema,
                        payload=record,
                    )

                return self.parser.urls, len(writer)
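
Note for reviewers: the schema gate in `update()` reduces to a major-version comparison, so any OSV 1.x record is accepted and anything else is skipped with a warning. A minimal standalone sketch of that behavior (the helpers below are illustrative only, and assume `Schema.major_version` is the leading component of the dotted version string):

    # illustrative sketch of the compatible_schema() gate; not part of the diff
    def _major(version: str) -> str:
        return version.split(".")[0]

    def is_compatible(candidate_version: str, provider_version: str = "1.6.1") -> bool:
        # mirrors compatible_schema(): same major version -> accepted
        return _major(candidate_version) == _major(provider_version)

    assert is_compatible("1.6.1")        # exact match: accepted
    assert is_compatible("1.7.0")        # newer minor, same major: accepted
    assert not is_compatible("2.0.0")    # major bump: skipped with a warning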
194 changes: 194 additions & 0 deletions src/vunnel/providers/rootio/parser.py
@@ -0,0 +1,194 @@
from __future__ import annotations

import concurrent.futures
import logging
import os
from typing import TYPE_CHECKING, Any

import orjson

from vunnel.tool import fixdate
from vunnel.utils import http_wrapper as http
from vunnel.utils import osv

if TYPE_CHECKING:
    from collections.abc import Generator
    from types import TracebackType

    from vunnel.workspace import Workspace


namespace = "rootio"


class Parser:
    _api_base_url_ = "https://api.root.io/external/osv"

    def __init__(  # noqa: PLR0913
        self,
        ws: Workspace,
        api_base_url: str | None = None,
        download_timeout: int = 125,
        parallelism: int = 10,
        fixdater: fixdate.Finder | None = None,
        logger: logging.Logger | None = None,
    ):
        if not fixdater:
            fixdater = fixdate.default_finder(ws)
        self.fixdater = fixdater
        self.workspace = ws
        self.api_base_url = api_base_url or self._api_base_url_
        self.download_timeout = download_timeout
        self.parallelism = parallelism
        self.urls = [self.api_base_url]
        if not logger:
            logger = logging.getLogger(self.__class__.__name__)
        self.logger = logger

    def __enter__(self) -> Parser:
        self.fixdater.__enter__()
        return self

    def __exit__(self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None) -> None:
        self.fixdater.__exit__(exc_type, exc_val, exc_tb)

    def _is_valid_osv_id(self, osv_id: str) -> bool:
        """
        Validate OSV ID format.

        Valid IDs should not be empty or end with a trailing dash.
        Examples of invalid IDs: "ROOT-APP-NPM-", "", "   "
        """
        if not osv_id or not osv_id.strip():
            return False
        return not osv_id.endswith("-")

    def _fetch_osv_ids(self) -> list[str]:
        """Fetch the list of OSV record IDs from the Root IO API."""
        self.logger.info("fetching list of OSV IDs from Root IO")
        url = f"{self.api_base_url}/all.json"
        response = http.get(url, self.logger, timeout=self.download_timeout)

        # Parse the response - it's an array of objects with "id" and "modified" fields
        id_objects = response.json()

        # Extract and validate ID strings from each object
        all_ids = [obj["id"] for obj in id_objects]
        valid_ids = [osv_id for osv_id in all_ids if self._is_valid_osv_id(osv_id)]

        invalid_count = len(all_ids) - len(valid_ids)
        if invalid_count > 0:
            self.logger.warning(f"skipping {invalid_count} invalid OSV IDs")

        # Save the full response to workspace for debugging/reproducibility
        os.makedirs(self.workspace.input_path, exist_ok=True)
        ids_file = os.path.join(self.workspace.input_path, "osv_ids.json")
        with open(ids_file, "wb") as f:
            f.write(orjson.dumps(id_objects))

        self.logger.info(f"found {len(valid_ids)} valid OSV records")
        return valid_ids

    def _fetch_osv_record(self, osv_id: str) -> dict[str, Any]:
        """Fetch an individual OSV record from the Root IO API."""
        self.logger.debug(f"fetching OSV record: {osv_id}")
        url = f"{self.api_base_url}/{osv_id}.json"
        response = http.get(url, self.logger, timeout=self.download_timeout)

        record = response.json()

        # Save the record to workspace for reproducibility
        record_dir = os.path.join(self.workspace.input_path, "osv")
        os.makedirs(record_dir, exist_ok=True)
        record_file = os.path.join(record_dir, f"{osv_id}.json")
        with open(record_file, "wb") as f:
            f.write(orjson.dumps(record))

        return record

    def _normalize(self, vuln_entry: dict[str, Any]) -> tuple[str, str, dict[str, Any]]:
        """Normalize a vulnerability entry into the expected tuple format."""
        self.logger.trace("normalizing vulnerability data")  # type: ignore[attr-defined]

        # Extract the OSV record as-is (using OSV schema)
        # Transformation to Grype-specific schema happens in grype-db
        vuln_id = vuln_entry["id"]
        vuln_schema = vuln_entry["schema_version"]

        # Transform ecosystem format: the Root IO API returns "Root:Alpine:3.18",
        # but grype-db expects "Alpine:3.18" (without the "Root:" prefix)
        for affected in vuln_entry.get("affected", []):
            package = affected.get("package", {})
            ecosystem = package.get("ecosystem", "")
            if ecosystem.startswith("Root:"):
                package["ecosystem"] = ecosystem[5:]  # strip "Root:" prefix
                self.logger.debug(f"normalized ecosystem: {ecosystem} -> {package['ecosystem']}")

        # Set database_specific metadata to mark the record as an advisory for grype-db.
        # This is critical for grype-db to emit unaffectedPackageHandles for the NAK pattern.
        if "database_specific" not in vuln_entry:
            vuln_entry["database_specific"] = {}
        if "anchore" not in vuln_entry["database_specific"]:
            vuln_entry["database_specific"]["anchore"] = {}
        vuln_entry["database_specific"]["anchore"]["record_type"] = "advisory"

        return vuln_id, vuln_schema, vuln_entry

    def get(self) -> Generator[tuple[str, str, dict[str, Any]]]:
        """
        Fetch and yield OSV records from the Root IO API.

        Downloads records concurrently for performance, then processes them sequentially.

        Yields:
            Tuples of (vulnerability_id, schema_version, record_dict)
        """
        # Fetch the list of OSV IDs
        osv_ids = self._fetch_osv_ids()

        # TEMPORARILY DISABLED: Download fixdate information for precise fix dates
        # Note: Requires ghcr.io/anchore/grype-db-observed-fix-date/rootio to exist
        # FIXME: Enable once Anchore creates the fixdate database for rootio
        # self.fixdater.download()

        # Download all OSV records concurrently
        self.logger.info(f"downloading {len(osv_ids)} OSV records with parallelism={self.parallelism}")
        records = {}
        failed_ids = []

        with concurrent.futures.ThreadPoolExecutor(max_workers=self.parallelism) as executor:
            # Submit all download tasks
            future_to_id = {executor.submit(self._fetch_osv_record, osv_id): osv_id for osv_id in osv_ids}

            # Collect results as they complete
            for future in concurrent.futures.as_completed(future_to_id):
                osv_id = future_to_id[future]
                try:
                    record = future.result()
                    records[osv_id] = record
                except Exception as e:
                    self.logger.error(f"failed to download OSV record {osv_id}: {e}")
                    failed_ids.append(osv_id)

        if failed_ids:
            self.logger.warning(f"failed to download {len(failed_ids)} records")

        self.logger.info(f"successfully downloaded {len(records)} OSV records")

        # Process downloaded records sequentially
        for osv_id in osv_ids:
Contributor: Please make this concurrent in some way. Right now this provider does ~9K sequential, blocking HTTP GETs, which makes it very slow for a relatively small amount of data. Many of the other providers use concurrent.futures.ThreadPoolExecutor with a config option that controls the concurrency (and a default higher than 1). Please imitate that pattern here.

Contributor: It's probably fine to enter a concurrent section that pulls down all the OSV docs and then processes them sequentially, which is probably easier than trying to get the entire record normalized and processed concurrently.

Author: Added concurrency

            if osv_id not in records:
                continue  # Skip failed downloads

            try:
                vuln_entry = records[osv_id]

                # TEMPORARILY DISABLED: Apply fix date patching to add precise fix dates to ranges
                # FIXME: Enable once Anchore creates the fixdate database for rootio
                # osv.patch_fix_date(vuln_entry, self.fixdater)

                # Normalize and yield the record
                yield self._normalize(vuln_entry)
            except Exception as e:
                self.logger.error(f"failed to process OSV record {osv_id}: {e}")
                continue
33 changes: 33 additions & 0 deletions tests/quality/config.yaml
@@ -374,3 +374,36 @@ tests:
      - <<: *default-validations
        max_year: 2022
        candidate_tool_label: custom-db

  - provider: rootio
    # Root IO provides patched packages for multiple ecosystems
    # Test images contain Root IO patched versions (rootio- prefix, _rootio_ version suffix)
    additional_providers:
      - name: nvd
        use_cache: true
      - name: alpine
        use_cache: true
      - name: debian
        use_cache: true
      - name: ubuntu
        use_cache: true
      - name: github
        use_cache: true
    images:
      - docker.io/rootpublic/cassandra:latest@sha256:02272b14efbe14e70ee5512ce707c4e300d3c1813f0e5df9562512c1b96be835
      - docker.io/rootpublic/ubuntu:22.04@sha256:1390a26823a5a761dfbb7f591ae74a71afd8e23583a2f0c58dca6943b606f6d5
    expected_namespaces:
      # Root IO namespaces (per grype-db implementation)
      - rootio:distro:ubuntu:22.04
      # Upstream provider namespaces (for NAK pattern verification)
      - ubuntu:distro:ubuntu:22.04
      - github:language:java
      - github:language:go
      - nvd:cpe
    validations:
      - <<: *default-validations
        max_year: 2021  # Root IO is a new provider - limiting to older CVEs for initial validation
        max_new_false_negatives: 5  # allow some FNs for the initial provider PR
        max_unlabeled_percent: 90  # relaxed for an initial PR with limited test images
        max_f1_regression: 0.30  # allow F1 as low as 0.70 (vs the 1.00 reference)
        candidate_tool_label: custom-db
20 changes: 20 additions & 0 deletions tests/unit/cli/test_cli.py
@@ -519,6 +519,26 @@ def test_config(monkeypatch) -> None:
      skip_download: false
      skip_newer_archive_check: false
      user_agent: null
  rootio:
    api_base_url: https://api.root.io/external/osv
    parallelism: 10
    request_timeout: 125
    runtime:
      existing_input: keep
      existing_results: delete-before-write
      import_results_enabled: false
      import_results_host: ''
      import_results_path: providers/{provider_name}/listing.json
      on_error:
        action: fail
        input: keep
        results: keep
        retry_count: 3
        retry_delay: 5
      result_store: sqlite
      skip_download: false
      skip_newer_archive_check: false
      user_agent: null
  sles:
    allow_versions:
    - '11'
22 changes: 22 additions & 0 deletions tests/unit/providers/rootio/test-fixtures/all.json
@@ -0,0 +1,22 @@
[
  {
    "id": "ROOT-OS-ALPINE-318-CVE-2000-0548",
    "modified": "2024-11-20T16:00:00Z"
  },
  {
    "id": "ROOT-OS-DEBIAN-bookworm-CVE-2025-53014",
    "modified": "2024-12-01T10:30:00Z"
  },
  {
    "id": "ROOT-OS-UBUNTU-2004-CVE-2024-12345",
    "modified": "2024-12-10T08:15:00Z"
  },
  {
    "id": "ROOT-APP-NPM-CVE-2022-25883",
    "modified": "2024-11-22T14:20:00Z"
  },
  {
    "id": "ROOT-APP-PYPI-CVE-2025-30473",
    "modified": "2024-12-05T09:45:00Z"
  }
]
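
Note for reviewers: the fixture intentionally contains only well-formed IDs; truncated entries like "ROOT-APP-NPM-" (called out in the `_is_valid_osv_id` docstring) would be filtered out before any download. A quick standalone illustration of that filter:

    # standalone illustration of the parser's OSV ID validation
    def is_valid_osv_id(osv_id: str) -> bool:
        if not osv_id or not osv_id.strip():
            return False
        return not osv_id.endswith("-")

    assert is_valid_osv_id("ROOT-APP-NPM-CVE-2022-25883")  # present in the fixture
    assert not is_valid_osv_id("ROOT-APP-NPM-")            # truncated ID from the docstring
    assert not is_valid_osv_id("   ")                      # whitespace-only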