Skip to content
This repository was archived by the owner on Jan 6, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
175 changes: 175 additions & 0 deletions src/patent_client/epo/ops/register/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
import logging
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's clean this file up and get rid of the APIs we don't use. Register only has Search / Retrieval / Events / Procedural Steps endpoints, so we can delete the rest.

Remember, this is all in version control, so don't worry about losing information by deleting it; it will always be in the version history!

from io import BytesIO
from warnings import warn

import lxml.etree as ET
from patent_client.epo.ops.session import session
from patent_client.util.base.collections import ListManager
from yankee.util import AttrDict

from .model import BiblioResult
from .model import Claims
from .model import Description
from .model import Images
from .model import Search
from .schema import BiblioResultSchema
from .schema import ClaimsSchema
from .schema import DescriptionSchema
from .schema import ImagesSchema
# from .search import SearchSchema
from .schema import EPRegisterSearchSchema

logger = logging.getLogger(__name__)


class PublishedBiblioApi:
    """Access to the OPS published-data constituents endpoints (biblio / abstract / full-cycle)."""

    schema = BiblioResultSchema()

    @classmethod
    def get_constituents(cls, number, doc_type="publication", format="docdb", constituents=("biblio",)) -> BiblioResult:
        """Fetch one or more constituents for a document and parse them with ``cls.schema``.

        number: document number to search
        doc_type: document type (application / publication)
        format: document number format (original / docdb / epodoc)
        constituents: what data to retrieve, either a single name or an
            iterable of names that are joined with commas
            (biblio / abstract / full-cycle)

        Raises whatever ``response.raise_for_status()`` raises on an HTTP
        error status.
        """
        prefix = f"http://ops.epo.org/3.2/rest-services/published-data/{doc_type}/{format}/{number}/"
        # A bare string is treated as one constituent, not as an iterable of characters.
        parts = (constituents,) if isinstance(constituents, str) else constituents
        reply = session.get(prefix + ",".join(parts))
        reply.raise_for_status()
        return cls.schema.load(ET.fromstring(reply.text.encode()))

    @classmethod
    def get_biblio(cls, number, doc_type="publication", format="docdb") -> BiblioResult:
        """Shortcut for ``get_constituents`` with the ``biblio`` constituent."""
        return cls.get_constituents(number, doc_type, format, constituents="biblio")

    @classmethod
    def get_abstract(cls, number, doc_type="publication", format="docdb") -> BiblioResult:
        """Shortcut for ``get_constituents`` with the ``abstract`` constituent."""
        return cls.get_constituents(number, doc_type, format, constituents="abstract")

    @classmethod
    def get_full_cycle(cls, number, doc_type="publication", format="docdb") -> BiblioResult:
        """Shortcut for ``get_constituents`` with the ``full-cycle`` constituent."""
        return cls.get_constituents(number, doc_type, format, constituents="full-cycle")


class PublishedFulltextApi:
    """Access to the OPS published-data fulltext endpoints (fulltext / description / claims)."""

    # Country codes accepted by get_fulltext_result; any other prefix raises ValueError.
    fulltext_jurisdictions = "EP, WO, AT, BE, BG, CA, CH, CY, CZ, DK, EE, ES, FR, GB, GR, HR, IE, IT, LT, LU, MC, MD, ME, NO, PL, PT, RO, RS, SE, SK".split(
        ", "
    )
    # NOTE: the attribute name is misspelled ("desciption"); it is kept as-is
    # so that any external reference to it keeps working.
    desciption_schema = DescriptionSchema()
    claims_schema = ClaimsSchema()

    @classmethod
    def get_fulltext_result(cls, number, doc_type="publication", format="docdb", inquiry="fulltext"):
        """Return the raw response text of a fulltext inquiry.

        number: document number to search; its first two characters are
            taken as the country code
        doc_type: document type (application / publication)
        format: document number format (original / docdb / epodoc)
        inquiry: what data to retrieve. Can be combined. (fulltext / description / claims)

        Raises:
            ValueError: if the country code is not in ``fulltext_jurisdictions``.
            Whatever ``response.raise_for_status()`` raises on an HTTP error status.
        """
        country = number[:2]
        if country not in cls.fulltext_jurisdictions:
            raise ValueError(
                f"Fulltext Is Not Available For Country Code {country}. Fulltext is only available in {', '.join(cls.fulltext_jurisdictions)}"
            )
        url = f"http://ops.epo.org/3.2/rest-services/published-data/{doc_type}/{format}/{number}/{inquiry}"
        response = session.get(url)
        # Must be *called*: a bare attribute reference never raises, which
        # would hand an error page to the XML parser downstream.
        response.raise_for_status()
        return response.text

    @classmethod
    def get_description(cls, number, doc_type="publication", format="docdb") -> Description:
        """Fetch and parse the description of a document."""
        # Forward the caller's doc_type/format rather than hard-coded defaults.
        text = cls.get_fulltext_result(number, doc_type=doc_type, format=format, inquiry="description")
        tree = ET.fromstring(text.encode())
        return cls.desciption_schema.load(tree)

    @classmethod
    def get_claims(cls, number, doc_type="publication", format="docdb") -> Claims:
        """Fetch and parse the claims of a document."""
        # Forward the caller's doc_type/format rather than hard-coded defaults.
        text = cls.get_fulltext_result(number, doc_type=doc_type, format=format, inquiry="claims")
        tree = ET.fromstring(text.encode())
        return cls.claims_schema.load(tree)


###############################
###############################
class EPRegisterSearchApi:
    """Access to the OPS register search endpoint."""

    schema = EPRegisterSearchSchema()

    @classmethod
    def search(cls, query, start=1, end=100) -> Search:
        """Run a register search and return the parsed result page.

        query: query string sent as the ``q`` parameter
        start: first result position requested (sent in the ``Range`` parameter)
        end: last result position requested (sent in the ``Range`` parameter)

        A 404 response is treated as "no results" and yields an empty result
        object echoing the query and requested range. Any other HTTP error
        status is raised by ``response.raise_for_status()``.
        """
        base_url = "http://ops.epo.org/3.2/rest-services/register/search"
        result_range = f"{start}-{end}"
        logger.debug("OPS Search Endpoint - Query: %s\nRange: %s", query, result_range)
        response = session.get(base_url, params={"Range": result_range, "q": query})

        if response.status_code == 404:
            return AttrDict.convert(
                {
                    # Echo the actual query, not the literal string "query".
                    "query": query,
                    "num_results": 0,
                    "begin": start,
                    "end": end,
                    "results": ListManager(),
                }
            )

        response.raise_for_status()
        tree = ET.fromstring(response.text.encode())
        return cls.schema.load(tree)


###############################
###############################

class PublishedImagesApi:
    """Access to the OPS published-data image endpoints."""

    schema = ImagesSchema()

    @classmethod
    def get_images(cls, number, doc_type="publication", format="docdb") -> Images:
        """Fetch the image listing for a document and parse it with ``cls.schema``."""
        reply = session.get(
            f"http://ops.epo.org/3.2/rest-services/published-data/{doc_type}/{format}/{number}/images"
        )
        reply.raise_for_status()
        xml_root = ET.fromstring(reply.text.encode())
        return cls.schema.load(xml_root)

    @classmethod
    def get_page_image(cls, country, number, kind, image_type, page_number, image_format="pdf"):
        """Download one page image, addressed by its document identifiers, as a ``BytesIO``."""
        target = f"https://ops.epo.org/3.2/rest-services/published-data/images/{country}/{number}/{kind}/{image_type}.{image_format}"
        return cls._download_page(target, page_number)

    @classmethod
    def get_page_image_from_link(cls, link, page_number, image_format="pdf"):
        """Download one page image, addressed by a relative ``link``, as a ``BytesIO``."""
        target = f"https://ops.epo.org/3.2/rest-services/{link}.{image_format}"
        return cls._download_page(target, page_number)

    @classmethod
    def _download_page(cls, url, page_number):
        """Stream ``url`` with ``page_number`` as the ``Range`` parameter and buffer the raw body."""
        reply = session.get(url, params={"Range": page_number}, stream=True)
        reply.raise_for_status()
        return BytesIO(reply.raw.read())


class PublishedApi:
    """Namespace that groups the API classes of this module under short attribute names."""

    biblio = PublishedBiblioApi
    fulltext = PublishedFulltextApi
    # `search` points at the EP Register search API.
    search = EPRegisterSearchApi
    images = PublishedImagesApi
44 changes: 44 additions & 0 deletions src/patent_client/epo/ops/register/cql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Maps the keyword names accepted by generate_query() to the field
# identifiers emitted in the query string.
SEARCH_FIELDS = {
    "title": "title",
    "abstract": "abstract",
    "title_and_abstract": "titleandabstract",
    "inventor": "inventor",
    "applicant": "applicant",
    "inventor_or_applicant": "inventorandapplicant",
    "publication": "publicationnumber",
    "epodoc_publication": "spn",
    "application": "applicationnumber",
    "epodoc_application": "sap",
    "priority": "prioritynumber",
    "epodoc_priority": "spr",
    "number": "num",  # Pub, App, or Priority Number
    "publication_date": "publicationdate",  # yyyy, yyyyMM, yyyyMMdd, yyyy-MM, yyyy-MM-dd
    "citation": "citation",
    "cited_in_examination": "ex",
    "cited_in_opposition": "op",
    "cited_by_applicant": "rf",
    "other_citation": "oc",
    "family": "famn",
    "cpc_class": "cpc",
    "ipc_class": "ipc",
    "ipc_core_invention_class": "ci",
    "ipc_core_additional_class": "cn",
    "ipc_advanced_class": "ai",
    "ipc_advanced_additional_class": "an",
    "ipc_core_class": "c",
    "classification": "cl",  # IPC or CPC Class
    "full_text": "txt",  # title, abstract, inventor and applicant
}


def generate_query(**kwargs):
    """Build a query string from keyword filters.

    Each keyword must be a key of ``SEARCH_FIELDS``. Its value is either a
    single value or a list/tuple of values; every value becomes one
    ``field="value"`` clause and all clauses are joined with ``" AND "``.

    Returns:
        The query string, or ``""`` when no clause was produced.

    Raises:
        KeyError: if a keyword with at least one value is not a known
            search field. The message lists the valid field names.
    """
    clauses = []
    for keyword, values in kwargs.items():
        # Tuples are expanded like lists instead of being stringified whole.
        if not isinstance(values, (list, tuple)):
            values = [values]
        if not keyword or not values:
            continue
        try:
            field = SEARCH_FIELDS[keyword]
        except KeyError:
            raise KeyError(
                f"Unknown search field {keyword!r}; valid fields are: {', '.join(SEARCH_FIELDS)}"
            ) from None
        clauses.extend(f'{field}="{value}"' for value in values)
    return " AND ".join(clauses)
29 changes: 29 additions & 0 deletions src/patent_client/epo/ops/register/example_ep_register.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey! Can we delete this and replace with standalone test files?

We need an api_test.py file with your API test method, and a manager_test.py file with what you've called a model test (but I would call a manager test).

Take a look at the manager_test.py and api_test.py as examples. All you need is a class or set of functions that runs some bit of code, and then assert statements to check for correct output.

"""

@author: chris
"""
from patent_client.epo.ops.register.api import EPRegisterSearchApi
from patent_client.epo.ops.register.model import EPRegister
from patent_client.epo.ops.register.model import Inpadoc
from patent_client.epo.ops.register.manager import EPRegisterSearchManager


# Exercise the raw register search endpoint and show the result and its printed length.
print("-------------------- API TEST --------------------\n")
test_API = EPRegisterSearchApi()
search_result = test_API.search("EP1000000")
print(search_result)
print(len(str(search_result)), end="\n\n")

# Exercise the model-level lookup and show two of its attributes.
print("-------------------- MODEL TEST --------------------\n")
test_model = EPRegister.objects.get("EP1000000")
print("Agent: " + test_model.agent)
print("Applicant: " + test_model.applicant, end="\n\n")

79 changes: 79 additions & 0 deletions src/patent_client/epo/ops/register/manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from patent_client.util import Manager

from .api import PublishedApi
from .api import EPRegisterSearchApi
from .cql import generate_query


class EPRegisterSearchManager(Manager):
    """Manager that resolves lookups through ``EPRegisterSearchApi``."""

    result_size = 100
    primary_key = "publication"

    def get(self, number, doc_type="publication", format="docdb"):
        """Search the register for ``number`` and return the search result.

        ``doc_type`` and ``format`` are accepted but are not used by this
        implementation; only ``number`` is passed to the search API.
        """
        return EPRegisterSearchApi.search(number)

####


# class BiblioManager(Manager):
# def get(self, doc_number):
# result = PublishedApi.biblio.get_biblio(doc_number)
# if len(result.documents) > 1:
# raise ValueError(f"More than one result found for {doc_number}!")
# return result.documents[0]


# class ClaimsManager(Manager):
# def get(self, doc_number):
# return PublishedApi.fulltext.get_claims(doc_number)


# class DescriptionManager(Manager):
# def get(self, doc_number):
# return PublishedApi.fulltext.get_description(doc_number)


# class ImageManager(Manager):
# def get(self, doc_number):
# return PublishedApi.images.get_images(doc_number)
22 changes: 22 additions & 0 deletions src/patent_client/epo/ops/register/model/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from .biblio import BiblioResult
from .biblio import InpadocBiblio
from .fulltext import Claims
from .fulltext import Description
from .images import ImageDocument
from .images import Images
from .search import Inpadoc
from .search import Search
from .search import EPRegister

# Names re-exported as the public interface of this package.
__all__ = [
    "BiblioResult",
    "InpadocBiblio",
    "Claims",
    "Description",
    "Images",
    "ImageDocument",
    "Inpadoc",
    "Search",
    # Imported from .search alongside Inpadoc and Search.
    "EPRegister"
]
Loading