This repository was archived by the owner on Jan 6, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 46
EP Register prototype #71
Open
xi2pi
wants to merge
14
commits into
parkerhancock:master
Choose a base branch
from
xi2pi:master
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 2 commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
b04e2c6
EP Register prototype
xi2pi 92ceae9
Update util.py
xi2pi 3a44731
fix issue with potentially missing EPO legal codes
parkerhancock 9d594e5
Bump version: 3.0.4 → 3.0.5
parkerhancock a296bfa
Bump jupyter-core from 4.11.1 to 4.11.2
dependabot[bot] ed6732a
add changelog for 3.0.5 change
parkerhancock 9bcd972
Merge pull request #75 from parkerhancock/dependabot/pip/jupyter-core…
parkerhancock c4b2c2c
reproduced error in issue 76
parkerhancock d26bf7c
fixed with graceful error regarding misconfigured EPO settings
parkerhancock 8653703
Merge pull request #77 from parkerhancock/issue_76
parkerhancock 1a953a7
EP Register prototype
xi2pi 3ee5d4c
Update util.py
xi2pi 2da57d6
WIP Regiser implementation
parkerhancock b744778
Merge branch 'master' of https://github.com/xi2pi/patent_client_ep_re…
parkerhancock File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Empty file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,175 @@ | ||
| import logging | ||
| from io import BytesIO | ||
| from warnings import warn | ||
|
|
||
| import lxml.etree as ET | ||
| from patent_client.epo.ops.session import session | ||
| from patent_client.util.base.collections import ListManager | ||
| from yankee.util import AttrDict | ||
|
|
||
| from .model import BiblioResult | ||
| from .model import Claims | ||
| from .model import Description | ||
| from .model import Images | ||
| from .model import Search | ||
| from .schema import BiblioResultSchema | ||
| from .schema import ClaimsSchema | ||
| from .schema import DescriptionSchema | ||
| from .schema import ImagesSchema | ||
| # from .search import SearchSchema | ||
| from .schema import EPRegisterSearchSchema | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
class PublishedBiblioApi:
    """Client for the OPS published-data constituents endpoint.

    Responses are parsed as XML and deserialized with ``schema``.
    """

    schema = BiblioResultSchema()

    @classmethod
    def get_constituents(cls, number, doc_type="publication", format="docdb", constituents=("biblio",)) -> BiblioResult:
        """Retrieve one or more constituents for a document.

        number: document number to look up
        doc_type: document type (application / publication)
        format: document number format (original / docdb / epodoc)
        constituents: what data to retrieve, either a single string or an
            iterable of strings that is comma-joined into the URL
            (biblio / abstract / full-cycle)

        Raises whatever ``response.raise_for_status()`` raises on an HTTP error.
        """
        # A lone string is wrapped so that ",".join does not split it into characters.
        parts = (constituents,) if isinstance(constituents, str) else constituents
        endpoint = (
            f"http://ops.epo.org/3.2/rest-services/published-data/{doc_type}/{format}/{number}/"
            + ",".join(parts)
        )
        reply = session.get(endpoint)
        reply.raise_for_status()
        document = ET.fromstring(reply.text.encode())
        return cls.schema.load(document)

    @classmethod
    def get_biblio(cls, number, doc_type="publication", format="docdb") -> BiblioResult:
        """Shortcut for ``get_constituents`` with ``constituents="biblio"``."""
        return cls.get_constituents(number, doc_type=doc_type, format=format, constituents="biblio")

    @classmethod
    def get_abstract(cls, number, doc_type="publication", format="docdb") -> BiblioResult:
        """Shortcut for ``get_constituents`` with ``constituents="abstract"``."""
        return cls.get_constituents(number, doc_type=doc_type, format=format, constituents="abstract")

    @classmethod
    def get_full_cycle(cls, number, doc_type="publication", format="docdb") -> BiblioResult:
        """Shortcut for ``get_constituents`` with ``constituents="full-cycle"``."""
        return cls.get_constituents(number, doc_type=doc_type, format=format, constituents="full-cycle")
|
|
||
|
|
||
class PublishedFulltextApi:
    """Client for the OPS published-data fulltext endpoints (description / claims)."""

    # Two-letter codes accepted by get_fulltext_result; any other prefix is rejected
    # before a request is made.
    fulltext_jurisdictions = "EP, WO, AT, BE, BG, CA, CH, CY, CZ, DK, EE, ES, FR, GB, GR, HR, IE, IT, LT, LU, MC, MD, ME, NO, PL, PT, RO, RS, SE, SK".split(
        ", "
    )
    # NOTE: the attribute name is misspelled ("desciption"); it is kept unchanged so
    # that any code referring to it by this name keeps working.
    desciption_schema = DescriptionSchema()
    claims_schema = ClaimsSchema()

    @classmethod
    def get_fulltext_result(cls, number, doc_type="publication", format="docdb", inquiry="fulltext"):
        """Fetch the raw response text of a fulltext inquiry.

        number: document number to search; its first two characters are taken
            as the country code
        doc_type: document type (application / publication)
        format: document number format (original / docdb / epodoc)
        inquiry: what data to retrieve (fulltext / description / claims)

        Returns the response body as text.

        Raises:
            ValueError: if the country code is not in ``fulltext_jurisdictions``.
            Whatever ``response.raise_for_status()`` raises on an HTTP error.
        """
        country = number[:2]
        if country not in cls.fulltext_jurisdictions:
            raise ValueError(
                f"Fulltext Is Not Available For Country Code {country}. Fulltext is only available in {', '.join(cls.fulltext_jurisdictions)}"
            )
        url = f"http://ops.epo.org/3.2/rest-services/published-data/{doc_type}/{format}/{number}/{inquiry}"
        response = session.get(url)
        # Must be *called*: a bare attribute reference never raises, so HTTP error
        # bodies would otherwise be handed to the XML parser by the callers.
        response.raise_for_status()
        return response.text

    @classmethod
    def get_description(cls, number, doc_type="publication", format="docdb") -> Description:
        """Fetch and parse the description of a document.

        ``doc_type`` and ``format`` are forwarded to ``get_fulltext_result``.
        """
        text = cls.get_fulltext_result(number, doc_type=doc_type, format=format, inquiry="description")
        tree = ET.fromstring(text.encode())
        return cls.desciption_schema.load(tree)

    @classmethod
    def get_claims(cls, number, doc_type="publication", format="docdb") -> Claims:
        """Fetch and parse the claims of a document.

        ``doc_type`` and ``format`` are forwarded to ``get_fulltext_result``.
        """
        text = cls.get_fulltext_result(number, doc_type=doc_type, format=format, inquiry="claims")
        tree = ET.fromstring(text.encode())
        return cls.claims_schema.load(tree)
|
|
||
|
|
||
| ############################### | ||
| ############################### | ||
class EPRegisterSearchApi:
    """Client for the OPS register search endpoint."""

    schema = EPRegisterSearchSchema()

    @classmethod
    def search(cls, query, start=1, end=100) -> Search:
        """Run a register search and return the parsed result.

        query: query string sent as the ``q`` parameter
        start: first result position, sent as part of the ``Range`` parameter
        end: last result position, sent as part of the ``Range`` parameter

        Returns the object produced by ``schema.load`` on the XML response. On
        an HTTP 404 an empty result object is returned instead (``num_results``
        of 0 and an empty ``ListManager``).

        Raises whatever ``response.raise_for_status()`` raises on other HTTP errors.
        """
        base_url = "http://ops.epo.org/3.2/rest-services/register/search"
        # Not named ``range`` so the builtin stays usable inside this method.
        result_range = f"{start}-{end}"
        logger.debug("OPS Search Endpoint - Query: %s\nRange: %s", query, result_range)
        response = session.get(base_url, params={"Range": result_range, "q": query})

        # A 404 is treated as "no matches" rather than as an error.
        if response.status_code == 404:
            return AttrDict.convert(
                {
                    # Echo the actual query back, not the literal string "query".
                    "query": query,
                    "num_results": 0,
                    "begin": start,
                    "end": end,
                    "results": ListManager(),
                }
            )

        response.raise_for_status()
        tree = ET.fromstring(response.text.encode())
        return cls.schema.load(tree)
|
|
||
|
|
||
| ############################### | ||
| ############################### | ||
|
|
||
class PublishedImagesApi:
    """Client for the OPS published-data image endpoints."""

    schema = ImagesSchema()

    @classmethod
    def get_images(cls, number, doc_type="publication", format="docdb") -> Images:
        """Fetch the images listing for a document and parse it with ``schema``.

        Raises whatever ``raise_for_status()`` raises on an HTTP error.
        """
        endpoint = f"http://ops.epo.org/3.2/rest-services/published-data/{doc_type}/{format}/{number}/images"
        reply = session.get(endpoint)
        reply.raise_for_status()
        document = ET.fromstring(reply.text.encode())
        return cls.schema.load(document)

    @classmethod
    def get_page_image(cls, country, number, kind, image_type, page_number, image_format="pdf"):
        """Download one page of an image document, addressed by its components.

        ``page_number`` is sent as the ``Range`` query parameter. Returns a
        ``BytesIO`` holding the bytes read from the raw response stream.
        """
        endpoint = f"https://ops.epo.org/3.2/rest-services/published-data/images/{country}/{number}/{kind}/{image_type}.{image_format}"
        reply = session.get(endpoint, params={"Range": page_number}, stream=True)
        reply.raise_for_status()
        payload = reply.raw.read()
        return BytesIO(payload)

    @classmethod
    def get_page_image_from_link(cls, link, page_number, image_format="pdf"):
        """Download one page of an image document, addressed by a service-relative link.

        ``link`` is appended to the REST services root and suffixed with
        ``image_format``; ``page_number`` is sent as the ``Range`` query
        parameter. Returns a ``BytesIO`` holding the bytes read from the raw
        response stream.
        """
        endpoint = f"https://ops.epo.org/3.2/rest-services/{link}.{image_format}"
        reply = session.get(endpoint, params={"Range": page_number}, stream=True)
        reply.raise_for_status()
        payload = reply.raw.read()
        return BytesIO(payload)
|
|
||
|
|
||
class PublishedApi:
    """Namespace that groups the endpoint clients under short attribute names."""

    biblio = PublishedBiblioApi
    fulltext = PublishedFulltextApi
    # ``search`` points at the register search client.
    search = EPRegisterSearchApi
    images = PublishedImagesApi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
# Maps the keyword names accepted by generate_query to the field identifiers
# that are written into the query string.
SEARCH_FIELDS = {
    "title": "title",
    "abstract": "abstract",
    "title_and_abstract": "titleandabstract",
    "inventor": "inventor",
    "applicant": "applicant",
    "inventor_or_applicant": "inventorandapplicant",
    "publication": "publicationnumber",
    "epodoc_publication": "spn",
    "application": "applicationnumber",
    "epodoc_application": "sap",
    "priority": "prioritynumber",
    "epodoc_priority": "spr",
    "number": "num",  # Pub, App, or Priority Number
    "publication_date": "publicationdate",  # yyyy, yyyyMM, yyyyMMdd, yyyy-MM, yyyy-MM-dd
    "citation": "citation",
    "cited_in_examination": "ex",
    "cited_in_opposition": "op",
    "cited_by_applicant": "rf",
    "other_citation": "oc",
    "family": "famn",
    "cpc_class": "cpc",
    "ipc_class": "ipc",
    "ipc_core_invention_class": "ci",
    "ipc_core_additional_class": "cn",
    "ipc_advanced_class": "ai",
    "ipc_advanced_additional_class": "an",
    "ipc_core_class": "c",
    "classification": "cl",  # IPC or CPC Class
    "full_text": "txt",  # title, abstract, inventor and applicant
}


def generate_query(**kwargs):
    """Build a query string from keyword filters.

    Each keyword must be a key of ``SEARCH_FIELDS``. A scalar value produces
    one ``field="value"`` clause; a list or tuple produces one clause per
    element. All clauses are joined with ``" AND "``. Values are interpolated
    as-is, so embedded double quotes are not escaped.

    Returns:
        The query string, or ``""`` when no filters are given.

    Raises:
        KeyError: if a keyword is not a known search field.
    """
    clauses = []
    for keyword, values in kwargs.items():
        if not keyword:
            # Empty keywords are skipped rather than looked up.
            continue
        try:
            field = SEARCH_FIELDS[keyword]
        except KeyError:
            raise KeyError(
                f"Unknown search field {keyword!r}; valid fields are: {', '.join(sorted(SEARCH_FIELDS))}"
            ) from None
        # Tuples are expanded like lists instead of being stringified whole.
        if not isinstance(values, (list, tuple)):
            values = (values,)
        clauses.extend(f'{field}="{value}"' for value in values)
    return " AND ".join(clauses)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| # -*- coding: utf-8 -*- | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hey! Can we delete this and replace it with standalone test files? We need standalone tests — take a look at manager_test.py and api_test.py as examples. All you need is a class or set of functions that runs some bit of code, and then assert statements to check for correct output. |
||
| """ | ||
|
|
||
| @author: chris | ||
| """ | ||
| from patent_client.epo.ops.register.api import EPRegisterSearchApi | ||
| from patent_client.epo.ops.register.model import EPRegister | ||
| from patent_client.epo.ops.register.model import Inpadoc | ||
| from patent_client.epo.ops.register.manager import EPRegisterSearchManager | ||
|
|
||
|
|
||
def main():
    """Manually exercise the register search API and model, printing the results.

    Kept behind a ``__main__`` guard so that importing this module does not
    trigger any requests.
    """
    # EPRegisterSearchApi
    print("-------------------- API TEST --------------------")
    print()
    # Called on the class directly; no instance is created for this call.
    search_result = EPRegisterSearchApi.search("EP1000000")
    print(search_result)
    print(len(str(search_result)))
    print()

    # model
    print("-------------------- MODEL TEST --------------------")
    print()
    test_model = EPRegister.objects.get("EP1000000")
    # f-strings format any value, whereas "+" raises TypeError for non-strings.
    print(f"Agent: {test_model.agent}")
    print(f"Applicant: {test_model.applicant}")
    print()


if __name__ == "__main__":
    main()
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| from patent_client.util import Manager | ||
|
|
||
| from .api import PublishedApi | ||
| from .api import EPRegisterSearchApi | ||
| from .cql import generate_query | ||
|
|
||
|
|
||
class EPRegisterSearchManager(Manager):
    """Manager whose ``get`` delegates to the register search API.

    Only ``get`` is implemented here; no paging or length logic is defined on
    this class.
    """

    primary_key = "publication"
    result_size = 100

    def get(self, number, doc_type="publication", format="docdb"):
        """Search the register for ``number`` and return the search result.

        ``doc_type`` and ``format`` are accepted but are not used by this
        implementation.
        """
        return EPRegisterSearchApi.search(number)
|
|
||
| #### | ||
|
|
||
|
|
||
| # class BiblioManager(Manager): | ||
| # def get(self, doc_number): | ||
| # result = PublishedApi.biblio.get_biblio(doc_number) | ||
| # if len(result.documents) > 1: | ||
| # raise ValueError(f"More than one result found for {doc_number}!") | ||
| # return result.documents[0] | ||
|
|
||
|
|
||
| # class ClaimsManager(Manager): | ||
| # def get(self, doc_number): | ||
| # return PublishedApi.fulltext.get_claims(doc_number) | ||
|
|
||
|
|
||
| # class DescriptionManager(Manager): | ||
| # def get(self, doc_number): | ||
| # return PublishedApi.fulltext.get_description(doc_number) | ||
|
|
||
|
|
||
| # class ImageManager(Manager): | ||
| # def get(self, doc_number): | ||
| # return PublishedApi.images.get_images(doc_number) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| from .biblio import BiblioResult | ||
| from .biblio import InpadocBiblio | ||
| from .fulltext import Claims | ||
| from .fulltext import Description | ||
| from .images import ImageDocument | ||
| from .images import Images | ||
| from .search import Inpadoc | ||
| from .search import Search | ||
| from .search import EPRegister | ||
|
|
||
# Names exported by ``from <package> import *``.
__all__ = [
    "BiblioResult",
    "InpadocBiblio",
    "Claims",
    "Description",
    "Images",
    "ImageDocument",
    "Inpadoc",
    "Search",
    "EPRegister",
]
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's clean this file up and get rid of the APIs we don't use. Register only has Search / Retrieval / Events / Procedural Steps endpoints, so we can delete the rest.
Remember, this is all in version control, so don't worry about losing information by deleting it, it will always be in the version history!