Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions app/schemas/players/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ class PlayerProfile(TransfermarktBaseModel, AuditMixin):
full_name: Optional[str]
name_in_home_country: Optional[str]
image_url: Optional[HttpUrl]
dateOfBirth: Optional[str] = None
dateOfBirthRaw: Optional[str] = None
date_of_birth: Optional[date]
place_of_birth: PlayerPlaceOfBirth
age: Optional[int]
Expand Down
38 changes: 33 additions & 5 deletions app/services/players/profile.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,25 @@
import re
from dataclasses import dataclass
from datetime import datetime
from typing import Optional

# Local fallback pattern: captures D[./-]M[./-]YYYY anywhere in the text.
# The `dob` group mirrors the date capture of REGEX_DOB_AGE in regex.py,
# without the optional trailing age.
_DOB_FALLBACK = re.compile(r"(?P<dob>\d{1,2}[./-]\d{1,2}[./-]\d{4})")


def clean_dob(raw_text: Optional[str]) -> Optional[str]:
    """Extract a day-first date from *raw_text* and return it in ISO format.

    The first ``DD[./-]MM[./-]YYYY`` occurrence in the input is parsed
    (one- or two-digit day and month, four-digit year); any surrounding
    text such as a trailing ``"(38)"`` is ignored.

    Args:
        raw_text: Text that may contain a date, or ``None``.

    Returns:
        The date as ``"YYYY-MM-DD"``, or ``None`` when the input is empty,
        contains no date-shaped substring, or the substring is not a real
        calendar date.
    """
    if not raw_text:
        return None
    match = _DOB_FALLBACK.search(raw_text)
    if match is None:
        return None
    # Normalise "-" and "/" to "." so one strptime format covers all separators.
    dob = match.group("dob").replace("-", ".").replace("/", ".")
    try:
        parsed = datetime.strptime(dob, "%d.%m.%Y")
    except ValueError:
        # Date-shaped but not a valid calendar day (e.g. 31.02.2001).
        return None
    # isoformat() always zero-pads the year, unlike strftime("%Y") on some
    # platforms for years below 1000.
    return parsed.date().isoformat()
Comment on lines +8 to +22
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Quick sanity checks for DOB parsing.

Add a tiny table‑driven test to guard formats you intend to support.

Also confirm the repo’s Python requirement to validate str | None usage vs Optional:

Also applies to: 90-101


🏁 Script executed:

#!/bin/bash
python - << 'PY'
from app.services.players.profile import clean_dob
samples = [
    "24/06/1987 (38)",
    "1-2-2000 (5)",
    "03.11.1999",
    None,
    "",
]
for s in samples:
    print(repr(s), "->", clean_dob(s))
PY

Length of output: 353


🏁 Script executed:

#!/bin/bash
rg -n "requires-python|python_requires" -C2 pyproject.toml setup.cfg setup.py || true

Length of output: 98


🏁 Script executed:

#!/bin/bash
python3 - << 'PY'
import sys, types

# Stub bs4 to avoid ModuleNotFoundError
sys.modules['bs4'] = types.ModuleType('bs4')
sys.modules['bs4'].BeautifulSoup = lambda *args, **kwargs: None

# Stub app.services.base before importing profile to bypass its imports
base_mod = types.ModuleType('app.services.base')
base_mod.TransfermarktBase = type('TransfermarktBase', (), {})
sys.modules['app.services.base'] = base_mod

from app.services.players.profile import clean_dob

samples = [
    "24/06/1987 (38)",
    "1-2-2000 (5)",
    "03.11.1999",
    None,
    "",
]
for s in samples:
    print(f"{s!r} -> {clean_dob(s)}")
PY

Length of output: 168


🏁 Script executed:

#!/bin/bash
# Test clean_dob outputs with proper f-string and stubbed imports
python3 - << 'PY'
import sys, types
# Stub missing modules
sys.modules['bs4'] = types.ModuleType('bs4')
sys.modules['bs4'].BeautifulSoup = lambda *args, **kwargs: None
base_mod = types.ModuleType('app.services.base')
base_mod.TransfermarktBase = type('TransfermarktBase', (), {})
sys.modules['app.services.base'] = base_mod

from app.services.players.profile import clean_dob
samples = ["24/06/1987 (38)", "1-2-2000 (5)", "03.11.1999", None, ""]
for s in samples:
    print(f"{s!r} -> {clean_dob(s)}")
PY

# Search for Python version requirements
rg -n "requires-python|python_requires|requires_python|requires-python" pyproject.toml || echo "No python requirement found"

Length of output: 196


🏁 Script executed:

#!/bin/bash
# 1. Test clean_dob outputs with correct f-string
python3 - << 'PY'
import sys, types
# Stub missing modules
sys.modules['bs4'] = types.ModuleType('bs4')
sys.modules['bs4'].BeautifulSoup = lambda *args, **kwargs: None
base_mod = types.ModuleType('app.services.base')
base_mod.TransfermarktBase = type('TransfermarktBase', (), {})
sys.modules['app.services.base'] = base_mod

from app.services.players.profile import clean_dob
samples = ["24/06/1987 (38)", "1-2-2000 (5)", "03.11.1999", None, ""]
for s in samples:
    print(f"{s!r} -> {clean_dob(s)}")
PY

# 2. Inspect Python version requirement in pyproject.toml
sed -n '1,200p' pyproject.toml

Length of output: 1508


Align type annotations with project’s Python requirement

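  • The pyproject.toml pins python = "^3.9", but PEP 604 union syntax (str | None) is only supported at runtime from Python 3.10; on 3.9 the annotation raises a TypeError when the function is defined, unless the module uses from __future__ import annotations. Either raise the requirement to ^3.10 in pyproject.toml or revert to Optional[str] (with from typing import Optional) in app/services/players/profile.py.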
  • Add a small, table-driven pytest for clean_dob covering “24/06/1987 (38)” → "1987-06-24", “1-2-2000 (5)” → "2000-02-01", “03.11.1999” → "1999-11-03", plus None and empty string cases.
🤖 Prompt for AI Agents
In app/services/players/profile.py around lines 8 to 22, the function uses PEP
604 union syntax `str | None` which is invalid under the project's Python 3.9
constraint; replace the return annotation with Optional[str] and add `from
typing import Optional` at the top of the file, keeping behavior unchanged. Also
add a small table-driven pytest (e.g., in tests/unit/test_profile.py) that
parametrizes inputs and expected outputs for cases: "24/06/1987 (38)" ->
"1987-06-24", "1-2-2000 (5)" -> "2000-02-01", "03.11.1999" -> "1999-11-03", None
-> None, and "" -> None to ensure parsing and edge cases are covered.


from app.services.base import TransfermarktBase
from app.utils.regex import REGEX_DOB_AGE
Expand Down Expand Up @@ -66,11 +87,18 @@ def get_player_profile(self) -> dict:
self.response["fullName"] = self.get_text_by_xpath(Players.Profile.FULL_NAME)
self.response["nameInHomeCountry"] = self.get_text_by_xpath(Players.Profile.NAME_IN_HOME_COUNTRY)
self.response["imageUrl"] = self.get_text_by_xpath(Players.Profile.IMAGE_URL)
self.response["dateOfBirth"] = safe_regex(
self.get_text_by_xpath(Players.Profile.DATE_OF_BIRTH_AGE),
REGEX_DOB_AGE,
"dob",
)
raw_dob_text = self.get_text_by_xpath(Players.Profile.DATE_OF_BIRTH_AGE)

if raw_dob_text:
# remove trailing "(38)" or similar age markers
raw_dob_only = raw_dob_text.split("(")[0].strip()
else:
raw_dob_only = None

# Store both versions
self.response["dateOfBirthRaw"] = raw_dob_only # "24/06/1987"
self.response["dateOfBirth"] = clean_dob(raw_dob_only) # "1987-06-24"

self.response["placeOfBirth"] = {
"city": self.get_text_by_xpath(Players.Profile.PLACE_OF_BIRTH_CITY),
"country": self.get_text_by_xpath(Players.Profile.PLACE_OF_BIRTH_COUNTRY),
Expand Down
2 changes: 1 addition & 1 deletion app/utils/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
# Captures the text between "background-color:" and a ";" as `color`
# (greedy: runs to the last ";" on the same line).
REGEX_BG_COLOR: str = r"background-color:(?P<color>.+);"
# Captures a run of one or more digits as `club_id`.
REGEX_CHART_CLUB_ID: str = r"(?P<club_id>\d+)"
# Captures exactly one digit as `id`.
# NOTE(review): only a single digit is captured — confirm multi-digit IDs are not expected.
REGEX_COUNTRY_ID: str = r"(?P<id>\d)"
# Earlier pattern: text anchored at the start and shaped like "Jun 24, 1987 (38)",
# capturing `dob` and a two-digit `age`. The next assignment rebinds this name.
REGEX_DOB_AGE: str = r"^(?P<dob>\w{3} \d{1,2}, \d{4}) \((?P<age>\d{2})\)"
# Current pattern: two 1-2 digit fields and a 4-digit field separated by ".", "/"
# or "-" captured as `dob`, optionally followed by a parenthesised 1-2 digit `age`.
REGEX_DOB_AGE: str = r"(?P<dob>\d{1,2}[./-]\d{1,2}[./-]\d{4})(?:\s*\((?P<age>\d{1,2})\))?"