For a more responsible foundry industry.

His Brother 39-s Wife | 1936 Ok.ru

return feats

# Print whatever extract_features returns for `text`.
# NOTE(review): `text` must already be bound when this runs; a sample
# assignment to `text` appears further down in this file — confirm ordering.
print(extract_features(text))

def extract_features(txt: str, *, current_year: int = 2024) -> dict:
    """Extract a flat dict of heuristic features from a short text snippet.

    Matching of words (relation, pronouns, platform) is case-insensitive and
    uses word boundaries, so "brother" is not mistaken for the pronoun "her".
    Example input: "his brother 39-s wife 1936 ok.ru".

    Args:
        txt: The text to analyse.
        current_year: Year used to turn the detected birth year into an age
            for the consistency check. Defaults to 2024.

    Returns:
        A dict with the keys ``relation_to_subject``, ``subject_gender``,
        ``spouse_age_exact``, ``spouse_birth_year``, ``social_media_platform``,
        ``platform_region``, ``contains_numeric_token``,
        ``numeric_token_count``, ``token_length``, ``has_possessive_pronoun``,
        ``language_hint``, ``age_year_consistency`` and
        ``has_surname_placeholder``. Features that cannot be determined are
        ``None`` (or ``'unknown'`` for the gender).
    """
    lowered = txt.lower()
    feats = {}

    # 1. relation -- None instead of an AttributeError when nothing matches.
    relation_match = re.search(
        r'\b(brother|sister|father|mother|son|daughter|uncle|aunt)\b', lowered
    )
    feats['relation_to_subject'] = (
        relation_match.group(0) if relation_match else None
    )

    # 2. gender -- whole-word pronouns only; a plain substring test would
    # find "her" inside "brother" and "his" inside "this".
    pronouns = set(re.findall(r'\b(his|her|their)\b', lowered))
    if 'his' in pronouns:
        feats['subject_gender'] = 'male'
    elif 'her' in pronouns:
        feats['subject_gender'] = 'female'
    else:
        feats['subject_gender'] = 'unknown'

    # 3-4. spouse age -- digits immediately followed by "-s".
    age_match = re.search(r'(\d+)-s', txt)
    feats['spouse_age_exact'] = int(age_match.group(1)) if age_match else None

    # 5-6. birth year -- a standalone four-digit number starting with 19 or 20.
    year_match = re.search(r'\b(19|20)\d{2}\b', txt)
    feats['spouse_birth_year'] = int(year_match.group(0)) if year_match else None

    # 7-8. platform
    has_ok_ru = 'ok.ru' in lowered
    feats['social_media_platform'] = 'ok.ru' if has_ok_ru else None
    feats['platform_region'] = 'Russia' if has_ok_ru else None

    # 9-11. numeric tokens
    numeric_tokens = re.findall(r'\d+', txt)
    feats['contains_numeric_token'] = bool(numeric_tokens)
    feats['numeric_token_count'] = len(numeric_tokens)
    feats['token_length'] = len(txt.split())

    # 12-13. pronoun & language hint
    feats['has_possessive_pronoun'] = bool(pronouns)
    feats['language_hint'] = 'Russian-orientated' if has_ok_ru else 'generic'

    # 14-15. consistency check -- only meaningful when both the age and the
    # birth year were found; a gap of up to 5 years counts as consistent.
    birth_year = feats['spouse_birth_year']
    exact_age = feats['spouse_age_exact']
    if birth_year is not None and exact_age is not None:
        diff = abs((current_year - birth_year) - exact_age)
        feats['age_year_consistency'] = (
            'consistent' if diff <= 5 else 'inconsistent'
        )
    else:
        feats['age_year_consistency'] = None

    # 16. surname placeholder flag -- same "<digits>-s" pattern as the age.
    feats['has_surname_placeholder'] = age_match is not None

    return feats

You can drop these directly into a feature‑engineering pipeline (e.g., for a text‑classification, user‑profiling, or recommendation model).

# Sample input string for extract_features.
text = "his brother 39-s wife 1936 ok.ru"

+46 (0) 457 46 58 00 Subscribe to our newsletter
All rights reserved NovaCast © 2025. Production by Bravissimo

Would you like to get in touch with us?

Fill in your contact details below and we will get back to you shortly.

    I'm mainly interested in...

    Area of interest:

    Name*

    E-mail address*

    Click here to read our privacy policy.

    his brother 39-s wife 1936 ok.ru