#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import json
import re
import datetime
import argparse
from jinja2 import Template
from bs4 import BeautifulSoup

# --- CONFIGURATION ---
# Canonical site origin used to build absolute job URLs and the JSON-LD
# "sameAs" value. Must not carry trailing whitespace or a trailing slash,
# because callers append "/job/<slug>" directly to it.
MAIN_DOMAIN = "https://rowjobs.site"

def slugify(title):
    """Turn a title into a lowercase, hyphen-separated slug.

    The title is lowercased, every character other than ``a-z``, ``0-9``,
    whitespace and ``-`` is removed, each run of whitespace/hyphens becomes
    a single ``-``, and leading/trailing hyphens are trimmed.

    Args:
        title: The text to convert (must be a ``str``).

    Returns:
        The slug; an empty string when no allowed characters remain.
    """
    lowered = title.lower()
    # Drop anything that is not an ASCII letter, digit, whitespace or hyphen.
    kept = re.sub(r'[^a-z0-9\s-]', '', lowered)
    # Collapse separator runs into one hyphen and trim the ends.
    return re.sub(r'[\s-]+', '-', kept).strip('-')

def professional_cleaner(text):
    """Reduce a scraped (possibly HTML) job description to one plain-text paragraph.

    Steps, in order:
      1. Parse the input as HTML and drop every ``<a>``, ``<script>`` and
         ``<style>`` element together with its contents.
      2. Extract the remaining text, replace double quotes with single
         quotes and collapse all whitespace (including bare ``\\r``) to
         single spaces.
      3. Remove ``source: <token>`` fragments and rewrite
         "click here to apply" as "apply below" (both case-insensitive).

    Args:
        text: Raw description markup, or a falsy value when none is available.

    Returns:
        The cleaned single-line description. A generic placeholder sentence
        is returned when the input is falsy or when nothing is left after
        cleaning, so callers never receive an empty description.
    """
    fallback = "We are looking for a qualified candidate to join our remote team. Apply for full details."
    if not text:
        return fallback

    # Remove HTML tags
    soup = BeautifulSoup(text, "html.parser")

    # Remove any unwanted source links or common scraper footprints
    for scrap in soup.find_all(['a', 'script', 'style']):
        scrap.decompose()

    clean_text = soup.get_text(separator=' ')

    # Swap double quotes for single quotes, then normalise every whitespace
    # run to one space. Doing this before the phrase substitutions lets
    # them match across line breaks, and a bare '\r' separates words
    # instead of gluing them together.
    clean_text = ' '.join(clean_text.replace('"', "'").split())

    # Remove common "source" phrases found in scraped data
    clean_text = re.sub(r'(?i)source\s*:\s*\S+', '', clean_text)
    clean_text = re.sub(r'(?i)click here to apply', 'apply below', clean_text)

    # The removals above can leave double spaces or dangling edges behind.
    clean_text = ' '.join(clean_text.split())

    # Input made only of whitespace or stripped elements cleans to nothing;
    # fall back to the placeholder instead of returning "".
    return clean_text or fallback

# --- PREMIUM MINIMALIST DESIGN ---
# Jinja2 source for a single job page.
#
# Escaping rules used below:
#   * Values inside the JSON-LD <script> go through ``tojson``, which emits
#     the surrounding quotes itself and escapes quotes, backslashes, control
#     characters and HTML-significant characters, so scraped text cannot
#     produce invalid JSON or close the <script> element early.
#   * Scraped values placed in HTML text or attributes go through ``e``.
#   * ``job_description`` is deliberately rendered with ``safe`` so the
#     source markup is kept; it is therefore only as trustworthy as the feed.
template_html = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{{ job_title | e }} | Remote Careers</title>
    <link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;600;700&display=swap" rel="stylesheet">
    
    <script type="application/ld+json">
    {
      "@context": "https://schema.org",
      "@type": "JobPosting",
      "title": {{ job_title | tojson }},
      "datePosted": {{ iso_date | tojson }},
      "validThrough": {{ valid_through | tojson }},
      "description": {{ job_description_clean | tojson }},
      "url": {{ full_job_url | tojson }},
      "jobLocationType": "TELECOMMUTE",
      "applicantLocationRequirements": [
        {"@type":"Country","name":"United States"},{"@type":"Country","name":"United Kingdom"},{"@type":"Country","name":"Canada"},{"@type":"Country","name":"India"}
      ],
      "employmentType": "FULL_TIME",
      "hiringOrganization": {
        "@type": "Organization",
        "name": "Rowjobs Remote",
        "sameAs": {{ main_domain | tojson }}
      },
      "jobLocation": { "@type": "Place", "address": { "@type": "PostalAddress", "addressCountry": "US" } }
    }
    </script>

    <style>
        :root { --primary: #2563eb; --text: #1e293b; --bg: #f8fafc; }
        body { font-family: 'Plus Jakarta Sans', sans-serif; background: var(--bg); color: var(--text); margin: 0; line-height: 1.6; }
        header { background: white; border-bottom: 1px solid #e2e8f0; padding: 15px 5%; }
        .logo { font-size: 1.25rem; font-weight: 700; color: var(--primary); text-decoration: none; }
        .breadcrumb { padding: 20px 5%; font-size: 0.85rem; color: #64748b; }
        .breadcrumb a { color: var(--primary); text-decoration: none; }
        .main-container { max-width: 1100px; margin: 0 auto; padding: 0 20px 60px; display: grid; grid-template-columns: 2fr 1fr; gap: 30px; }
        .job-card { background: white; padding: 40px; border-radius: 16px; border: 1px solid #e2e8f0; box-shadow: 0 4px 6px -1px rgba(0,0,0,0.05); }
        h1 { font-size: 2.2rem; margin: 0 0 15px 0; color: #0f172a; line-height: 1.2; }
        .meta-tags { display: flex; gap: 10px; margin-bottom: 30px; }
        .tag { background: #eff6ff; color: var(--primary); padding: 5px 15px; border-radius: 99px; font-size: 0.8rem; font-weight: 600; }
        .description-header { border-bottom: 2px solid #f1f5f9; padding-bottom: 10px; margin-bottom: 20px; font-weight: 700; font-size: 1.2rem; }
        .content { font-size: 1.1rem; color: #334155; }
        .sidebar-card { background: #fff; padding: 25px; border-radius: 16px; border: 1px solid #e2e8f0; position: sticky; top: 20px; }
        .apply-btn { display: block; text-align: center; background: var(--primary); color: white; padding: 16px; border-radius: 12px; text-decoration: none; font-weight: 700; margin-top: 20px; transition: 0.2s; }
        .apply-btn:hover { background: #1d4ed8; transform: translateY(-2px); }
        @media (max-width: 850px) { .main-container { grid-template-columns: 1fr; } }
    </style>
</head>
<body>
    <header><a href="/" class="logo">Rowjobs<span style="color:#64748b">.store</span></a></header>
    <div class="breadcrumb"><a href="/">Home</a> / <a href="/">Jobs</a> / {{ job_title | e }}</div>
    <div class="main-container">
        <div class="job-body">
            <div class="job-card">
                <div class="meta-tags"><span class="tag">Remote</span><span class="tag">Verified</span><span class="tag">Full-Time</span></div>
                <h1>{{ job_title | e }}</h1>
                <div class="description-header">Professional Summary</div>
                <div class="content">{{ job_description | safe }}</div>
            </div>
        </div>
        <div class="sidebar">
            <div class="sidebar-card">
                <div style="margin-bottom:20px">
                    <div style="font-size:0.8rem;color:#64748b;font-weight:700">POSTED</div>
                    <div style="font-weight:600">{{ date_posted }}</div>
                </div>
                <div style="margin-bottom:20px">
                    <div style="font-size:0.8rem;color:#64748b;font-weight:700">LOCATION</div>
                    <div style="font-weight:600">Remote US</div>
                </div>
                <a href="{{ apply_url | e }}" class="apply-btn" target="_blank" rel="noopener noreferrer">Apply Now</a>
            </div>
        </div>
    </div>
</body>
</html>
"""

def main(ndjson_file: str, output_dir: str) -> None:
    """Generate static HTML job pages from an NDJSON feed.

    Every valid JSON object in ``ndjson_file`` is rendered once per title
    prefix into ``<output_dir>/job/<slug>.html``, where the slug is the
    slugified prefixed title followed by a running page counter.

    Args:
        ndjson_file: Path to a UTF-8 file holding one JSON object per line.
            Blank lines are ignored; lines that are not valid JSON, or whose
            top-level value is not an object, are skipped and counted.
        output_dir: Directory under which the ``job`` folder is created.

    Raises:
        OSError: If the input cannot be read or an output file cannot be
            written.
    """
    job_dir = os.path.join(output_dir, "job")
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(job_dir, exist_ok=True)

    # Guard against stray whitespace or a trailing slash in the configured
    # origin so "<origin>/job/<slug>" is always a well-formed URL.
    base_url = MAIN_DOMAIN.strip().rstrip("/")

    today_dt = datetime.date.today()
    date_posted_str = today_dt.strftime("%B %d, %Y")
    iso_date = today_dt.isoformat()
    valid_through = (today_dt + datetime.timedelta(days=60)).isoformat()

    # The 4-Way Variation List
    prefixes = [
        "",                     # Standard Version
        "Urgent Hiring: ",      # Version 2
        "Remote Work - ",       # Version 3
        "New Opening: "         # Version 4
    ]

    # Compile the template once; it does not change between pages.
    template = Template(template_html)

    file_counter = 0
    jobs_processed = 0
    skipped_lines = 0
    with open(ndjson_file, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            try:
                job = json.loads(line)
            except json.JSONDecodeError:
                skipped_lines += 1
                continue
            if not isinstance(job, dict):
                # Valid JSON but not an object (e.g. a list or a number):
                # there are no fields to read from it.
                skipped_lines += 1
                continue

            # "or" also covers explicit nulls and empty strings, which
            # dict.get's default alone would let through.
            base_title = str(job.get("title") or "Job Opportunity")
            raw_desc = job.get("description") or ""
            if not isinstance(raw_desc, str):
                raw_desc = str(raw_desc)
            cleaned_desc = professional_cleaner(raw_desc)
            # Show the placeholder text rather than an empty page body when
            # the feed carries no description.
            html_desc = raw_desc if raw_desc.strip() else cleaned_desc
            apply_url = job.get("apply_url") or "#"
            jobs_processed += 1

            # MULTIPLIER LOOP
            for prefix in prefixes:
                file_counter += 1
                full_title = f"{prefix}{base_title}".strip()
                # The counter suffix keeps slugs unique even when titles repeat.
                slug = f"{slugify(full_title)}-{file_counter}"

                context = {
                    "job_title": full_title,
                    "date_posted": date_posted_str,
                    "iso_date": iso_date,
                    "valid_through": valid_through,
                    "job_description": html_desc,  # Raw for the HTML content
                    "job_description_clean": cleaned_desc,  # Cleaned for Schema SEO
                    "full_job_url": f"{base_url}/job/{slug}",
                    "main_domain": base_url,
                    "apply_url": apply_url,
                }

                output_path = os.path.join(job_dir, f"{slug}.html")
                with open(output_path, "w", encoding="utf-8") as out:
                    out.write(template.render(context))

    print(f"DONE! Generated {file_counter} unique pages.")
    print(f"Total jobs processed: {jobs_processed}")
    if skipped_lines:
        print(f"Skipped {skipped_lines} malformed line(s).")

if __name__ == "__main__":
    # Command-line entry point: two required positional arguments, passed
    # straight through to main().
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in (
        ("ndjson_file", "Input NDJSON file"),
        ("output_dir", "Main output directory"),
    ):
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()
    main(options.ndjson_file, options.output_dir)