from flask import Flask, jsonify, request
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import logging
import threading
import time
from flask_caching import Cache
import requests
import json
import re
from datetime import datetime
import os

app = Flask(__name__)

# Flask-Caching setup
cache = Cache(config={"CACHE_TYPE": "SimpleCache", "CACHE_DEFAULT_TIMEOUT": 60})
cache.init_app(app)

# Logging setup
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Per-cache-key locks so concurrent requests for the same resource
# trigger only one scrape
locks = {}

# Utility function to set up Selenium WebDriver
def create_driver():
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_argument("--disable-notifications")
    chrome_options.add_argument("--disable-logging")
    chrome_options.add_argument("--disable-crash-reporter")
    chrome_options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    )
    service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=service, options=chrome_options)
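
# Illustrative usage sketch (not part of the original flow): every
# create_driver() call should be paired with quit(), otherwise headless
# Chrome processes accumulate. The URL below is hypothetical.
#
#     driver = create_driver()
#     try:
#         driver.get("https://example.com")
#     finally:
#         driver.quit()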

# Utility function to fetch an element's innerHTML, retrying on failure
def fetch_html(url, wait_id):
    MAX_RETRIES = 5
    attempt = 0
    while attempt < MAX_RETRIES:
        driver = create_driver()
        try:
            driver.get(url)
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, wait_id)))
            container = driver.find_element(By.ID, wait_id)
            return container.get_attribute("innerHTML")
        except Exception as e:
            logging.error(f"Error on attempt {attempt + 1} fetching HTML from {url}: {e}")
            attempt += 1
            time.sleep(3)  # Wait before retrying
        finally:
            driver.quit()
    logging.error(f"Failed to fetch HTML from {url} after {MAX_RETRIES} attempts")
    return None
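
# Illustrative usage sketch (the URL and element id are assumptions for
# the example; any page/element pair works):
#
#     html = fetch_html("https://www.ysscores.com/ar/news", "news_list")
#     if html is not None:
#         soup = BeautifulSoup(html, "html.parser")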

# Common route handler with caching and locking
def handle_request_with_cache(cache_key, fetch_function, endpoint, data=None, cache_timeout=None):
    # Check if the result is already cached
    cached_result = cache.get(cache_key)
    if cached_result:
        return jsonify(cached_result)

    # Lock to handle duplicate requests for the same cache key
    lock = locks.setdefault(cache_key, threading.Lock())
    with lock:
        # Recheck the cache inside the lock to avoid race conditions
        cached_result = cache.get(cache_key)
        if cached_result:
            return jsonify(cached_result)

        # Fetch the data and process it
        result = fetch_function()

        if result is None:
            return jsonify({"error": "Failed to fetch data"}), 500

        # Mirror the result to the remote PHP endpoint; a failed sync is
        # logged but must not break the response
        try:
            if endpoint == 'competitions':
                logging.info("Syncing competitions data")
                requests.post('https://sportina1.com/save_data.php?competitions', json=result, timeout=10)
            elif endpoint == 'posts':
                logging.info("Syncing posts data")
                requests.post(f'https://sportina1.com/save_data.php?posts&data={data}', json=result, timeout=10)
            elif endpoint == 'post':
                logging.info("Syncing post data")
                requests.post(f'https://sportina1.com/save_data.php?post&data={data}', json=result, timeout=10)
        except requests.RequestException as e:
            logging.error(f"Failed to sync {endpoint} data: {e}")

        # Cache the result with the specified timeout
        cache.set(cache_key, result, timeout=cache_timeout)

        # Return the result as a JSON response
        return jsonify(result)
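
# Illustrative usage sketch ("fetch_example" and "example_key" are
# hypothetical; the real fetchers are defined inside the routes below):
#
#     def fetch_example():
#         return {"status": "ok"}
#
#     return handle_request_with_cache("example_key", fetch_example,
#                                      'posts', data="0", cache_timeout=60)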

def extract_arabic_title_from_link(link):
    """Builds a title from the Arabic word runs in a URL slug; the regex
    skips the hyphens between words, so joining with spaces restores the
    word breaks."""
    arabic_text = re.findall(r'[\u0600-\u06FF]+', link)
    if arabic_text:
        return ' '.join(arabic_text)
    return "Untitled"


# Ensure the 'images' folder exists
os.makedirs('../sportina1.com/public_html/images', exist_ok=True)

# def delete_old_images(folder, max_images=50):
#     """Deletes the oldest images in the folder if the number of images exceeds max_images."""
#     try:
#         # List all files in the folder
#         files = [f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]

#         # Sort files by modification time (oldest first)
#         files.sort(key=lambda x: os.path.getmtime(os.path.join(folder, x)))

#         # If the number of files exceeds the limit, delete the oldest files
#         if len(files) > max_images:
#             files_to_delete = files[:len(files) - max_images]
#             for file in files_to_delete:
#                 os.remove(os.path.join(folder, file))
#                 logging.info(f"Deleted old image: {file}")

#     except Exception as e:
#         logging.error(f"Error cleaning up old images: {e}")

def screenshot_image(img_url, folder='../sportina1.com/public_html/images'):
    """Captures a screenshot of an image and saves it locally."""
    if not img_url:
        return None

    driver = None
    try:
        # Initialize the driver
        driver = create_driver()

        # Navigate to the image URL
        driver.get(img_url)

        # Clean up old images if the folder exceeds the limit
        # delete_old_images(folder)

        # Create a unique filename for the image
        filename = os.path.join(folder, f"{datetime.now().strftime('%Y%m%d%H%M%S%f')}.png")

        # Locate the image and take a screenshot
        image_element = driver.find_element(By.TAG_NAME, "img")
        image_element.screenshot(filename)

        logging.info(f"Saved screenshot: {filename}")
        return filename

    except Exception as e:
        logging.error(f"Error capturing screenshot for {img_url}: {e}")
        return None

    finally:
        # Guard against create_driver() itself failing, in which case
        # `driver` was never assigned
        if driver is not None:
            driver.quit()
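
# Illustrative usage sketch (the image URL is hypothetical):
#
#     path = screenshot_image("https://www.ysscores.com/uploads/news.png")
#     if path:
#         logging.info(f"Image stored at {path}")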

@app.route("/")
def home():
    return "Welcome to the Enhanced Flask App!"

@app.route("/posts", methods=["GET"])
def posts():
    data = request.args.get("data")
    if not data:
        return jsonify({"error": "data is required"}), 400

    # If data == 0, scrape from https://www.ysscores.com/ar/news
    if data == "0":
        url = "https://www.ysscores.com/ar/news"
        cache_key = "news_data"

        def fetch_news_data():
            driver = create_driver()
            try:
                driver.get(url)
                soup = BeautifulSoup(driver.page_source, "html.parser")

                # Extract data from the first 4 elements with class "col-6"
                news_data = []
                for col in soup.find_all('div', class_='col-6')[:4]:
                    a_tag = col.find('a')
                    link = a_tag.get('href') if a_tag else None
                    img_tag = col.find('img')
                    img_url = img_tag.get('src') if img_tag else None
                    title_tag = col.find('div', class_='news-title')
                    news_title = title_tag.text.strip() if title_tag else None
                    date_tag = col.find('div', class_='news-date')
                    news_date = date_tag.text.strip() if date_tag else None

                    # Save the image as a screenshot
                    saved_img_path = screenshot_image(img_url) if img_url else None

                    # If news_title is missing, extract it from the link
                    if not news_title and link:
                        news_title = extract_arabic_title_from_link(link)

                    news_data.append({
                        'link': link,
                        'img': saved_img_path,
                        'title': news_title,
                        'date': news_date
                    })

                # Scrape data from element with id="news_list"
                news_list_element = soup.find(id="news_list")
                if news_list_element:
                    for a_tag in news_list_element.find_all('a'):
                        link = a_tag.get('href')
                        img_tag = a_tag.find('img')
                        img_url = img_tag.get('src') if img_tag else None
                        title_tag = a_tag.find('div', class_='news-title')
                        news_title = title_tag.text.strip() if title_tag else None

                        # Save the image as a screenshot
                        saved_img_path = screenshot_image(img_url) if img_url else None

                        # If news_title is missing, extract it from the link
                        if not news_title and link:
                            news_title = extract_arabic_title_from_link(link)

                        # Set the date to today's date
                        news_data.append({
                            'link': link,
                            'img': saved_img_path,
                            'title': news_title,
                            'date': datetime.now().strftime('%Y-%m-%d')
                        })

                return news_data

            except Exception as e:
                logging.error(f"Error fetching news data: {e}")
                return None  # None lets the shared handler respond with a 500 instead of caching the error
            finally:
                driver.quit()

        return handle_request_with_cache(cache_key, fetch_news_data, 'posts', data, cache_timeout=300)

    # If data != 0, continue with the original behavior
    url = f"https://www.ysscores.com/ar/get_news_more?count_news={data}"
    cache_key = f"posts_{data}"

    def fetch_more_posts():
        driver = create_driver()
        try:
            driver.get(url)
            # Chrome renders the raw JSON response inside a <pre> element
            page_source = driver.find_element(By.TAG_NAME, "pre").text
            return json.loads(page_source)
        except Exception as e:
            logging.error(f"Error fetching more posts: {e}")
            return None
        finally:
            driver.quit()

    return handle_request_with_cache(cache_key, fetch_more_posts, 'posts', data, cache_timeout=300)
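
# Illustrative requests (assuming the app is served on localhost:5000):
#
#     curl "http://localhost:5000/posts?data=0"    # scrape the news landing page
#     curl "http://localhost:5000/posts?data=12"   # fetch older posts via get_news_more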

@app.route("/post", methods=["GET"])
def post():
    try:
        response = requests.get("https://sportina1.com/save_data.php?update_post")
        
        if response.status_code == 200:
            data = response.text
        else:
            print("Failed to retrieve data. Status code:", response.status_code)
    except requests.RequestException as e:
        print("An error occurred:", e)

    if 'data' in locals():
        cache_key = f"posts_{data}"

        def fetch_post_data():
            driver = create_driver()
            try:
                driver.get(data)

                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CLASS_NAME, "news-details-wrap"))
                )

                html_content = driver.page_source

                soup = BeautifulSoup(html_content, "html.parser")
                news_details = soup.find("div", class_="news-details-wrap")

                if not news_details:
                    return {"error": "Content not found"}

                post_image = news_details.find("img", class_="single-news-image")
                post_image_url = post_image.get("src") if post_image else None

                post_title = news_details.find("div", class_="mobile-single-title")
                post_title_text = post_title.text.strip() if post_title else None

                post_content = news_details.find("div", class_="news-content")
                post_content_text = (
                    " ".join(p.text.strip() for p in post_content.find_all("p"))
                    if post_content
                    else None
                )

                return {
                    "image": post_image_url,
                    "title": post_title_text,
                    "content": post_content_text,
                }

            except Exception as e:
                logging.error(f"Error fetching post data: {e}")
                return None  # None lets the shared handler respond with a 500 instead of caching the error
            finally:
                driver.quit()

        return handle_request_with_cache(cache_key, fetch_post_data, "post", data, cache_timeout=10)
    else:
        return "Updated"

if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)
