from googleapiclient.discovery import build
from isodate import parse_duration
import pandas as pd
import json
import xlsxwriter
from fuzzywuzzy import fuzz
import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from googleapiclient.errors import HttpError
import logging
import threading
import os
# Per-thread storage for quota tracking (see init_thread_local / update_quota).
thread_local = threading.local()

# Configure logging for the whole module.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Read the API key from ApiConfig.json located next to this module.
api_key_path = os.path.join(os.path.dirname(__file__), 'ApiConfig.json')
with open(api_key_path, 'r') as api_config:
    youtube_config = json.load(api_config)

# Build the YouTube service; module-level client shared by every function below.
youtube = build('youtube', 'v3', developerKey=youtube_config['API_KEY'])

# Fetch NLTK resources on first run; these are no-ops once downloaded.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')


def load_quota_config():
    """Load per-API-method quota costs from QuotaConfig.json next to this module.

    Returns:
        dict: Parsed JSON mapping API method names to quota metadata.
    """
    config_path = os.path.join(os.path.dirname(__file__), 'QuotaConfig.json')
    with open(config_path, 'r') as config_file:
        return json.load(config_file)

# Per-method quota costs, loaded once at import time and shared by update_quota().
quota_config = load_quota_config()

def init_thread_local():
    """Reset this thread's API quota counter; call once per worker thread before update_quota()."""
    thread_local.quota_usage = 0

def update_quota(api_method):
    """
    Adds the quota cost of one API call to this thread's running total.

    Args:
        api_method (str): Key into QuotaConfig.json, e.g. "search.list".

    Raises:
        KeyError: If api_method has no entry in the quota config.
    """
    # Thread-local attributes only exist in the thread that set them, so any
    # worker thread that never called init_thread_local() would previously
    # crash with AttributeError here. Initialize lazily instead.
    if not hasattr(thread_local, 'quota_usage'):
        init_thread_local()
    thread_local.quota_usage += quota_config[api_method]['cost']

def modify_search_criteria(search_criteria):
    """
    Prepares a search query for the YouTube API.

    Leading/trailing whitespace is stripped and the query is wrapped in
    double quotes to force an exact-phrase search.

    Args:
        search_criteria (str): The original search criteria.

    Returns:
        str: Modified search criteria with additional keywords.
    """
    return f'"{search_criteria.strip()}"'

def calculate_fuzzy_match_score(search_criteria, video_title, video_tags=None):
    """
    Scores how well a video's title (plus optional tags) matches the search query.

    Stop words are removed and remaining words are stemmed before comparison.

    :param search_criteria: The search query string.
    :param video_title: The title of the video.
    :param video_tags: A list of tags associated with the video.
    :return: Fuzzy match score as a percentage.
    """
    stemmer = PorterStemmer()
    ignored = set(stopwords.words('english'))

    def normalize(text):
        kept = [stemmer.stem(token) for token in text.lower().split() if token not in ignored]
        return ' '.join(kept)

    query = normalize(search_criteria)
    title = normalize(video_title)

    # Tags are appended raw (not stemmed/lowercased) alongside the normalized title.
    haystack = f"{title} {' '.join(video_tags) if video_tags else ''}"

    # Token-set ratio ignores word order and duplication.
    return fuzz.token_set_ratio(query, haystack)

def fetch_channel_info(channel_id):
    """
    Fetches snippet and statistics for one channel from the YouTube API.

    Args:
        channel_id (str): YouTube channel ID.

    Returns:
        dict: The first matching channel resource, or {} when none is found.
    """
    response = youtube.channels().list(
        part='snippet,statistics',
        id=channel_id
    ).execute()
    update_quota("channels.list")

    items = response.get('items', [])
    return items[0] if items else {}

def create_header_row(search_criteria, search_type, channel_title, total_videos):
    """
    Builds a single placeholder row carrying the spreadsheet's column layout.

    Used when no videos survive filtering so the Excel export still has the
    expected columns.

    Returns:
        list[dict]: A one-element list with every output column present.
    """
    row = {
        'Search Criteria': search_criteria,
        'Search Type': search_type,
        '%Match': '',
        'Full Playlist Name': '',
    }
    # Five numbered split columns each for playlist name and title.
    row.update({f'Split Playlist{i}': '' for i in range(1, 6)})
    row['Full Title'] = ''
    row.update({f'Split Title{i}': '' for i in range(1, 6)})
    row.update({
        'Video Link': '',
        'Published At': '',
        'Channel Title': channel_title,
        'Channel Total Videos': total_videos,
        'Length': '',
        'Views': '',
        'Likes': '',
        'Comments': '',
        'Tags': [],
        'Description': '',
        'Video Language': '',
        'License': ''
    })
    return [row]

def process_single_video(video_detail, group_name, channel_id, total_videos, search_criteria, search_type):
    """
    Flattens one YouTube video resource into a row dict for the output sheet.

    Args:
        video_detail (dict): A videos.list item with snippet/statistics/contentDetails/status.
        group_name (str): Playlist title the video was grouped under.
        channel_id (str or None): Caller-supplied channel ID; when None, this
            video's own channel is looked up to fill in its video count.
        total_videos: Channel video count computed by the caller (used when channel_id is set).
        search_criteria (str): Search query, echoed into the row.
        search_type (str): Search type, echoed into the row.

    Returns:
        dict: One spreadsheet row of video metadata.
    """
    video_id = video_detail['id']
    snippet = video_detail['snippet']
    statistics = video_detail['statistics']
    content_details = video_detail['contentDetails']
    status = video_detail['status']

    video_title = snippet['title']

    # Split on '|' into at most 5 segments and pad with '' so the
    # Split Title1..5 / Split Playlist1..5 columns always exist.
    title_segments = video_title.split("|", 4)
    playlist_segments = group_name.split("|", 4)

    title_segments.extend([""] * (5 - len(title_segments)))
    playlist_segments.extend([""] * (5 - len(playlist_segments)))

    video_published_at = snippet['publishedAt']
    video_channel_id = snippet['channelId']
    video_channel_title = snippet['channelTitle']
    video_length = content_details['duration']
    video_view_count = statistics.get('viewCount', '0')
    video_like_count = statistics.get('likeCount', '0')
    video_comment_count = statistics.get('commentCount', '0')
    description = snippet.get('description', '')
    tags = snippet.get('tags', [])
    # Renamed from 'license': avoid shadowing the builtin.
    video_license = status.get('license', '')
    language = snippet.get('defaultAudioLanguage', '')

    # ISO-8601 duration -> "M:SS"; videos >= 1h render as total minutes.
    duration = parse_duration(video_length)
    minutes = int(duration.total_seconds() // 60)
    seconds = int(duration.total_seconds() % 60)

    video_link = f"https://www.youtube.com/watch?v={video_id}"

    if channel_id is None:
        # No channel context from the caller: fetch this video's channel to
        # get its video count. NOTE: this issues one channels.list call per
        # video; results are not cached across calls. (A dead 'channel_title'
        # assignment was removed here — the row always uses the snippet's
        # channelTitle below.)
        channel_info = fetch_channel_info(video_channel_id)
        total_videos = channel_info.get('statistics', {}).get('videoCount', 'Unknown')

    return {
        'Search Criteria': search_criteria,
        'Search Type': search_type,
        'Full Playlist Name': group_name,
        'Split Playlist1': playlist_segments[0],
        'Split Playlist2': playlist_segments[1],
        'Split Playlist3': playlist_segments[2],
        'Split Playlist4': playlist_segments[3],
        'Split Playlist5': playlist_segments[4],
        'Full Title': video_title,
        'Split Title1': title_segments[0],
        'Split Title2': title_segments[1],
        'Split Title3': title_segments[2],
        'Split Title4': title_segments[3],
        'Split Title5': title_segments[4],
        'Video Link': video_link,
        'Published At': video_published_at,
        'Channel Title': video_channel_title,
        'Channel ID': video_channel_id,
        'Channel Total Videos': total_videos,
        'Length': f"{minutes}:{seconds:02d}",
        'Views': video_view_count,
        'Likes': video_like_count,
        'Comments': video_comment_count,
        'Tags': tags,
        'Description': description,
        'Video Language': language,
        'License': video_license
    }

def process_based_on_search_type(df, search_type, grouped_video_details, search_criteria):
    """
    Scores and reorders the DataFrame according to the search type.

    Args:
        df (pandas.DataFrame): The DataFrame containing video details.
        search_type (str): Type of search ('Videos' or 'Playlists'); any other
            value returns the DataFrame unchanged.
        grouped_video_details (dict): Video details keyed by playlist title
            (used only on the 'Playlists' path to recover original ordering).
        search_criteria (str): The criteria used for searching videos.

    Returns:
        pandas.DataFrame: The processed DataFrame, sorted by match score with
        the score columns moved next to 'Full Playlist Name'.
    """

    # Handle empty DataFrame (no videos passed filtering)
    if df.empty:
        logger.warning("No videos passed filtering - returning empty DataFrame")
        return df

    def calculate_match_score(text):
        """Percentage of meaningful search words (length > 2) found in `text`."""
        try:
            if not isinstance(text, str) or not text.strip():
                return 0

            text_lower = text.lower()
            if not search_criteria or not search_criteria.strip():
                return 0

            # Extract meaningful words from search criteria (length > 2)
            search_words = [w.strip().lower() for w in search_criteria.split() if len(w.strip()) > 2]

            if not search_words:
                return 0

            # Count matching words
            matches = sum(1 for word in search_words if word in text_lower)

            # Calculate percentage of words that match
            match_percentage = (matches / len(search_words)) * 100

            return match_percentage
        except Exception as e:
            # FIX: route errors through the module logger instead of print()
            # so they land in the configured log output like everywhere else.
            logger.warning(f"Error calculating match score for '{text}': {str(e)}")
            return 0

    if search_type == 'Videos':
        if 'Full Title' in df.columns:
            df['%Match'] = df['Full Title'].apply(calculate_match_score)

            # Sort by %Match in descending order
            df = df.sort_values(by='%Match', ascending=False).reset_index(drop=True)

            # Move %Match so it sits immediately before 'Full Playlist Name'.
            columns = df.columns.tolist()
            columns.insert(columns.index('Full Playlist Name'), columns.pop(columns.index('%Match')))
            df = df[columns]

    elif search_type == 'Playlists':
        if 'Full Playlist Name' in df.columns and 'Full Title' in df.columns:
            # Remember the API's original playlist/video ordering so ties keep it.
            original_playlist_order = list(grouped_video_details.keys())
            df['Original Playlist Order'] = df['Full Playlist Name'].apply(lambda x: original_playlist_order.index(x))

            video_orders = {}
            for playlist, videos in grouped_video_details.items():
                video_orders[playlist] = {video['snippet']['title']: i for i, video in enumerate(videos)}

            df['Original Video Order'] = df.apply(lambda row: video_orders[row['Full Playlist Name']][row['Full Title']], axis=1)
            playlist_scores = {playlist: calculate_match_score(playlist) for playlist in grouped_video_details.keys()}
            df['Individual Video %Match'] = df['Full Title'].apply(calculate_match_score)
            avg_video_scores = df.groupby('Full Playlist Name')['Individual Video %Match'].mean().to_dict()
            df['Average Video %Match'] = df['Full Playlist Name'].map(avg_video_scores)
            df['Playlist %Match'] = df['Full Playlist Name'].map(playlist_scores)

            # Combined score: a perfect playlist-name match wins outright;
            # otherwise weight playlist score and average video score equally.
            def calculate_combined_score(row):
                if row['Playlist %Match'] == 100:
                    return row['Playlist %Match']
                else:
                    return (row['Playlist %Match'] * 0.5) + (row['Average Video %Match'] * 0.5)

            df['%Match'] = df.apply(calculate_combined_score, axis=1)

            # Sort by combined match score, then maintain original order within playlists
            df = df.sort_values(by=['%Match', 'Original Playlist Order', 'Original Video Order'],
                                ascending=[False, True, True]).reset_index(drop=True)

        # Drop temporary columns
        temp_columns = ['Original Playlist Order', 'Original Video Order']
        existing_temp_columns = [col for col in temp_columns if col in df.columns]
        if existing_temp_columns:
            df = df.drop(columns=existing_temp_columns)

        # Group every score column immediately before 'Full Playlist Name'.
        columns = df.columns.tolist()
        if 'Playlist %Match' in columns and 'Individual Video %Match' in columns and 'Average Video %Match' in columns and '%Match' in columns and 'Full Playlist Name' in columns:
            columns.insert(columns.index('Full Playlist Name'), columns.pop(columns.index('Playlist %Match')))
            columns.insert(columns.index('Full Playlist Name'), columns.pop(columns.index('Individual Video %Match')))
            columns.insert(columns.index('Full Playlist Name'), columns.pop(columns.index('Average Video %Match')))
            columns.insert(columns.index('Full Playlist Name'), columns.pop(columns.index('%Match')))
            df = df[columns]

    return df

def write_excel_file(df):
    """
    Writes the processed DataFrame to 'video_details.xlsx' with clickable links.

    Args:
        df (pandas.DataFrame): Must contain a 'Video Link' column.

    Returns:
        str: Path of the workbook that was written.
    """
    excel_file_path = 'video_details.xlsx'
    # FIX: use ExcelWriter as a context manager so the workbook handle is
    # closed even if write_url raises partway through (previously the writer
    # leaked on any exception before writer.close()).
    with pd.ExcelWriter(excel_file_path, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')

        worksheet = writer.sheets['Sheet1']
        video_link_col_idx = df.columns.get_loc('Video Link')

        # Row 0 holds the header, so data rows start at 1.
        for idx, row in df.iterrows():
            video_link = row['Video Link']
            worksheet.write_url(idx + 1, video_link_col_idx, video_link, string=video_link)

    return excel_file_path

def process_video_details(grouped_video_details, channel_id, search_criteria, search_type):
    """
    Filters, flattens and exports video details grouped by playlist.

    Args:
        grouped_video_details (dict): Playlist title -> list of video detail dicts.
        channel_id (str): The ID of the YouTube channel, or None.
        search_criteria (str): The criteria used for searching videos.
        search_type (str): Type of search ('Videos' or 'Playlists').

    Returns:
        pandas.DataFrame: A DataFrame containing video details (also written
        to 'video_details.xlsx' as a side effect).

    Raises:
        HttpError: Re-raised when the API quota is exceeded.
    """

    video_details_list = []

    # Channel info shared by every row when a channel was supplied up front.
    channel_title = ''
    total_videos = ''

    if channel_id is not None:
        channel_info_cached = fetch_channel_info(channel_id)
        channel_title = channel_info_cached.get('snippet', {}).get('title', 'Unknown Channel')
        total_videos = channel_info_cached.get('statistics', {}).get('videoCount', 'Unknown')

    # No videos at all: still emit a workbook with the expected columns.
    if not grouped_video_details:
        video_details_list = create_header_row(search_criteria, search_type, channel_title, total_videos)

        df = pd.DataFrame(video_details_list)
        # Context manager closes the workbook even if writing fails.
        with pd.ExcelWriter('video_details.xlsx', engine='xlsxwriter') as writer:
            df.to_excel(writer, index=False, sheet_name='Sheet1')
        return df

    # Process video details for each playlist
    original_count = 0
    kept_count = 0

    for group_name, video_details in grouped_video_details.items():
        for video_detail in video_details:
            original_count += 1

            # --- SMART KEYWORD FILTER START ---
            if search_criteria:
                snippet = video_detail.get('snippet', {})
                title = snippet.get('title', '')
                description = snippet.get('description', '')
                text = (title + " " + description).lower()

                # Fuzzy match against title + description (EdTech optimized).
                score = fuzz.token_set_ratio(search_criteria.lower(), text)

                # Threshold tuned for educational content
                if score < 60:
                    logger.debug(f"Filtered out video: '{title}' - fuzzy score {score}")
                    continue
            # --- SMART KEYWORD FILTER END ---

            # Video passed filtering, keep it
            kept_count += 1
            try:
                video_details_list.append(
                    process_single_video(video_detail, group_name, channel_id, total_videos, search_criteria, search_type))
            except HttpError as e:
                # BUG FIX: the handler previously bound the exception as 'he'
                # but referenced 'e', so every HttpError became a NameError.
                if e.resp.status == 403 and 'quotaExceeded' in str(e):
                    raise
                # Non-quota HTTP errors were silently swallowed before; log them.
                logger.exception(f"HTTP error processing video - {e}")
            except Exception as e:
                logger.exception(f"Unexpected error processing video - {e}")

    # Log filtering results
    if search_criteria and search_criteria.strip():
        logger.info(f"Smart keyword filtering: kept {kept_count} out of {original_count} videos for: '{search_criteria}'")
    else:
        logger.info(f"No keyword filtering applied - processed {kept_count} videos")

    df = pd.DataFrame(video_details_list)

    df = process_based_on_search_type(df, search_type, grouped_video_details, search_criteria)
    write_excel_file(df)
    return df



def get_video_ids_only_videos(search_criteria, max_results=50, channel_id=None, videoLicense=None, videoDuration=None):
    """
    Discovers video IDs via the YouTube search API.

    When channel_id is provided the search is restricted to that channel;
    otherwise a global search is performed. Returns the same shape either way.

    Args:
        search_criteria (str): Search query; quoted by modify_search_criteria().
        max_results (int, optional): Cap on IDs returned; None fetches every page.
        channel_id (str, optional): Restrict the search to this channel.
        videoLicense (str, optional): License filter passed through to the API.
        videoDuration (str, optional): Duration filter passed through to the API.

    Returns:
        dict: {"Video Without Playlist": [video_id, ...]}, or {} when nothing found.

    Raises:
        HttpError: If the API request fails.
    """

    logger.info(f"Starting video discovery | Channel: {channel_id} | Criteria: {search_criteria}")

    all_video_ids = []

    # FIX: the channel and global paths were ~30 duplicated lines differing
    # only in the channelId parameter and a log line; build the params once.
    search_params = {
        'part': 'snippet',
        'q': modify_search_criteria(search_criteria),
        'type': 'video',
        'order': 'relevance',
        'videoLicense': videoLicense,
        'videoDuration': videoDuration,
    }
    if channel_id:
        logger.info("Using Search API with channelId filter for channel-specific video search")
        search_params['channelId'] = channel_id
    else:
        logger.info("No channel provided, using search API")

    try:
        next_page_token = None

        while max_results is None or len(all_video_ids) < max_results:
            search_response = youtube.search().list(
                maxResults=min(50, max_results - len(all_video_ids)) if max_results else 50,
                pageToken=next_page_token,
                **search_params
            ).execute()

            update_quota("search.list")

            items = search_response.get('items', [])
            if not items:
                break

            all_video_ids.extend([item['id']['videoId'] for item in items])

            next_page_token = search_response.get('nextPageToken')
            if not next_page_token:
                break

        logger.info(f"Discovered {len(all_video_ids)} video IDs")

        return {} if not all_video_ids else {"Video Without Playlist": all_video_ids if max_results is None else all_video_ids[:max_results]}

    except HttpError as e:
        logger.exception(f"Error during video discovery: {e}")
        raise

def get_video_ids_playlist_videos(search_criteria, max_videos_total=100, max_videos_per_playlist=10, channel_id=None):
    """
    Retrieves video IDs from playlists based on the given search criteria and parameters.

    Playlists are discovered via the search API when search_criteria is given,
    or listed from the channel when only channel_id is given; each playlist's
    videos are then paged from playlistItems.list.

    Args:
        search_criteria (str): The search query string.
        max_videos_total (int, optional): Maximum total number of videos to fetch. Defaults to 100.
        max_videos_per_playlist (int, optional): Maximum number of videos to fetch per playlist. Defaults to 10.
        channel_id (str, optional): Optional parameter to restrict searches to a specific channel. Defaults to None.

    Returns:
        dict: A dictionary where keys are playlist titles and values are lists of video IDs.
              NOTE(review): playlists sharing the same title overwrite each other in this dict.

    Raises:
        HttpError: If there's an error in the API request.
    """
    
    logger.info(f"Starting playlist video ID retrieval with criteria: {search_criteria}")

    def fetch_videos_from_playlist(playlist_id, max_videos_per_playlist):
        """
        Fetches video IDs from a specific playlist, paging until the cap is reached.

        NOTE(review): pages are min(50, cap) items each and the loop only stops
        once len >= cap, so for caps above 50 this can return slightly more
        than max_videos_per_playlist (no final truncation) — confirm intended.

        Args:
            playlist_id (str): The ID of the playlist.
            max_videos_per_playlist (int): Maximum number of videos to fetch per playlist.

        Returns:
            list: List of video IDs in the playlist.
        """
        
        logger.debug(f"Fetching videos from playlist: {playlist_id}")
        
        max_results_per_request = min(50, max_videos_per_playlist)
        all_video_ids = []
        page_token = None

        while True:
            # Fetch playlist items
            playlist_items_response = youtube.playlistItems().list(
                part='snippet',
                maxResults=max_results_per_request,
                playlistId=playlist_id,
                pageToken=page_token
            ).execute()
            update_quota("playlistItems.list")
            
            # Extract video IDs from the response
            video_ids_in_playlist = [item['snippet']['resourceId']['videoId'] for item in playlist_items_response.get('items', [])]
            all_video_ids.extend(video_ids_in_playlist)

            # Check for next page token
            page_token = playlist_items_response.get('nextPageToken')
            
            # Stop when the playlist is exhausted or the per-playlist cap is met.
            if not page_token or len(all_video_ids) >= max_videos_per_playlist:
                break

        logger.debug(f"Fetched {len(all_video_ids)} videos from playlist: {playlist_id}")
        return all_video_ids

    def fetch_playlists(next_page_token=None, channel_id=None, search_criteria=None):
        """
        Fetches one page of playlists, via search.list when search_criteria is
        given, otherwise via playlists.list for the channel.

        Args:
            next_page_token (str, optional): Token for pagination. Defaults to None.
            channel_id (str, optional): Channel ID to restrict searches. Defaults to None.
            search_criteria (str, optional): Search query string. Defaults to None.

        Returns:
            dict: {'playlists': [{'id', 'title'}, ...], 'next_page_token': str or None}.
        """
        
        logger.debug(f"Fetching playlists with criteria: {search_criteria}, channel_id: {channel_id}, page_token: {next_page_token}")

        if search_criteria is not None:
            # Search for playlists using the search API
            search_response = youtube.search().list(
                part='snippet',
                maxResults=50,
                q=modify_search_criteria(search_criteria),
                type='playlist',
                order='relevance',
                channelId=channel_id,
                pageToken=next_page_token
            ).execute()
            update_quota("search.list")
            
            # Search results nest the playlist ID under item['id']['playlistId'].
            playlists = search_response.get('items', [])
            custom_playlists = [{'id': item['id']['playlistId'], 'title': item['snippet']['title']} for item in playlists]
            return {'playlists': custom_playlists, 'next_page_token': search_response.get('nextPageToken')}

        else:
            # List playlists for a specific channel
            playlist_response = youtube.playlists().list(
                part='snippet',
                channelId=channel_id,
                maxResults=50,
                pageToken=next_page_token
            ).execute()
            # NOTE(review): other call sites use keys like "playlistItems.list";
            # confirm "playlist.list" exists in QuotaConfig.json (the API method
            # itself is named playlists.list) or this raises KeyError.
            update_quota("playlist.list")

            # playlists.list results carry the ID directly in item['id'].
            playlists = playlist_response.get('items', [])
            custom_playlists = [{'id': item['id'], 'title': item['snippet']['title']} for item in playlists]
            return {'playlists': custom_playlists, 'next_page_token': playlist_response.get('nextPageToken')}

    total_videos_fetched = 0
    all_playlists = []

    next_page_token = None
    playlist_videos = {}  # playlist title -> list of video IDs (the return value)
    
    logger.info(f"Starting main loop to fetch videos (max_videos_total={max_videos_total})")

    while True:
        logger.debug(f"Fetching playlists with token: {next_page_token}")
        
        playlistandtoken = fetch_playlists(next_page_token, channel_id, search_criteria)
        next_page_token = playlistandtoken['next_page_token']
        all_playlists = playlistandtoken['playlists']
        
        for item in all_playlists:
            playlist_id = item['id']
            playlist_title = item['title']
            
            logger.debug(f"Processing playlist: {playlist_title}")
            
            video_ids_in_playlist = fetch_videos_from_playlist(playlist_id, max_videos_per_playlist)
            
            playlist_videos[playlist_title] = video_ids_in_playlist
            
            total_videos_fetched += len(video_ids_in_playlist)
            logger.info(f"Fetched {len(video_ids_in_playlist)} videos from playlist: {playlist_title}. Total fetched: {total_videos_fetched}")

            # Only enforce total cap if max_videos_total is set. This break
            # exits the for loop only; the while condition below re-checks.
            if max_videos_total is not None and total_videos_fetched >= max_videos_total:
                break
        
        if not next_page_token or (max_videos_total is not None and total_videos_fetched >= max_videos_total):
            logger.info(f"Completed fetching '{total_videos_fetched}' playlist videos")
            break

    return playlist_videos

def get_video_details(videos):
    """
    Fetches full video resources for every ID in the playlist mapping.

    Args:
        videos (dict): A dictionary where keys are playlist titles and values are lists of video IDs.

    Returns:
        dict: A dictionary where keys are playlist titles and values are lists of video detail dictionaries.

    Raises:
        HttpError: If there's an error in the API request.
    """

    logger.info("Starting video details processing")

    # De-duplicate IDs across playlists, preserving first-seen order.
    unique_ids = []
    seen = set()
    for id_list in videos.values():
        for vid in id_list:
            if vid in seen:
                logger.warning(f"Video id '{vid}' already processed")
            else:
                seen.add(vid)
                unique_ids.append(vid)

    # videos.list accepts at most 50 IDs per request.
    chunks = [unique_ids[start:start + 50] for start in range(0, len(unique_ids), 50)]
    logger.info(f"Created {len(chunks)} chunks of video IDs")

    details = []
    for chunk in chunks:
        joined_ids = ','.join(chunk)
        logger.debug(f"Fetching details for chunk: {joined_ids[:50]}...")  # Truncate for readability

        response = youtube.videos().list(
            part='snippet,statistics,contentDetails,status',
            id=joined_ids
        ).execute()
        update_quota("videos.list")

        fetched = response.get('items', [])
        details.extend(fetched)
        logger.debug(f"Fetched {len(fetched)} video details")

    # Re-group the fetched details under their playlist titles.
    grouped_video_details = {}
    for playlist_title, id_list in videos.items():
        grouped_video_details[playlist_title] = [detail for detail in details if detail['id'] in id_list]
        logger.debug(f"Grouped {len(grouped_video_details[playlist_title])} videos for playlist: {playlist_title}")

    logger.info(f"Processed {len(details)} video details across {len(grouped_video_details)} playlists")
    return grouped_video_details

def get_all_channel_videos(channel_id, max_results=None):
    """
    Collects video IDs from a channel's uploads playlist, unfiltered.

    Args:
        channel_id (str): The YouTube channel ID.
        max_results (int, optional): Maximum number of videos to fetch. If None, fetches all videos.

    Returns:
        dict: {"All Channel Videos": [video_id, ...]}, or {} when the channel is unknown.
    """
    logger.info(f"Fetching videos for channel: {channel_id} via uploads playlist (max_results={max_results})")

    # Resolve the channel's uploads playlist from its contentDetails.
    channel_response = youtube.channels().list(
        part='contentDetails',
        id=channel_id
    ).execute()
    update_quota("channels.list")

    channel_items = channel_response.get('items', [])
    if not channel_items:
        logger.warning(f"Channel not found or missing contentDetails for ID: {channel_id}")
        return {}

    uploads_playlist_id = channel_items[0]['contentDetails']['relatedPlaylists']['uploads']
    logger.info(f"Found uploads playlist ID: {uploads_playlist_id}")

    video_ids = []
    page_token = None

    while True:
        # Size each page so we never request more than the remaining cap.
        outstanding = (max_results - len(video_ids)) if max_results else 50

        page = youtube.playlistItems().list(
            part='contentDetails',
            playlistId=uploads_playlist_id,
            maxResults=min(50, outstanding),
            pageToken=page_token
        ).execute()
        update_quota("playlistItems.list")

        video_ids.extend(entry['contentDetails']['videoId'] for entry in page.get('items', []))

        logger.info(f"Fetched {len(video_ids)} videos so far...")

        if max_results and len(video_ids) >= max_results:
            logger.info(f"Reached max_results cap of {max_results}")
            break

        page_token = page.get('nextPageToken')
        if not page_token:
            break

    logger.info(f"Total videos fetched from channel: {len(video_ids)}")
    return {"All Channel Videos": video_ids}

def get_channel_id_from_name(channel_name):
    """
    Resolves a YouTube channel name to its channel ID via the search API.

    Args:
        channel_name (str): The name of the YouTube channel.

    Returns:
        str or None: The channel ID of the best match, or None when nothing matches.

    Raises:
        HttpError: If there's an error in the API request.
    """
    logger.info(f"Searching for channel: {channel_name}")

    search_response = youtube.search().list(
        part='snippet',
        maxResults=5,
        q=channel_name,
        type='channel'
    ).execute()
    update_quota("search.list")

    # Guard clause: nothing matched the name.
    results = search_response.get('items', [])
    if not results:
        logger.warning(f"No channels found matching name: {channel_name}")
        return None

    # Take the top-ranked result.
    channel_id = results[0]['id']['channelId']
    logger.info(f"Found channel ID: {channel_id}")
    return channel_id

def get_video_metadata(search_type, search_criteria, max_results=1000, max_videos_per_playlist=50, channel_id=None, videoLicense=None, videoDuration=None):
    """
    Fetches video metadata based on the given search criteria and parameters.

    Args:
        search_type (str): Type of search. Can be 'Videos', 'Playlists', or 'AllChannelVideos'.
        search_criteria (str): The search query string.
        max_results (int, optional): Maximum number of results to return. Defaults to 1000.
        max_videos_per_playlist (int, optional): Maximum number of videos to fetch per playlist. Defaults to 50.
        channel_id (str, optional): Channel NAME to restrict searches to (it is resolved to a
            channel ID via get_channel_id_from_name). Defaults to None.
        videoLicense (str, optional): Filter videos by license. Defaults to None.
        videoDuration (str, optional): Filter videos by duration. Defaults to None.

    Returns:
        tuple: A tuple containing the processed video details and the quota usage.

    Raises:
        ValueError: If the search_type is invalid or if channel_id is None for 'AllChannelVideos' search type.
        HttpError: If there's an error in the API request.
    """

    try:
        logger.info(f"Starting video metadata retrieval. Parameters:\n"
                f"search_type: {search_type},\n"
                f"search_criteria: {search_criteria},\n"
                f"max_results: {max_results},\n"
                f"max_videos_per_playlist: {max_videos_per_playlist},\n"
                f"channel_id: {channel_id},\n"
                f"videoLicense: {videoLicense},\n"
                f"videoDuration: {videoDuration}")

        # Initialize thread-local storage for quota tracking
        init_thread_local()
        logger.info("Initialized thread-local storage for quota tracking")

        # Convert channel name to channel ID if provided.
        # BUG FIX: previously a failed lookup (None) silently turned a
        # channel-restricted 'Videos'/'Playlists' search into a global one.
        # We still proceed (backward compatible), but now warn loudly.
        if channel_id:
            resolved_id = get_channel_id_from_name(channel_id)
            if resolved_id is None:
                logger.warning(f"Could not resolve channel name '{channel_id}' to a channel ID; "
                               "search will NOT be restricted to a channel")
            channel_id = resolved_id

        # Determine the search method based on search_type
        if search_type == 'Videos':
            logger.info("Performing video search")
            videos = get_video_ids_only_videos(search_criteria, max_results, channel_id, videoLicense, videoDuration)

        elif search_type == 'Playlists':
            logger.info("Performing playlist search")
            videos = get_video_ids_playlist_videos(search_criteria, max_results, max_videos_per_playlist, channel_id)

        elif search_type == 'AllChannelVideos':
            if not channel_id:
                raise ValueError("Channel ID is required for 'AllChannelVideos' search type.")
            # For AllChannelVideos, ignore ALL search criteria/filters — fetch every video
            # from the channel's uploads playlist using the reliable uploads-playlist method.
            search_criteria = None
            logger.info("Performing all channel videos search via uploads playlist")
            videos = get_all_channel_videos(channel_id, max_results)

        else:
            raise ValueError(f"Invalid search_type '{search_type}'. Must be 'Videos'/'Playlists'/'AllChannelVideos'.")

        # Fetch detailed video information
        logger.info("Fetching video details")
        video_details = get_video_details(videos)

        # Process and return the video details along with quota usage
        logger.info("Processing video details")
        return process_video_details(video_details, channel_id, search_criteria, search_type), thread_local.quota_usage

    except ValueError as ve:
        logger.exception(f"ValueError: {ve}")
        raise
    except HttpError as he:
        logger.exception(f"HttpError: {he}")
        if he.resp.status == 403 and 'quotaExceeded' in str(he):
            # Plain string: there is nothing to interpolate here
            logger.exception("Quota exceeded")
        raise
    except Exception as e:
        logger.exception(f"Unexpected error: {e}")
        raise

# Example usage (intentionally disabled): the triple-quoted string below is a
# no-op string-literal statement at import time. It preserves a scratch driver
# script plus channel-ID notes for the TicTacLearn channels. To actually run
# an example, copy the relevant lines out of the string into a real
# `if __name__ == "__main__":` block.
'''
if __name__ == "__main__":
    
    max_results = 10
    max_videos_per_playlist = 10
    TicTacLearn Hindi
    youtube.com/channel/UCkxfbAky2v0yXuKWOKCDy1w

    TicTacLearn English
    youtube.com/channel/UCgc6BPchRUum7ASV2QPGw2g

    TicTacLearn Marathi
    youtube.com/channel/UCHB-eNXs4hJB__1G1lOR38Q

    TicTacLearn Telugu
    youtube.com/channel/UC9qLJLs51f97UVwzU19glnA

    TicTacLearn Odia
    youtube.com/channel/UCl8_6ZBA370fTg2w6gGS6Mw

    #channel_name = 'UC3HS6gQ79jjn4xHxogw0HiA'  # Specify a channel ID if you want to limit the search to a specific channel, or leave as None for a general search
    #channel_name = 'MagnetBrainsEducation'
    #channel_name = 'TicTacLearnEnglish'
    #channel_name = 'TicTacLearnHindi'
    #channel_name = 'bcpteteach7461'

    
    # Example invocation searching all videos on particular channel
    # get_video_metadata(search_type, search_criteria, 10, max_videos_per_playlist, channel_name)

    search_type = 'AllChannelVideos'
    #search_type = 'Playlists'
    #search_type = 'Videos'
    #channel_name = 'MagnetBrainsEducation'
    # Example invocation without specifying a channel (general search)
    #get_video_metadata(search_type, search_criteria, max_results, max_videos_per_playlist)
    search_criteria_pass = 'class 9 maths'
    # Example invocation specifying a channel ID (channel-specific search)
    
    license = 'creativeCommon'
    #license = None

    #duration = 'medium'
    duration = None
    
    channel_name = 'TicTacLearnTelugu'
    #channel_name = 'Doubtnut'
    #channel_name = 'MagnetBrainsEducation'
    #channel_name = None 
    try:
        df, qsu = get_video_metadata(search_type, search_criteria_pass,50, 1, channel_name,license,duration)
        print(f"quota used = {qsu}")
    except HttpError as e:
        if e.resp.status == 403 and 'quotaExceeded' in str(e):
            print(f"Quota exceeded")
    except Exception as e:
        print(f"unhandled exception = {e}")


    #search_type = 'Videos'
    # Example invocation without specifying a channel (general search)
    #get_video_metadata(search_type, search_criteria, max_results, max_videos_per_playlist)
    
    # Example invocation specifying a channel ID (channel-specific search)
    #get_video_metadata(search_type, search_criteria, max_results, max_videos_per_playlist, channel_name)
'''