o
    ii.                     @   s  d dl mZ d dlmZ d dlZd dlZd dlZd dlm	Z	 d dl
Z
d dlmZ d dlmZ d dlmZ d dlZd dlZd dlZe Zejejdd	 eeZejejed
Ze edZ!e"e!Z#W d   n1 sqw   Y  edde#d dZ$ze
j%&d W n e'y   e
(d Y nw ze
j%&d W n e'y   e
(d Y nw dd Z)e) Z*dd Z+dd Z,dd Z-d:ddZ.dd Z/d d! Z0d"d# Z1d$d% Z2d&d' Z3d(d) Z4d;d+d,Z5d<d/d0Z6d1d2 Z7d:d3d4Z8d5d6 Z9d=d8d9Z:dS )>    )build)parse_durationN)fuzz)PorterStemmer)	stopwords)	HttpErrorz4%(asctime)s - %(name)s - %(levelname)s - %(message)s)levelformatzApiConfig.jsonryoutubev3API_KEY)developerKeyztokenizers/punktpunktzcorpora/stopwordsr   c                  C   sL   t jt jtd} t| }t|W  d    S 1 sw   Y  d S )NzQuotaConfig.json)ospathjoindirname__file__openjsonload)quota_config_pathquota_config_file r   */var/www/edux/Edux_v2/service/VideoCall.pyload_quota_config'   s   
$r   c                   C   s
   dt _d S )Nr   )thread_localquota_usager   r   r   r   init_thread_local.   s   
r   c                 C   s   t  jt|  d 7  _d S )Ncost)r   r   quota_config)
api_methodr   r   r   update_quota1   s   r#   c                 C   s   |   } d|  d} | S )z
    Modifies the search criteria by adding appropriate keywords for YouTube API searches.

    Args:
        search_criteria (str): The original search criteria.

    Returns:
        str: Modified search criteria with additional keywords.
    ")stripsearch_criteriar   r   r   modify_search_criteria4   s   r(   c                    s^   t   ttd fdd}|| }||}| d|r#d|nd }t||}|S )aL  
    Calculates a fuzzy match score based on the presence of search criteria keywords in the video title and tags.
    
    :param search_criteria: The search query string.
    :param video_title: The title of the video.
    :param video_tags: A list of tags associated with the video.
    :return: Match score as a percentage.
    englishc                    s*   |    } fdd|D }d|S )Nc                    s   g | ]}|vr  |qS r   )stem.0wordstemmer
stop_wordsr   r   
<listcomp>R       zHcalculate_fuzzy_match_score.<locals>.preprocess_text.<locals>.<listcomp> )lowersplitr   )textwordsstemmed_wordsr.   r   r   preprocess_textP   s   
z4calculate_fuzzy_match_score.<locals>.preprocess_textr3    )r   setr   r7   r   r   token_set_ratio)r'   video_title
video_tagsr9   search_criteria_stemmedvideo_title_stemmed
video_infomatch_scorer   r.   r   calculate_fuzzy_match_scoreD   s   	rC   c                 C   s>   t  jd| d }td |dg r|dg d S i S )z7
    Fetches channel information from YouTube API.
    zsnippet,statisticspartidchannels.listitemsr   )r   channelslistexecuter#   get)
channel_idchannel_responser   r   r   fetch_channel_info`   s    rO   c                 C   s   i d| d|ddddddddddd	dd
ddddddddddddddddd||ddddg dddd
gS )z,
    Creates header row for Excel file.
    Search CriteriaSearch Type%Matchr:   Full Playlist NameSplit Playlist1Split Playlist2Split Playlist3Split Playlist4Split Playlist5
Full TitleSplit Title1Split Title2Split Title3Split Title4Split Title5
Video LinkPublished At)
Channel TitleChannel Total VideosLengthViewsLikesCommentsTagsDescriptionVideo LanguageLicenser   )r'   search_typechannel_titletotal_videosr   r   r   create_header_rowk   s^   	
rn   c                 C   s  | d }| d }| d }| d }	| d }
|d }| dd}| dd}|d	gd
t|   |d	gd
t|   |d }|d }|d }|	d }|dd}|dd}|dd}|dd	}|dg }|
dd	}|dd	}t|}t| d }t| d }d| }|du rt|}|di dd}|di dd}i d|d|d|d |d! d"|d# d$|d% d&|d' d(|d d)|d*|d! d+|d# d,|d% d-|d' d.|d d/|d0|d1|||| d2|d3|||||||d4
S )5z*
    Processes a single video detail.
    rF   snippet
statisticscontentDetailsstatustitle|   r:      publishedAt	channelIdchannelTitleduration	viewCount0	likeCountcommentCountdescriptiontagslicensedefaultAudioLanguage<   z https://www.youtube.com/watch?v=NUnknown Channel
videoCountUnknownrP   rQ   rS   rT   r   rU      rV      rW      rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   :02d)
z
Channel IDrb   rc   rd   re   rf   rg   rh   ri   rj   )r5   extendlenrL   r   inttotal_secondsrO   )video_detail
group_namerM   rm   r'   rk   video_idro   rp   content_detailsrr   r=   title_segmentsplaylist_segmentsvideo_published_atvideo_channel_idvideo_channel_titlevideo_lengthvideo_view_countvideo_like_countvideo_comment_countr   r   r   languagerz   minutesseconds
video_linkchannel_info_cachedrl   r   r   r   process_single_video   s   
	
r   c                    s  j r
td S fdd |dkrKdjv rId  fddd< jdd	d
jddj }||	d|
|	d | S |dkrTdjv rdjv rt| d fddd< i | D ]\}}dd t|D |< qsjfddddd<  fdd| D }d  fddd< dd   }d |d< d |d< dd }	j|	ddd< jg dg dd
jddddg}
fd d!|
D }|rj|d"j }d|v rTd|v rTd|v rTd|v rTd|v rT||	d|
|	d ||	d|
|	d ||	d|
|	d ||	d|
|	d | S )#a  
    Processes the DataFrame based on the search type.

    Args:
        df (pandas.DataFrame): The DataFrame containing video details.
        search_type (str): Type of search ('Videos' or 'Playlists').
        grouped_video_details (dict): Dictionary of video details grouped by playlist.
        search_criteria (str): The criteria used for searching videos.

    Returns:
        pandas.DataFrame: The processed DataFrame.
    z6No videos passed filtering - returning empty DataFramec              
      s   z=t | tr
|  sW dS |   r sW dS dd  D }|s(W dS t fdd|D }|t| d }|W S  ty\ } ztd|  dt|  W Y d }~dS d }~ww )	Nr   c                 S   s(   g | ]}t | d kr|  qS )r   )r   r%   r4   )r,   wr   r   r   r1      s   ( zOprocess_based_on_search_type.<locals>.calculate_match_score.<locals>.<listcomp>c                 3   s    | ]	}| v rd V  qdS )r   Nr   r+   
text_lowerr   r   	<genexpr>   s    zNprocess_based_on_search_type.<locals>.calculate_match_score.<locals>.<genexpr>d   z#Error calculating match score for 'z': )	
isinstancestrr%   r4   r5   sumr   	Exceptionprint)r6   search_wordsmatchesmatch_percentageer&   r   r   calculate_match_score   s"   z;process_based_on_search_type.<locals>.calculate_match_scoreVideosrY   c                        | S Nr   xr   r   r   <lambda>      z.process_based_on_search_type.<locals>.<lambda>rR   F)by	ascendingT)droprS   	Playlistsc                    s
     | S r   )indexr   )original_playlist_orderr   r   r     s   
 Original Playlist Orderc                 S   s   i | ]\}}|d  d |qS )ro   rs   r   )r,   ivideor   r   r   
<dictcomp>  r2   z0process_based_on_search_type.<locals>.<dictcomp>c                    s    | d  | d  S )NrS   rY   r   row)video_ordersr   r   r     s    r   )axisOriginal Video Orderc                    s   i | ]}| |qS r   r   )r,   playlistr   r   r   r     s    c                    r   r   r   r   r   r   r   r     r   zIndividual Video %MatchAverage Video %MatchPlaylist %Matchc                 S   s,   | d dkr
| d S | d d | d d  S )Nr   r   g      ?r   r   r   r   r   r   calculate_combined_score!  s   z>process_based_on_search_type.<locals>.calculate_combined_score)rR   r   r   )FTTc                    s   g | ]	}| j v r|qS r   columns)r,   col)dfr   r   r1   /  s    z0process_based_on_search_type.<locals>.<listcomp>r   )emptyloggerwarningr   applysort_valuesreset_indextolistinsertr   poprJ   keysrH   	enumerategroupbymeanto_dictmapr   )r   rk   grouped_video_detailsr'   r   r   videosplaylist_scoresavg_video_scoresr   temp_columnsexisting_temp_columnsr   )r   r   r   r'   r   r   process_based_on_search_type   sX   


-


2r   c                 C   st   d}t j|dd}| j|ddd |jd }| jd}|  D ]\}}|d }|j|d |||d	 q |  |S )
z3
    Writes processed DataFrame to Excel file.
    video_details.xlsx
xlsxwriterengineFSheet1r   
sheet_namer_   r   )string)	pdExcelWriterto_excelsheetsr   get_lociterrows	write_urlclose)r   excel_file_pathwriter	worksheetvideo_link_col_idxidxr   r   r   r   r   write_excel_file=  s   
r   c                 C   s  g }d}d}d}|dur$t |}|di dd}|di dd}| sGt||||}t|}tjd	d
d}	|j|	ddd |	  |S d}
d}|  D ]\}}|D ]}|
d7 }
|r|di }|dd}|dd}|d | 	 }t
|	 |}|dk rtd| d|  qU|d7 }z|t|||||| W qU ty } z|jjdkrdt|v r W Y d}~qUd}~w ty } ztd|  W Y d}~qUd}~ww qO|r| rtd| d|
 d| d n	td| d t|}t||| |}t| |S )aA  
    Processes video details from a dictionary of video IDs grouped by playlist.

    Args:
        grouped_video_details (dict): A dictionary where keys are playlist titles and values are lists of video details dictionaries.
        channel_id (str): The ID of the YouTube channel.
        search_criteria (str): The criteria used for searching videos.
        search_type (str): Type of search ('Videos' or 'Playlists').

    Returns:
        pandas.DataFrame: A DataFrame containing video details.

    Raises:
        HttpError: If there's an error in the API request.
    Nr:   ro   rs   r   rp   r   r   r   r   r   Fr   r   r   r   r   r3   r   zFiltered out video: 'z' - fuzzy score   quotaExceededz$Unexpected error processing video - zSmart keyword filtering: kept z out of z videos for: ''z)No keyword filtering applied - processed z videos)rO   rL   rn   r   	DataFramer   r   r   rH   r4   r   r<   r   debugappendr   r   resprr   r   r   	exceptionr%   infor   r   )r   rM   r'   rk   video_details_listr   rl   rm   r   r   original_count
kept_countr   video_detailsr   ro   rs   r   r6   scoreher   r   r   r   process_video_detailsO  sd   
 
r  2   c           
      C   s  t d| d|   g }z|rjt d d}|du s!t||k rit jd|r1td|t| ndt| dd||||d		 }t	d
 |
dg }|sMn|dd |D  |
d}|s_n
|du s!t||k s!nXt d d}|du s{t||k rt jd|rtd|t| ndt| dd|||d }t	d
 |
dg }|sn|dd |D  |
d}|sn
|du s{t||k s{t dt| d |si W S d|du r|n|d| iW S  ty }	 z	t d|	   d}	~	ww )u   
    Updated video discovery:
    - If channel_id provided → fetch from channel uploads playlist (reliable)
    - Else → fallback to search API (original behavior)
    Returns SAME format: {"Video Without Playlist": [ids]}
    z$Starting video discovery | Channel: z | Criteria: zHUsing Search API with channelId filter for channel-specific video searchNro   r  r   	relevance)	rE   
maxResultsqtypeorderrx   videoLicensevideoDuration	pageTokensearch.listrH   c                 S      g | ]}|d  d qS rF   videoIdr   r,   itemr   r   r   r1         z-get_video_ids_only_videos.<locals>.<listcomp>nextPageTokenz%No channel provided, using search API)rE   r  r  r  r  r  r  r  c                 S   r  r  r   r  r   r   r   r1     r  zDiscovered z
 video IDszVideo Without PlaylistzError during video discovery: )r   r  r   r   searchrJ   minr(   rK   r#   rL   r   r   r  )
r'   max_resultsrM   r  r  all_video_idsnext_page_tokensearch_responserH   r   r   r   r   get_video_ids_only_videos  sz   



	
(r$  r   
   c              	   C   s  t d|   dd }ddd}d}g }d}i }	t d| d	 	 t d|  |||| }
|
d }|
d }|D ];}|d }|d }t d|  |||}||	|< |t|7 }t dt| d| d|  |durv||krv nq;|r|dur||krt d| d 	 |	S q#)a  
    Retrieves video IDs from playlists based on the given search criteria and parameters.

    Args:
        search_criteria (str): The search query string.
        max_videos_total (int, optional): Maximum total number of videos to fetch. Defaults to 100.
        max_videos_per_playlist (int, optional): Maximum number of videos to fetch per playlist. Defaults to 10.
        channel_id (str, optional): Optional parameter to restrict searches to a specific channel. Defaults to None.

    Returns:
        dict: A dictionary where keys are playlist titles and values are lists of video IDs.

    Raises:
        HttpError: If there's an error in the API request.
    z4Starting playlist video ID retrieval with criteria: c                 S   s   t d|   td|}g }d}	 t jd|| |d }td dd	 |d
g D }|	| |d}|r@t
||krAnqt dt
| d|   |S )a%  
        Fetches video IDs from a specific playlist.

        Args:
            playlist_id (str): The ID of the playlist.
            max_videos_per_playlist (int): Maximum number of videos to fetch per playlist.

        Returns:
            list: List of video IDs in the playlist.
        zFetching videos from playlist: r  NTro   )rE   r  
playlistIdr  playlistItems.listc                 S   s   g | ]
}|d  d d qS )ro   
resourceIdr  r   r  r   r   r   r1   .      zUget_video_ids_playlist_videos.<locals>.fetch_videos_from_playlist.<locals>.<listcomp>rH   r  Fetched  videos from playlist: )r   r  r  r   playlistItemsrJ   rK   r#   rL   r   r   )playlist_idmax_videos_per_playlistmax_results_per_requestr!  
page_tokenplaylist_items_responsevideo_ids_in_playlistr   r   r   fetch_videos_from_playlist  s,   


zAget_video_ids_playlist_videos.<locals>.fetch_videos_from_playlistNc              	   S   s   t d| d| d|   |dur=t jddt|dd|| d	 }td
 |dg }dd |D }||ddS t	 jd|d| d }td |dg }dd |D }||ddS )a  
        Fetches playlists based on the given criteria.

        Args:
            next_page_token (str, optional): Token for pagination. Defaults to None.
            channel_id (str, optional): Channel ID to restrict searches. Defaults to None.
            search_criteria (str, optional): Search query string. Defaults to None.

        Returns:
            dict: Dictionary containing playlists and next page token.
        z"Fetching playlists with criteria: z, channel_id: z, page_token: Nro   r  r   r  )rE   r  r  r  r  rx   r  r  rH   c                 S   s&   g | ]}|d  d |d d dqS )rF   r&  ro   rs   rF   rs   r   r  r   r   r   r1   X  s   & zJget_video_ids_playlist_videos.<locals>.fetch_playlists.<locals>.<listcomp>r  )	playlistsr"  )rE   rx   r  r  zplaylist.listc                 S   s"   g | ]}|d  |d d dqS )rF   ro   rs   r4  r   r  r   r   r   r1   f  s   " )
r   r  r   r  rJ   r(   rK   r#   rL   r5  )r"  rM   r'   r#  r5  custom_playlistsplaylist_responser   r   r   fetch_playlists;  s:   	z6get_video_ids_playlist_videos.<locals>.fetch_playlistsr   z5Starting main loop to fetch videos (max_videos_total=)TzFetching playlists with token: r"  r5  rF   rs   zProcessing playlist: r*  r+  z. Total fetched: zCompleted fetching 'z' playlist videos)NNN)r   r  r  r   )r'   max_videos_totalr.  rM   r3  r8  total_videos_fetchedall_playlistsr"  playlist_videosplaylistandtokenr  r-  playlist_titler2  r   r   r   get_video_ids_playlist_videos  s:   
*.
 r@  c              	      s~  t d g  t }|  D ]}|D ]}||vr# | || qt d| d qq fddtdt dD }t dt| d	 g }|D ];}d
	|}t 
d|dd  d t jd|d }td ||dg  t 
dt|dg  d qLi }	|  D ]\}
fdd|D |	|
< t 
dt|	|
  d|
  qt dt| dt|	 d |	S )a  
    Processes video details from a dictionary of video IDs grouped by playlist.

    Args:
        videos (dict): A dictionary where keys are playlist titles and values are lists of video IDs.

    Returns:
        dict: A dictionary where keys are playlist titles and values are lists of video detail dictionaries.

    Raises:
        HttpError: If there's an error in the API request.
    z!Starting video details processingz
Video id 'z' already processedc                    s   g | ]
} ||d   qS )r  r   )r,   r   )r!  r   r   r1     r)  z%get_video_details.<locals>.<listcomp>r   r  zCreated z chunks of video IDs,zFetching details for chunk: Nz...z(snippet,statistics,contentDetails,statusrD   zvideos.listrH   r*  z video detailsc                    s   g | ]
}|d   v r|qS )rF   r   )r,   detail)	video_idsr   r   r1     r)  zGrouped z videos for playlist: z
Processed z video details across z
 playlists)r   r  r;   valuesr  addr   ranger   r   r  r   r   rJ   rK   r#   r   rL   rH   )r   processed_video_idsr=  r   all_video_ids_chunksr  chunk	chunk_idsvideo_details_responser   r?  r   )r!  rC  r   get_video_details  s>   


   rL  c                 C   sR  t d|  d| d t jd| d }td |dg }|s-t d|   i S |d	 d d
 d }t d|  g }d}	 |rL|t	| nd}t
d|}t jd|||d }	td |	dg D ]}
||
d d  qjt dt	| d |rt	||krt d|  n	|	d}|snqDt dt	|  d|iS )ac  
    Retrieves video IDs from a YouTube channel via its uploads playlist.
    No search criteria or filters are applied.

    Args:
        channel_id (str): The YouTube channel ID.
        max_results (int, optional): Maximum number of videos to fetch. If None, fetches all videos.

    Returns:
        dict: {"All Channel Videos": [video_id, ...]}
    zFetching videos for channel: z# via uploads playlist (max_results=r9  rq   rD   rG   rH   z4Channel not found or missing contentDetails for ID: r   relatedPlaylistsuploadszFound uploads playlist ID: NTr  )rE   r&  r  r  r'  r  r*  z videos so far...zReached max_results cap of r  z#Total videos fetched from channel: zAll Channel Videos)r   r  r   rI   rJ   rK   r#   rL   r   r   r  r,  r  )rM   r   rN   rH   uploads_playlist_idr!  r"  	remaining	page_sizer7  r  r   r   r   get_all_channel_videos  sP   

rR  c                 C   s   t d|   t jdd| dd }td d|v r9t|d dkr9|d d d	 d
 }t d|  |S t d|   dS )a  
    Retrieves the channel ID for a given channel name.

    Args:
        channel_name (str): The name of the YouTube channel.

    Returns:
        str or None: The channel ID if found, None otherwise.

    Raises:
        HttpError: If there's an error in the API request.
    zSearching for channel: ro   rv   channel)rE   r  r  r  r  rH   r   rF   rx   zFound channel ID: z!No channels found matching name: N)	r   r  r   r  rJ   rK   r#   r   r   )channel_namer#  rM   r   r   r   get_channel_id_from_name  s    rU    c                 C   s  zt d|  d| d| d| d| d| d|  t  t d |r)t|}| d	kr;t d
 t|||||}n0| dkrLt d t||||}n| dkrc|sVtdd}t d t||}ntd|  dt d t|}t d t	|||| t
jfW S  ty }	 z	t d|	   d}	~	w ty }
 zt d|
  |
jjdkrdt|
v rt d  d}
~
w ty } z	t d|   d}~ww )a  
    Fetches video metadata based on the given search criteria and parameters.

    Args:
        search_type (str): Type of search. Can be 'Videos', 'Playlists', or 'AllChannelVideos'.
        search_criteria (str): The search query string.
        max_results (int, optional): Maximum number of results to return. Defaults to 1000.
        max_videos_per_playlist (int, optional): Maximum number of videos to fetch per playlist. Defaults to 50.
        channel_id (str, optional): Optional parameter to restrict searches to a specific channel. Defaults to None.
        videoLicense (str, optional): Filter videos by license. Defaults to None.
        videoDuration (str, optional): Filter videos by duration. Defaults to None.

    Returns:
        tuple: A tuple containing the processed video details and the quota usage.

    Raises:
        ValueError: If the search_type is invalid or if channel_id is None for 'AllChannelVideos' search type.
        HttpError: If there's an error in the API request.
    z<Starting video metadata retrieval. Parameters:
search_type: z,
search_criteria: z,
max_results: z,
max_videos_per_playlist: z,
channel_id: z,
videoLicense: z,
videoDuration: z3Initialized thread-local storage for quota trackingr   zPerforming video searchr   zPerforming playlist searchAllChannelVideosz:Channel ID is required for 'AllChannelVideos' search type.Nz9Performing all channel videos search via uploads playlistzInvalid search_type 'z3'. Must be 'Videos'/'Playlists'/'AllChannelVideos'.zFetching video detailszProcessing video detailszValueError: zHttpError: r   r   zQuota exceededzUnexpected error: )r   r  r   rU  r$  r@  
ValueErrorrR  rL  r  r   r   r  r   r  rr   r   r   )rk   r'   r   r.  rM   r  r  r   r	  ver  r   r   r   r   get_video_metadata3  sh   







rZ  r   )r  NNN)r   r%  N)rV  r  NNN);googleapiclient.discoveryr   isodater   pandasr   r   r   
fuzzywuzzyr   nltk	nltk.stemr   nltk.corpusr   googleapiclient.errorsr   logging	threadingr   localr   basicConfigINFO	getLogger__name__r   r   r   r   r   api_key_pathr   
api_configr   youtube_configr   datafindLookupErrordownloadr   r!   r   r#   r(   rC   rO   rn   r   r   r   r  r$  r@  rL  rR  rU  rZ  r   r   r   r   <module>   sf    

$Ie
X
W 
?>
'R