o
    i;                  
   @  s0  U d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlmZmZmZmZmZ ddlmZ ddlZddlmZmZmZ ddlmZ dgZd	Zi Zd
ed< dd Z dd Z!dHddZ"ej#fddZ$dIdJddZ%G dd dZ&G dd  d e&Z'G d!d" d"e&Z(G d#d$ d$e(Z)G d%d& d&e(Z*G d'd( d(e(Z+G d)d* d*e&Z,G d+d, d,e(Z-G d-d. d.e&Z.e& Z/d/d0 Z0dKd1d2Z1d3d4 Z2dId5d6Z3d7d8 Z4dId9d:Z5dId;d<Z6d=d> Z7dKd?d@Z8e0e1e2e3e5e7e8e4e6dA	Z9dBdC Z:G dDd dZ;dEdF Z<e=dGkre<  dS dS )Lz
A commandline tool for semi-automatically converting CSV to RDF.

See also https://github.com/RDFLib/pyTARQL in the RDFlib family of tools

try: ``csv2rdf --help``

    )annotationsN)AnyDictListOptionalTuple)quote)RDFRDFS	split_uri)URIRefCSV2RDFa^  
csv2rdf.py     -b <instance-base>     -p <property-base>     [-D <default>]     [-c <classname>]     [-i <identity column(s)>]     [-l <label columns>]     [-s <N>] [-o <output>]     [-f configfile]     [--col<N> <colspec>]     [--prop<N> <property>]     <[-d <delim>]     [-C] [files...]"

Reads csv files from stdin or given files
if -d is given, use this delimiter
if -s is given, skips N lines at the start
Creates a URI from the columns given to -i, or automatically by numbering if
none is given
Outputs RDFS labels from the columns given to -l
if -c is given adds a type triple with the given classname
if -C is given, the class is defined as rdfs:Class
Outputs one RDF triple per column in each row.
Output is in n3 format.
Output is stdout, unless -o is specified

Long options also supported:     --base,     --propbase,     --ident,     --class,     --label,     --out,     --defineclass

Long options --col0, --col1, ...
can be used to specify conversion for columns.
Conversions can be:
    ignore, float(), int(), split(sep, [more]), uri(base, [class]), date(format)

Long options --prop0, --prop1, ...
can be used to use specific properties, rather than ones auto-generated
from the headers

-D sets the default conversion for columns not listed

-f says to read config from a .ini/config file - the file must contain one
section called csv2rdf, with keys like the long options, i.e.:

[csv2rdf]
out=output.n3
base=http://example.org/
col0=split(";")
col1=split(";", uri("http://example.org/things/",
                    "http://xmlns.com/foaf/0.1/Person"))
col2=float()
col3=int()
col4=date("%Y-%b-%d %H:%M:%S")

z*Dict[Any, Tuple[URIRef, Optional[URIRef]]]urisc                 C  sP   t dd| } t dd| } | d} d| d  gdd | d	d
 D  S )zc
    CamelCase + lowercase initial a string


    FIRST_NM => firstNm

    firstNm => firstNm

    z[^\w] z([a-z])([A-Z])z\1 \2 r   c                 S  s   g | ]}|  qS  )
capitalize.0xr   r   O/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/rdflib/tools/csv2rdf.py
<listcomp>m       ztoProperty.<locals>.<listcomp>   N)resubsplitjoinlowerlabelr   r   r   
toProperty`   s   

*r!   c                 C  s0   | dd   s| dd  | dd   S | S )Nr      r   )isupperr   r   r   r   r   toPropertyLabelp   s   r$   l_	List[int]iTuple[int, ...]returnc                   s   t  fdd|D S )zPreturn a set of indexes from a list
    >>> index([1,2,3],(0,2))
    (1, 3)
    c                   s   g | ]} | qS r   r   r   r%   r   r   r   {   r   zindex.<locals>.<listcomp>)tuple)r%   r'   r   r*   r   indexv   s   r,   c                 k  s,    t j| fd|i|}|D ]}|V  qd S )Ndialect)csvreader)csv_datar-   kwargs
csv_readerrowr   r   r   r2   ~   s
   r2   class_Optional[URIRef]c                 C  sD   |rt |t| ddddd }nt | }||ft| < |S )Nutf8r   _r   safe)rdflibr   r   encodereplacer   )r   prefixr4   rr   r   r   	prefixuri   s
   &
r?   c                   @  s   e Zd Zdd ZdddZdS )		NodeMakerc                 C     t jjS N)r:   r
   Literalselfr   r   r   range      zNodeMaker.ranger   r   c                 C  s
   t |S rB   )r:   rC   rE   r   r   r   r   __call__      
zNodeMaker.__call__N)r   r   )__name__
__module____qualname__rF   rI   r   r   r   r   r@      s    r@   c                   @  $   e Zd Zdd Zdd Zdd ZdS )NodeUric                 C  s$   || _ |rt|| _d S d | _d S rB   )r=   r:   r   r4   )rE   r=   r4   r   r   r   __init__   s   
zNodeUri.__init__c                 C  s   t || j| jS rB   )r?   r=   r4   rH   r   r   r   rI         zNodeUri.__call__c                 C  s   | j ptjjS rB   )r4   r:   r	   ResourcerD   r   r   r   rF      s   zNodeUri.rangeNrK   rL   rM   rP   rI   rF   r   r   r   r   rO      s    rO   c                   @  s   e Zd ZdddZdS )NodeLiteralNc                 C  s
   || _ d S rB   f)rE   rV   r   r   r   rP      rJ   zNodeLiteral.__init__rB   )rK   rL   rM   rP   r   r   r   r   rT      s    rT   c                   @     e Zd Zdd Zdd ZdS )	NodeFloatc                 C  :   | j s
tt|S t| j rtt|  |S td)Nz(Function passed to float is not callable)rV   r:   rC   floatcallable	ExceptionrH   r   r   r   rI      
   
zNodeFloat.__call__c                 C  rA   rB   )r:   XSDdoublerD   r   r   r   rF      rG   zNodeFloat.rangeNrK   rL   rM   rI   rF   r   r   r   r   rX          rX   c                   @  rW   )NodeIntc                 C  rY   )Nz&Function passed to int is not callable)rV   r:   rC   intr[   r\   rH   r   r   r   rI      r]   zNodeInt.__call__c                 C  rA   rB   )r:   r^   rc   rD   r   r   r   rF      rG   zNodeInt.rangeNr`   r   r   r   r   rb      ra   rb   c                   @  rW   )NodeBoolc                 C  rY   )Nz'Function passed to bool is not callable)rV   r:   rC   boolr[   r\   rH   r   r   r   rI      r]   zNodeBool.__call__c                 C  rA   rB   )r:   r^   re   rD   r   r   r   rF      rG   zNodeBool.rangeNr`   r   r   r   r   rd      ra   rd   c                   @  rW   )NodeReplacec                 C     || _ || _d S rB   ab)rE   ri   rj   r   r   r   rP         
zNodeReplace.__init__c                 C  s   | | j| jS rB   )r<   ri   rj   rH   r   r   r   rI      rQ   zNodeReplace.__call__N)rK   rL   rM   rP   rI   r   r   r   r   rf      s    rf   c                   @  rW   )NodeDatec                 C  s   t tj|| jS rB   )r:   rC   datetimestrptimerV   rH   r   r   r   rI      s   zNodeDate.__call__c                 C  rA   rB   )r:   r^   dateTimerD   r   r   r   rF      rG   zNodeDate.rangeNr`   r   r   r   r   rl      s    rl   c                   @  rN   )	NodeSplitc                 C  rg   rB   seprV   )rE   rr   rV   r   r   r   rP      rk   zNodeSplit.__init__c                   s:    j stj _ t j std fdd| jD S )Nz)Function passed to split is not callable!c                   s&   g | ]}|  d kr |  qS )r   )striprV   )r   yrD   r   r   r      s   & z&NodeSplit.__call__.<locals>.<listcomp>)rV   r:   rC   r[   r\   r   rr   rH   r   rD   r   rI      s
   
zNodeSplit.__call__c                 C  s&   | j rt| j tr| j  S t| S rB   )rV   
isinstancer@   rF   rD   r   r   r   rF      s   

zNodeSplit.rangeNrS   r   r   r   r   rp      s    rp   c                  O  s   dS )Nignorer   )argsr1   r   r   r   _config_ignore   s   rx   c                 C  
   t | |S rB   )rO   )r=   r4   r   r   r   _config_uri   rJ   rz   c                   C  s   t  S rB   )rT   r   r   r   r   _config_literal   s   r{   c                 C     t | S rB   )rX   rU   r   r   r   _config_float  rG   r}   c                 C  ry   rB   )rf   rh   r   r   r   _config_replace  rJ   r~   c                 C  r|   rB   )rb   rU   r   r   r   _config_int
  rG   r   c                 C  r|   rB   )rd   rU   r   r   r   _config_bool  rG   r   c                 C  r|   rB   )rl   )format_r   r   r   _config_date  rG   r   c                 C  ry   rB   )rp   rq   r   r   r   _config_split  rJ   r   )	rv   uriliteralrZ   rc   dater   r<   re   c                 C  s
   t | tS )z$Return a function for column mapping)evalconfig_functions)vr   r   r   column'  s   
r   c                   @  rN   )r   c                 C  sT   d | _ d | _d | _d| _d | _d| _d| _d| _d | _i | _	i | _
tj| _d| _d S )NautoFr   ,)CLASSBASEPROPBASEIDENTLABELDEFINECLASSSKIPDELIMDEFAULTCOLUMNSPROPSsysstdoutOUTtriplesrD   r   r   r   rP   .  s   
zCSV2RDF.__init__c                 C  s4   | j d| | | f  |  jd7  _d S )Nz%s %s %s .
r   )r   writen3r   )rE   spor   r   r   triple@  s   "zCSV2RDF.triplec                   s0  t   } jrtjd jj   jdkr!t jts! jf _ j	s/t
d td _	 js=t
d td _t jD ]}t| qBtt|}tt fdd|D } j D ]\}}|||< t|d	 ||< qa jrӈ  jtjtj tt|D ]M}|| || }	}
|	d
ks|
d
krq j !| j"dkrq |	tjtj#  |	tj$t%t&|
  |	tj' j  |	tj j !|t(  qd}|D ]}
zψ jdkr j	d|  }n j	d)dd t*|
 jD  } j+r |tj$t%d)t*|
 j+  jr |tj j t|
D ]l\}}|, }|d
kr j !| j"dkr9qz, j !|tj%|}t|trZ|D ]} ||| | qLn	 ||| | W q t-y } zt
dd||| ||j.f   W Y d }~qd }~ww q|d	7 }|d dkrtjd| j/t   | f  W q t-y   tjd|   w t0 }t1 D ])\}
}|\}} |tj$t%|
 |rt2|}|3|  |tj| q|D ]} |tjtj q j4  tjd| j/f  tjdt   |   d S )NzOutput to %s
r   z2No base given, using http://example.org/instances/zhttp://example.org/instances/z:No property base given, using http://example.org/property/zhttp://example.org/props/c                   s   g | ]	} j t| qS r   )r   r!   r   rD   r   r   r   [  s    z#CSV2RDF.convert.<locals>.<listcomp>r   r   rv   r   z%dr7   c                 S  s&   g | ]}t |d ddddqS )r6   r   r7   r   r8   )r   r;   r<   r   r   r   r   r   y  s    r   z#Could not process value for column z%d:%s in row %d, ignoring: %s i z$%d rows, %d triples, elapsed %.2fs.
zError processing line: %d
z#Converted %d rows into %d triples.
zTook %.2f seconds.
)5timer   r   stderrr   namer   ru   r+   r   warningswarnr:   	Namespacer   rF   r   nextlistdict	enumerater   itemsr   r   r   r   r	   typer
   Classlenr   getr   Propertyr    rC   r$   domaindefault_node_maker   r,   r   rs   r\   messager   setr   r   addclose)rE   	csvreaderstartr   header_labelsheaderskr   r'   hr%   rowsr   r   _oeclassesucr   rD   r   convertD  s   





	



zCSV2RDF.convertN)rK   rL   rM   rP   r   r   r   r   r   r   r   -  s    c                  C  s  t  } ttjdd  dg d\}}t|}d|v sd|v r(tt td d|v rt	 }|
t|d  |dD ]\}}|d	krOt|d
d| _q>|dkrZt|| _q>|dkret|| _q>|dkrpt|| _q>|dkrzt|| _q>|dkrt|| _q>|dkrt|| _q>|dkr|| _q>|dkrt|| _q>|dkrt|| _q>|drt|| j t|dd  < q>|drt|| j!t|dd  < q>d|v rt|d d
d| _d|v rt|d d
d| _d|v rt|d | _d|v rt|d | _d|v r|d | _d|v r|d | _d|v r&t|d | _d |v r2t|d  | _d!|v r?t|d! | _d"|v rLt|d" | _d#|v rXt|d# | _d$|v rdt|d$ | _d%|v rpt|d% | _d&|v r|t|d& | _d'|v rt|d' | _d(|v rt|d( | _d)|v rt|d) | _d*|v rt|d* | _| D ].\}}|d+rt|| j t|d,d  < q|d-rt|| j!t|d.d  < q| jrd/|v sd0|v rd1| _| "t#t$%|| jd2 d S )3Nr   zhc:b:p:i:o:Cf:l:s:d:D:)
zout=zbase=zdelim=z	propbase=zclass=zdefault=ident=zlabel=zskip=defineclasshelpz-hz--helpz-fcsv2rdfoutwzutf-8basepropbaseclassr   identr    delimskipdefaultcol   prop   z-oz--outz-bz--basez-dz--delimz-Dz	--defaultz-pz
--propbasez-lz--labelz-iz--identz-sz--skipz-cz--classz--col   z--prop   z-Cz--defineclassT)	delimiter)&r   getoptr   argvr   printHELPexitconfigparserConfigParserreadfpopenr   codecsr   r:   r   r   r   r   r   re   r   r   r   r   r   rc   r   r   r   
startswithr   r   r   r2   	fileinputinput)r   optsfilesconfigr   r   r   r   r   main  s   



















r   __main__)r%   r&   r'   r(   r)   r(   rB   )r4   r5   )NN)>__doc__
__future__r   r   r   r.   rm   r   r   r   r   r   r   typingr   r   r   r   r   urllib.parser   r:   rdflib.namespacer	   r
   r   rdflib.termr   __all__r   r   __annotations__r!   r$   r,   excelr2   r?   r@   rO   rT   rX   rb   rd   rf   rl   rp   r   rx   rz   r{   r}   r~   r   r   r   r   r   r   r   r   rK   r   r   r   r   <module>   sz    ?
	




 
o
