
    4Ysh9:              
       6   d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlmZ ddlZddlmZmZ ddlmZ dgZdZi Zd Zd	 Zd
 Zej0                  fdZd-dZ G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z  G d de      Z! G d de      Z" G d de      Z# G d de      Z$ e       Z%d Z&d.d Z'd! Z(d-d"Z)d# Z*d-d$Z+d-d%Z,d& Z-d.d'Z.e&e'e(e)e+e-e.e*e,d(	Z/d) Z0 G d* de      Z1d+ Z2e3d,k(  r e2        yy)/z
A commandline tool for semi-automatically converting CSV to RDF.

See also https://github.com/RDFLib/pyTARQL in the RDFlib family of tools

try: ``csv2rdf --help``

    N)quote)RDFRDFS)	split_uriCSV2RDFa^  
csv2rdf.py     -b <instance-base>     -p <property-base>     [-D <default>]     [-c <classname>]     [-i <identity column(s)>]     [-l <label columns>]     [-s <N>] [-o <output>]     [-f configfile]     [--col<N> <colspec>]     [--prop<N> <property>]     <[-d <delim>]     [-C] [files...]"

Reads csv files from stdin or given files
if -d is given, use this delimiter
if -s is given, skips N lines at the start
Creates a URI from the columns given to -i, or automatically by numbering if
none is given
Outputs RDFS labels from the columns given to -l
if -c is given adds a type triple with the given classname
if -C is given, the class is defined as rdfs:Class
Outputs one RDF triple per column in each row.
Output is in n3 format.
Output is stdout, unless -o is specified

Long options also supported:     --base,     --propbase,     --ident,     --class,     --label,     --out,     --defineclass

Long options --col0, --col1, ...
can be used to specify conversion for columns.
Conversions can be:
    ignore, float(), int(), split(sep, [more]), uri(base, [class]), date(format)

Long options --prop0, --prop1, ...
can be used to use specific properties, rather than ones auto-generated
from the headers

-D sets the default conversion for columns not listed

-f says to read config from a .ini/config file - the file must contain one
section called csv2rdf, with keys like the long options, i.e.:

[csv2rdf]
out=output.n3
base=http://example.org/
col0=split(";")
col1=split(";", uri("http://example.org/things/",
                    "http://xmlns.com/foaf/0.1/Person"))
col2=float()
col3=int()
col4=date("%Y-%b-%d %H:%M:%S")

c                    t        j                  dd|       } t        j                  dd|       } | j                  d      } dj                  | d   j	                         g| dd D cg c]  }|j                          c}z         S c c}w )	zc
    CamelCase + lowercase initial a string


    FIRST_NM => firstNm

    firstNm => firstNm

    z[^\w] z([a-z])([A-Z])z\1 \2 r      N)resubsplitjoinlower
capitalize)labelxs     t/var/www/sten-cake5-migrate2.hellocrow.space/lexinfo-master/env/lib/python3.12/site-packages/rdflib/tools/csv2rdf.py
toPropertyr   _   ss     FF8S%(EFF#Y6EKKE77E!HNN$%qr(KA(KKLL(Ks   %B
c                 ^    | dd j                         s| dd j                         | dd  z   S | S )Nr      r   )isupperr   )r   s    r   toPropertyLabelr   o   s9    1:Qqz!E!"I--L    c                 D    t        |D cg c]  }| |   	 c}      S c c}w )zPreturn a set of indexes from a list
    >>> index([1,2,3],(0,2))
    (1, 3)
    )tuple)l_ir   s      r   indexr   u   s!    
 #A"Q%#$$#s   c              +   R   K   t        j                  | fd|i|}|D ]  }|  y w)Ndialect)csvreader)csv_datar!   kwargs
csv_readerrows        r   r&   r&   }   s2     H@g@@J 	s   %'c           	          |rCt        j                  |t        | j                  d      j	                  dd      d      z         }nt        j                  |       }||ft
        | <   |S )Nutf8r	   _r
   safe)rdflibURIRefr   encodereplaceuris)r   prefixclass_rs       r   	prefixurir5      sV    MM&5&)9)A)A#s)KRT#UUVMM!&kDGHr   c                       e Zd Zd Zd Zy)	NodeMakerc                 6    t         j                  j                  S N)r-   r   Literalselfs    r   rangezNodeMaker.range   s    {{"""r   c                 ,    t        j                  |      S r9   )r-   r:   r<   r   s     r   __call__zNodeMaker.__call__   s    ~~a  r   N)__name__
__module____qualname__r=   r@    r   r   r7   r7      s    #!r   r7   c                       e Zd Zd Zd Zd Zy)NodeUric                 Z    || _         |rt        j                  |      | _        y d | _        y r9   )r2   r-   r.   r3   )r<   r2   r3   s      r   __init__zNodeUri.__init__   s#     --/DKDKr   c                 D    t        || j                  | j                        S r9   )r5   r2   r3   r?   s     r   r@   zNodeUri.__call__   s    DKK55r   c                 R    | j                   xs t        j                  j                  S r9   )r3   r-   r   Resourcer;   s    r   r=   zNodeUri.range   s    {{1fjj111r   NrA   rB   rC   rH   r@   r=   rD   r   r   rF   rF      s    62r   rF   c                       e Zd ZddZy)NodeLiteralNc                     || _         y r9   f)r<   rQ   s     r   rH   zNodeLiteral.__init__   s	    r   r9   )rA   rB   rC   rH   rD   r   r   rN   rN      s    r   rN   c                       e Zd Zd Zd Zy)	NodeFloatc                     | j                   st        j                  t        |            S t	        | j                         r-t        j                  t        | j                  |                  S t        d      )Nz(Function passed to float is not callable)rQ   r-   r:   floatcallable	Exceptionr?   s     r   r@   zNodeFloat.__call__   sP    vv>>%(++DFF>>%q	"233BCCr   c                 6    t         j                  j                  S r9   )r-   XSDdoubler;   s    r   r=   zNodeFloat.range   s    zz   r   NrA   rB   rC   r@   r=   rD   r   r   rS   rS      s    D!r   rS   c                       e Zd Zd Zd Zy)NodeIntc                     | j                   st        j                  t        |            S t	        | j                         r-t        j                  t        | j                  |                  S t        d      )Nz&Function passed to int is not callable)rQ   r-   r:   intrV   rW   r?   s     r   r@   zNodeInt.__call__   sO    vv>>#a&))DFF>>#dffQi.11@AAr   c                 6    t         j                  j                  S r9   )r-   rY   r_   r;   s    r   r=   zNodeInt.range   s    zz~~r   Nr[   rD   r   r   r]   r]      s    Br   r]   c                       e Zd Zd Zd Zy)NodeBoolc                     | j                   st        j                  t        |            S t	        | j                         r-t        j                  t        | j                  |                  S t        d      )Nz'Function passed to bool is not callable)rQ   r-   r:   boolrV   rW   r?   s     r   r@   zNodeBool.__call__   sO    vv>>$q'**DFF>>$tvvay/22ABBr   c                 6    t         j                  j                  S r9   )r-   rY   rd   r;   s    r   r=   zNodeBool.range   s    zzr   Nr[   rD   r   r   rb   rb      s    Cr   rb   c                       e Zd Zd Zd Zy)NodeReplacec                      || _         || _        y r9   ab)r<   rj   rk   s      r   rH   zNodeReplace.__init__   s    r   c                 N    |j                  | j                  | j                        S r9   )r0   rj   rk   r?   s     r   r@   zNodeReplace.__call__   s    yy((r   N)rA   rB   rC   rH   r@   rD   r   r   rg   rg      s    )r   rg   c                       e Zd Zd Zd Zy)NodeDatec                 |    t        j                  t        j                  j                  || j                              S r9   )r-   r:   datetimestrptimerQ   r?   s     r   r@   zNodeDate.__call__   s(    ~~h//88DFFCDDr   c                 6    t         j                  j                  S r9   )r-   rY   dateTimer;   s    r   r=   zNodeDate.range   s    zz"""r   Nr[   rD   r   r   rn   rn      s    E#r   rn   c                       e Zd Zd Zd Zd Zy)	NodeSplitc                      || _         || _        y r9   seprQ   )r<   rx   rQ   s      r   rH   zNodeSplit.__init__   s    r   c                 B   | j                   st        j                  | _         t        | j                         st	        d      |j                  | j                        D cg c]5  }|j                         dk7  s| j                  |j                               7 c}S c c}w )Nz)Function passed to split is not callable!r
   )rQ   r-   r:   rV   rW   r   rx   strip)r<   r   ys      r   r@   zNodeSplit.__call__   sh    vv^^DFGHH+,77488+<Pa	Rqwwy!PPPs   B8!Bc                     | j                   r4t        | j                   t              r| j                   j                         S t        j                  |       S r9   )rQ   
isinstancer7   r=   r;   s    r   r=   zNodeSplit.range   s5    66j366<<>!t$$r   NrL   rD   r   r   ru   ru      s    Q%r   ru   c                       y)NignorerD   )argsr%   s     r   _config_ignorer      s    r   c                     t        | |      S r9   )rF   )r2   r3   s     r   _config_urir      s    66""r   c                      t               S r9   )rN   rD   r   r   _config_literalr      s
    =r   c                     t        |       S r9   )rS   rP   s    r   _config_floatr     s    Q<r   c                     t        | |      S r9   )rg   ri   s     r   _config_replacer     s    q!r   c                     t        |       S r9   )r]   rP   s    r   _config_intr   
  s    1:r   c                     t        |       S r9   )rb   rP   s    r   _config_boolr     s    A;r   c                     t        |       S r9   )rn   )format_s    r   _config_dater     s    Gr   c                     t        | |      S r9   )ru   rw   s     r   _config_splitr     s    S!r   )	r   uriliteralrU   r_   dater   r0   rd   c                 "    t        | t              S )z$Return a function for column mapping)evalconfig_functions)vs    r   columnr   '  s     #$$r   c                       e Zd Zd Zd Zd Zy)r   c                     d | _         d | _        d | _        d| _        d | _        d| _        d| _        d| _        d | _        i | _	        i | _
        t        j                  | _        d| _        y )NautoFr   ,)CLASSBASEPROPBASEIDENTLABELDEFINECLASSSKIPDELIMDEFAULTCOLUMNSPROPSsysstdoutOUTtriplesr;   s    r   rH   zCSV2RDF.__init__.  sb    
	

 	

::r   c                     | j                   j                  |j                         d|j                         d|j                         d       | xj                  dz  c_        y )Nr	   z .
r   )r   writen3r   )r<   spos       r   triplezCSV2RDF.tripleA  s9    @Ar   c                    t        j                          }| j                  r6t        j                  j	                  d| j                  j
                  z         | j                  dk7  r,t        | j                  t              s| j                  f| _        | j                  s/t        j                  d       t        j                  d      | _	        | j                  s/t        j                  d       t        j                  d      | _        t        | j                         D ]  }t#        |        t%        t#        |            }t'        t)        |D cg c]  }| j                  t+        |          c}            }| j,                  j/                         D ]  \  }}|||<   t1        |      d   ||<    | j2                  rw| j5                  | j6                  t8        j:                  t<        j>                         t        tA        |            D ]&  }||   ||   }
}	|	dk(  s|
dk(  r| jB                  jE                  || jF                        d	k(  rC| j5                  |	t8        j:                  t8        jH                         | j5                  |	t<        jJ                  t        jL                  tO        |
                   | j5                  |	t<        jP                  | j6                         | j5                  |	t<        j                  | jB                  jE                  |tR              j                                ) d
}|D ]C  }
	 | j                  dk(  r| j                  d|z     }nj| j                  djU                  tW        |
| j                        D cg c].  }tY        |j[                  d      j]                  dd      d      0 c}         }| j^                  rW| j5                  |t<        jJ                  t        jL                  djU                  tW        |
| j^                                           | j6                  r+| j5                  |t8        j:                  | j6                         t)        |
      D ]  \  }}|ja                         }|dk7  s| jB                  jE                  || jF                        d	k(  rF	  | jB                  jE                  |t        jL                        |      }t        |t$              r|D ]  }| j5                  |||   |        n| j5                  |||   |        |dz  }|dz  d
k(  rDt        j                  j	                  d|| jf                  t        j                          |z
  fz         F ti               }tj        j/                         D ]  \  }
}|\  }}| j5                  |t<        jJ                  t        jL                  |
             |sBt        jl                  |      }|jo                  |       | j5                  |t8        j:                  |        |D ]1  }| j5                  |t8        j:                  t<        j>                         3 | j                  jq                          t        j                  j	                  d|| jf                  fz         t        j                  j	                  dt        j                          |z
  z         y c c}w c c}w # tb        $ r7}t        j                  dd|||   ||jd                  fz  z          Y d }~d }~ww xY w#  t        j                  j	                  d|z          xY w)NzOutput to %s
r   z2No base given, using http://example.org/instances/zhttp://example.org/instances/z:No property base given, using http://example.org/property/zhttp://example.org/props/r   r
   r   r   z%dr*   r)   r	   r+   z#Could not process value for column z%d:%s in row %d, ignoring: %s i z$%d rows, %d triples, elapsed %.2fs.
zError processing line: %d
z#Converted %d rows into %d triples.
zTook %.2f seconds.
)9timer   r   stderrr   namer   r}   r   r   warningswarnr-   	Namespacer   r=   r   nextlistdict	enumerater   r   itemsr   r   r   r   r   typer   Classlenr   getr   Propertyr   r:   r   domaindefault_node_maker   r   r   r/   r0   r   rz   rW   messager   setr1   r.   addclose)r<   	csvreaderstartr   header_labelsheaderskr   r   hr   rowsr   r   _oeclassesucs                      r   convertzCSV2RDF.convertE  s   		88JJ-=>::
4::u(E**DJyyMMNO(()HIDI}}MMVW",,-HIDM tyy! 	AO	 T)_-y!V1$--
1">!VWXJJ$$& 	/DAqGAJ(|AM!	/ KK

CHHdjj93w<( 
M!$427bBh<<##At||4@Asxx6Atzz6>>/":M+NOAt{{DJJ7tzz4<<#3#3A7H#I#O#O#Q  3	B2::'))D4K0C)) */r4::)>$% !&ahhv&6&>&>sC&Hr RC ::KKTZZr4::AV8W)X ::KKSXXtzz:%bM DAq	ABw<<++At||<H$ C 0 0FNN CA FA)!T2*+ !EB$(KKWQZ$D!E !%CQ ?( 	&=A%JJ$$?tyy{U/BCD[3	l %ZZ\ 	,EBDAqKK4::v~~b'9:MM!$AAsxx+	,  	1AKK388TZZ0	1 	

?4BVVW

/499;3FGHw "W<:  ) $MM E"B#$gaj$		"B#C!C 

  !>!EFsX   [>A\,3[$C	\,*\,6A4[)*A\,$\,)	\)2,\$\,$\))\,,%]N)rA   rB   rC   rH   r   r   rD   r   r   r   r   -  s    &sIr   c                  	   t               } t        j                  t        j                  dd  dg d      \  }}t	        |      }d|v sd|v r$t        t               t        j                  d       d|v rt        j                         }|j                  t        |d                |j                  d      D ]|  \  }}|d	k(  rt        j                  |d
d      | _        )|dk(  rt        j                   |      | _        I|dk(  rt        j                   |      | _        i|dk(  rt        j&                  |      | _        |dk(  rt+        |      | _        |dk(  rt/        |      | _        |dk(  rt/        |      | _        |dk(  r|| _        |dk(  rt7        |      | _        |dk(  rt;        |      | _        |j?                  d      r&t;        |      | j@                  t7        |dd        <   <|j?                  d      sOt        j&                  |      | jB                  t7        |dd        <    d|v rt        j                  |d   d
d      | _        d|v rt        j                  |d   d
d      | _        d|v rt        j                   |d         | _        d|v rt        j                   |d         | _        d|v r
|d   | _        d|v r
|d   | _        d|v rt;        |d         | _        d |v rt;        |d          | _        d!|v rt        j                   |d!         | _        d"|v rt        j                   |d"         | _        d#|v rt/        |d#         | _        d$|v rt/        |d$         | _        d%|v rt/        |d%         | _        d&|v rt/        |d&         | _        d'|v rt7        |d'         | _        d(|v rt7        |d(         | _        d)|v rt        j&                  |d)         | _        d*|v rt        j&                  |d*         | _        |j                         D ]{  \  }}|j?                  d+      r%t;        |      | j@                  t7        |d,d        <   <|j?                  d-      sNt        j&                  |      | jB                  t7        |d.d        <   } | j(                  rd/|v sd0|v rd1| _        | jE                  tG        tI        jJ                  |      | j4                  2             y )3Nr   zhc:b:p:i:o:Cf:l:s:d:D:)
zout=zbase=zdelim=z	propbase=zclass=zdefault=ident=zlabel=zskip=defineclasshelpz-hz--helpz-fcsv2rdfoutwzutf-8basepropbaseclassr   identr   delimskipdefaultcol   prop   z-oz--outz-bz--basez-dz--delimz-Dz	--defaultz-pz
--propbasez-lz--labelz-iz--identz-sz--skipz-cz--classz--col   z--prop   z-Cz--defineclassT)	delimiter)&r   getoptr   argvr   printHELPexitconfigparserConfigParserreadfpopenr   codecsr   r-   r   r   r   r.   r   rd   r   r   r   r   r   r_   r   r   r   
startswithr   r   r   r&   	fileinputinput)r   optsfilesconfigr   r   s         r   mainr     sB   iG-- 	
KD%  :Dt|x4'dt|**,d4:&'LL+ 	=DAqEz$kk!S':f%//2j#)#3#3A#6 g &a 0m#&*1g#g $Qg $Qg !f"1vi"()e$.4QiAabE
+f%,2MM!,<c!AB%j)1	=4 t|kk$t*c7;$kk$w-g>t|''T
34''X7t|T
DYt| d,d k!23t|!++DJ7t!++D,>?t|T$Z(DT)_-t|T$Z(DT)_-t|4:44>*t|d4j1Dd9o6

 91<< *0)GOOC!"J'\\(#(.a(8GMM#ae*%	9 }}$$,/T*A"OOJyu5OPr   __main__r9   )NN)4__doc__r   r   r"   r   r   r   r   rp   r   r   urllib.parser   r-   r   r   rdflib.namespacer   __all__r   r1   r   r   r   excelr&   r5   objectr7   rF   rN   rS   r]   rb   rg   rn   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rA   rD   r   r   <module>r     si    	 
           &+<~ 
M % "% ! !2i 2) 
	! 	!	k 		{ 	)) )#{ #%	 %$ K #
 
 %KIf KI\lQ^ zF r   