Quick links:
Flags
Verbs
Functions
Glossary
Release docs
DKVP I/O examples
DKVP I/O in Python
Here are the I/O routines:
#!/usr/bin/env python

# ================================================================
# Example of DKVP I/O using Python.
#
# Key point: Use Miller for what it's good at; pass data into/out of tools in
# other languages to do what they're good at.
#
#   bash$ python -i dkvp_io.py
#
#   # READ
#   >>> map = dkvpline2map('x=1,y=2', '=', ',')
#   >>> map
#   OrderedDict([('x', 1), ('y', 2)])
#
#   # MODIFY
#   >>> map['z'] = map['x'] + map['y']
#   >>> map
#   OrderedDict([('x', 1), ('y', 2), ('z', 3)])
#
#   # WRITE
#   >>> line = map2dkvpline(map, '=', ',')
#   >>> line
#   'x=1,y=2,z=3'
#
# ================================================================

import re
import collections


# ----------------------------------------------------------------
def _infer_type(text):
    """Return text as an int if it parses as one, else as a float, else unchanged."""
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            # Not parseable by this converter; try the next one.
            pass
    return text


# ----------------------------------------------------------------
# ips and ifs (input pair separator and input field separator) are nominally '=' and ','.
def dkvpline2map(line, ips, ifs):
    """Parse one DKVP line into an ordered map of key -> typed value.

    line: the record text, without its trailing newline.
    ips:  input pair separator, nominally '='. Used as a pattern for re.split.
    ifs:  input field separator, nominally ','. Used as a pattern for re.split.

    Returns a collections.OrderedDict in field order. Values which look like
    integers become int, values which look like floats become float, and
    everything else stays a string. If a key occurs twice, the last value wins.

    Empty fields (including a wholly empty line) are skipped. A field which
    lacks the pair separator is stored under its 1-up position within the
    line, as a string key, rather than raising.
    """
    record = collections.OrderedDict()
    for position, pair in enumerate(re.split(ifs, line), start=1):
        if pair == '':
            # Happens for an empty line or for repeated field separators.
            continue
        # Split on the first pair separator only, so values may contain it.
        parts = re.split(ips, pair, maxsplit=1)
        if len(parts) == 1:
            # No pair separator in this field: key it by position.
            key, value = str(position), pair
        else:
            key, value = parts
        record[key] = _infer_type(value)
    return record


# ----------------------------------------------------------------
# ops and ofs (output pair separator and output field separator) are nominally '=' and ','.
def map2dkvpline(map, ops, ofs):
    """Format a map as one DKVP line (without a trailing newline).

    map: mapping of key -> value; keys and values are converted with str().
    ops: output pair separator, nominally '='.
    ofs: output field separator, nominally ','.

    Returns the formatted string, with fields in the map's iteration order.
    """
    return ofs.join(str(key) + ops + str(map[key]) for key in map)
And here is an example using them:
cat polyglot-dkvp-io/example.py
#!/usr/bin/env python

# Example filter: reads DKVP records from standard input, modifies each one,
# and writes the modified record to standard output.

import sys
import re
import copy
import dkvp_io

# Iterating over sys.stdin stops only at end of input, so a blank line in the
# middle of the data does not end processing early.
for line in sys.stdin:
    # Read the original record:
    line = line.strip()
    if line == '':
        # Nothing to parse on a blank line.
        continue
    record = dkvp_io.dkvpline2map(line, '=', ',')

    # Drop a field (if present):
    record.pop('x', None)

    # Compute some new fields:
    record['ab'] = record['a'] + record['b']
    record['iy'] = record['i'] + record['y']

    # Add new fields which show the type of each already-existing field.
    # Snapshot the keys first, since the loop body adds keys to the record.
    for key in list(record):
        # The bare class name: "int", "float", "str", etc.
        record['t' + key] = type(record[key]).__name__

    # Write the modified record:
    print(dkvp_io.map2dkvpline(record, '=', ','))
Run as-is:
python polyglot-dkvp-io/example.py < data/small
a=pan,b=pan,i=1,y=0.726802,ab=panpan,iy=1.726802,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=eks,b=pan,i=2,y=0.522151,ab=ekspan,iy=2.522151,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=wye,b=wye,i=3,y=0.338318,ab=wyewye,iy=3.338318,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=eks,b=wye,i=4,y=0.134188,ab=ekswye,iy=4.134188,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=wye,b=pan,i=5,y=0.863624,ab=wyepan,iy=5.863624,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
Run as-is, then pipe to Miller for pretty-printing:
python polyglot-dkvp-io/example.py < data/small | mlr --opprint cat
a   b   i y        ab     iy       ta  tb  ti  ty    tab tiy
pan pan 1 0.726802 panpan 1.726802 str str int float str float
eks pan 2 0.522151 ekspan 2.522151 str str int float str float
wye wye 3 0.338318 wyewye 3.338318 str str int float str float
eks wye 4 0.134188 ekswye 4.134188 str str int float str float
wye pan 5 0.863624 wyepan 5.863624 str str int float str float
DKVP I/O in Ruby
Here are the I/O routines:
#!/usr/bin/env ruby

# ================================================================
# Example of DKVP I/O using Ruby.
#
# Key point: Use Miller for what it's good at; pass data into/out of tools in
# other languages to do what they're good at.
#
#   bash$ irb -I. -r dkvp_io.rb
#
#   # READ
#   irb(main):001:0> map = dkvpline2map('x=1,y=2', '=', ',')
#   => {"x"=>1, "y"=>2}
#
#   # MODIFY
#   irb(main):002:0> map['z'] = map['x'] + map['y']
#   => 3
#
#   # WRITE
#   irb(main):003:0> line = map2dkvpline(map, '=', ',')
#   => "x=1,y=2,z=3"
#
# ================================================================

# ----------------------------------------------------------------
# Type inference for a single field value: returns an Integer if the text
# parses as one, else a Float if it parses as one, else the text unchanged.
def dkvp_infer_type(text)
  Integer(text)
rescue ArgumentError
  begin
    Float(text)
  rescue ArgumentError
    # Leave as string
    text
  end
end

# ----------------------------------------------------------------
# ips and ifs (input pair separator and input field separator) are nominally '=' and ','.
#
# Parses one DKVP line (without its trailing newline) into a Hash of
# key => typed value, in field order. If a key occurs twice, the last value
# wins.
#
# Empty fields (including a wholly empty line) are skipped. A field which
# lacks the pair separator is stored under its 1-up position within the line,
# as a string key, rather than raising.
def dkvpline2map(line, ips, ifs)
  map = {}
  line.split(ifs).each_with_index do |pair, index|
    # Happens with repeated field separators.
    next if pair.empty?

    # Split on the first pair separator only, so values may contain it.
    k, v = pair.split(ips, 2)
    if v.nil?
      # No pair separator in this field: key it by position.
      v = k
      k = (index + 1).to_s
    end

    map[k] = dkvp_infer_type(v)
  end
  map
end

# ----------------------------------------------------------------
# ops and ofs (output pair separator and output field separator) are nominally '=' and ','.
#
# Formats a Hash as one DKVP line (without a trailing newline), converting
# keys and values with to_s, in the Hash's iteration order.
def map2dkvpline(map, ops, ofs)
  map.collect { |k, v| k.to_s + ops + v.to_s }.join(ofs)
end
And here is an example using them:
cat polyglot-dkvp-io/example.rb
#!/usr/bin/env ruby

# Example filter: reads DKVP records from the files named on the command line
# (or standard input), modifies each one, and prints the modified record.

require 'dkvp_io'

ARGF.each_line do |raw|
  # Parse the incoming line into a record:
  record = dkvpline2map(raw.chomp, '=', ',')

  # Remove a field:
  record.delete('x')

  # Derive some new fields from existing ones:
  record['ab'] = record['a'] + record['b']
  record['iy'] = record['i'] + record['y']

  # For every field present so far, add a "t"-prefixed field holding its class.
  # record.keys is a snapshot array, so adding keys inside the loop is safe.
  record.keys.each { |name| record["t#{name}"] = record[name].class }

  # Emit the modified record:
  puts map2dkvpline(record, '=', ',')
end
Run as-is:
ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small
a=pan,b=pan,i=1,y=0.726802,ab=panpan,iy=1.726802,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=eks,b=pan,i=2,y=0.522151,ab=ekspan,iy=2.522151,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=wye,b=wye,i=3,y=0.338318,ab=wyewye,iy=3.338318,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=eks,b=wye,i=4,y=0.134188,ab=ekswye,iy=4.134188,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=wye,b=pan,i=5,y=0.863624,ab=wyepan,iy=5.863624,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
Run as-is, then pipe to Miller for pretty-printing:
ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small | mlr --opprint cat
a   b   i y        ab     iy       ta     tb     ti      ty    tab    tiy
pan pan 1 0.726802 panpan 1.726802 String String Integer Float String Float
eks pan 2 0.522151 ekspan 2.522151 String String Integer Float String Float
wye wye 3 0.338318 wyewye 3.338318 String String Integer Float String Float
eks wye 4 0.134188 ekswye 4.134188 String String Integer Float String Float
wye pan 5 0.863624 wyepan 5.863624 String String Integer Float String Float