Here are the I/O routines:

#!/usr/bin/env python

# ================================================================
# Example of DKVP I/O using Python.
#
# Key point: Use Miller for what it's good at; pass data into/out of tools in
# other languages to do what they're good at.
#
#   bash$ python -i dkvp_io.py
#
#   # READ
#   >>> map = dkvpline2map('x=1,y=2', '=', ',')
#   >>> map
#   OrderedDict([('x', 1), ('y', 2)])
#
#   # MODIFY
#   >>> map['z'] = map['x'] + map['y']
#   >>> map
#   OrderedDict([('x', 1), ('y', 2), ('z', 3)])
#
#   # WRITE
#   >>> line = map2dkvpline(map, '=', ',')
#   >>> line
#   'x=1,y=2,z=3'
#
# ================================================================

import re
import collections

# ----------------------------------------------------------------
# ips and ifs (input pair separator and input field separator) are nominally '=' and ','.
def dkvpline2map(line, ips, ifs):
	"""Parse one DKVP-formatted line into an ordered key-to-value mapping.

	Args:
		line: The record text, e.g. 'x=1,y=2', without a trailing newline.
		ips: Input pair separator (nominally '='). It is passed to re.split,
			so it is interpreted as a regular expression.
		ifs: Input field separator (nominally ','). Also passed to re.split.

	Returns:
		A collections.OrderedDict with fields in their order of appearance.
		Each value is converted to int if it parses as one, else to float if
		it parses as one, else it is left as a string. If a key occurs more
		than once, the last value is kept. An empty line yields an empty map.

	Raises:
		ValueError: If a field does not contain the pair separator.
	"""
	record = collections.OrderedDict()

	# An empty line has no fields; return early rather than failing while
	# trying to unpack a key/value pair from the empty string.
	if line == '':
		return record

	for pair in re.split(ifs, line):
		# Split only at the first separator so values may themselves contain it.
		key, value = re.split(ips, pair, maxsplit=1)

		# Type inference. Only ValueError means "not a number"; anything else
		# (e.g. KeyboardInterrupt) must propagate to the caller.
		try:
			value = int(value)
		except ValueError:
			try:
				value = float(value)
			except ValueError:
				pass

		record[key] = value
	return record

# ----------------------------------------------------------------
# ops and ofs (output pair separator and output field separator) are nominally '=' and ','.
def map2dkvpline(map, ops, ofs):
	"""Format a mapping as a single DKVP line.

	Args:
		map: Mapping of field names to values; iterated in its own key order.
		ops: Output pair separator (nominally '='), placed between each key
			and its value.
		ofs: Output field separator (nominally ','), placed between pairs.

	Returns:
		The formatted line, e.g. 'x=1,y=2,z=3', with no trailing newline.
		Keys and values are converted with str(). An empty mapping yields ''.
	"""
	return ofs.join([str(key) + ops + str(map[key]) for key in map])

And here is an example using them:

$ cat polyglot-dkvp-io/example.py
#!/usr/bin/env python

import sys
import re
import copy
import dkvp_io

# Process standard input one record per line until end of input.
for line in sys.stdin:
	# Read the original record. Blank lines are skipped rather than treated
	# as end of input, so a stray empty line in the middle of the data does
	# not silently drop every record after it.
	line = line.strip()
	if line == '':
		continue
	record = dkvp_io.dkvpline2map(line, '=', ',')

	# Drop a field:
	record.pop('x')

	# Compute some new fields:
	record['ab'] = record['a'] + record['b']
	record['iy'] = record['i'] + record['y']

	# Add new fields which show type of each already-existing field.
	# Snapshot the keys first, since the loop body adds keys to the record.
	for key in list(record):
		# The class's bare name: "int", "float", "str", etc.
		record['t'+key] = type(record[key]).__name__

	# Write the modified record:
	print(dkvp_io.map2dkvpline(record, '=', ','))

Run as-is:

$ python polyglot-dkvp-io/example.py < data/small
a=pan,b=pan,i=1,y=0.7268028627434533,ab=panpan,iy=1.7268028627434533,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=eks,b=pan,i=2,y=0.5221511083334797,ab=ekspan,iy=2.5221511083334796,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=wye,b=wye,i=3,y=0.33831852551664776,ab=wyewye,iy=3.3383185255166477,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=eks,b=wye,i=4,y=0.13418874328430463,ab=ekswye,iy=4.134188743284304,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float
a=wye,b=pan,i=5,y=0.8636244699032729,ab=wyepan,iy=5.863624469903273,ta=str,tb=str,ti=int,ty=float,tab=str,tiy=float

Run as-is, then pipe to Miller for pretty-printing:

$ python polyglot-dkvp-io/example.py < data/small | mlr --opprint cat
a   b   i y                   ab     iy                 ta  tb  ti  ty    tab tiy
pan pan 1 0.7268028627434533  panpan 1.7268028627434533 str str int float str float
eks pan 2 0.5221511083334797  ekspan 2.5221511083334796 str str int float str float
wye wye 3 0.33831852551664776 wyewye 3.3383185255166477 str str int float str float
eks wye 4 0.13418874328430463 ekswye 4.134188743284304  str str int float str float
wye pan 5 0.8636244699032729  wyepan 5.863624469903273  str str int float str float

DKVP I/O in Ruby

Here are the I/O routines:

#!/usr/bin/env ruby

# ================================================================
# Example of DKVP I/O using Ruby.
#
# Key point: Use Miller for what it's good at; pass data into/out of tools in
# other languages to do what they're good at.
#
#   bash$ irb -I. -r dkvp_io.rb
#
#   # READ
#   irb(main):001:0> map = dkvpline2map('x=1,y=2', '=', ',')
#   => {"x"=>1, "y"=>2}
#
#   # MODIFY
#   irb(main):002:0> map['z'] = map['x'] + map['y']
#   => 3
#
#   # WRITE
#   irb(main):003:0> line = map2dkvpline(map, '=', ',')
#   => "x=1,y=2,z=3"
#
# ================================================================

# ----------------------------------------------------------------
# ips and ifs (input pair separator and input field separator) are nominally '=' and ','.
def dkvpline2map(line, ips, ifs)
  # Parses one DKVP line (e.g. 'x=1,y=2') into a hash, keeping fields in
  # their order of appearance. Each value becomes an Integer if Integer()
  # accepts it, otherwise a Float if Float() accepts it, otherwise it is
  # left as the original string.
  line.split(ifs).each_with_object({}) do |field, record|
    # Split at the first pair separator only, so values may contain it.
    key, raw = field.split(ips, 2)

    # Type inference: try the narrowest numeric type first.
    record[key] =
      begin
        Integer(raw)
      rescue ArgumentError
        begin
          Float(raw)
        rescue ArgumentError
          raw # Leave as string
        end
      end
  end
end

# ----------------------------------------------------------------
# ops and ofs (output pair separator and output field separator) are nominally '=' and ','.
def map2dkvpline(map, ops, ofs)
  # Formats a hash as one DKVP line: each key and value is converted with
  # to_s and joined by ops; the resulting pairs are joined by ofs.
  fields = []
  map.each do |key, value|
    fields << (key.to_s + ops + value.to_s)
  end
  fields.join(ofs)
end

And here is an example using them:

$ cat polyglot-dkvp-io/example.rb
#!/usr/bin/env ruby

require 'dkvp_io'

# Process each input line (from the named files, or standard input) as one record.
ARGF.each do |line|
  # Read the original record. Blank lines (for example a trailing empty
  # line) are skipped: they parse to an empty map, and the field arithmetic
  # below would then fail on nil values.
  line = line.chomp
  next if line.strip.empty?
  map = dkvpline2map(line, '=', ',')

  # Drop a field:
  map.delete('x')

  # Compute some new fields:
  map['ab'] = map['a'] + map['b']
  map['iy'] = map['i'] + map['y']

  # Add new fields which show type of each already-existing field.
  # map.keys returns a new array, so adding keys inside the loop is safe.
  map.keys.each do |key|
    map['t' + key] = map[key].class
  end

  # Write the modified record:
  puts map2dkvpline(map, '=', ',')
end

Run as-is:

$ ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small
a=pan,b=pan,i=1,y=0.7268028627434533,ab=panpan,iy=1.7268028627434533,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=eks,b=pan,i=2,y=0.5221511083334797,ab=ekspan,iy=2.5221511083334796,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=wye,b=wye,i=3,y=0.33831852551664776,ab=wyewye,iy=3.3383185255166477,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=eks,b=wye,i=4,y=0.13418874328430463,ab=ekswye,iy=4.134188743284304,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float
a=wye,b=pan,i=5,y=0.8636244699032729,ab=wyepan,iy=5.863624469903273,ta=String,tb=String,ti=Integer,ty=Float,tab=String,tiy=Float

Run as-is, then pipe to Miller for pretty-printing:

$ ruby -I./polyglot-dkvp-io polyglot-dkvp-io/example.rb data/small | mlr --opprint cat
a   b   i y                   ab     iy                 ta     tb     ti      ty    tab    tiy
pan pan 1 0.7268028627434533  panpan 1.7268028627434533 String String Integer Float String Float
eks pan 2 0.5221511083334797  ekspan 2.5221511083334796 String String Integer Float String Float
wye wye 3 0.33831852551664776 wyewye 3.3383185255166477 String String Integer Float String Float
eks wye 4 0.13418874328430463 ekswye 4.134188743284304  String String Integer Float String Float
wye pan 5 0.8636244699032729  wyepan 5.863624469903273  String String Integer Float String Float

SQL-output examples

SQL-input examples

Running shell commands

The system DSL function runs a shell command and puts its output — minus the final newline — into a record field. The command is any string: either a literal string, or a concatenation of strings which may include field values and other expressions.

$ mlr --opprint put '$o = system("echo hello world")' data/small
a   b   i x                   y                   o
pan pan 1 0.3467901443380824  0.7268028627434533  hello world
eks pan 2 0.7586799647899636  0.5221511083334797  hello world
wye wye 3 0.20460330576630303 0.33831852551664776 hello world
eks wye 4 0.38139939387114097 0.13418874328430463 hello world
wye pan 5 0.5732889198020006  0.8636244699032729  hello world

$ mlr --opprint put '$o = system("echo {" . NR . "}")' data/small
a   b   i x                   y                   o
pan pan 1 0.3467901443380824  0.7268028627434533  {1}
eks pan 2 0.7586799647899636  0.5221511083334797  {2}
wye wye 3 0.20460330576630303 0.33831852551664776 {3}
eks wye 4 0.38139939387114097 0.13418874328430463 {4}
wye pan 5 0.5732889198020006  0.8636244699032729  {5}

$ mlr --opprint put '$o = system("echo -n ".$a."| sha1sum")' data/small
a   b   i x                   y                   o
pan pan 1 0.3467901443380824  0.7268028627434533  f29c748220331c273ef16d5115f6ecd799947f13  -
eks pan 2 0.7586799647899636  0.5221511083334797  456d988ecb3bf1b75f057fc6e9fe70db464e9388  -
wye wye 3 0.20460330576630303 0.33831852551664776 eab0de043d67f441c7fd1e335f0ca38708e6ebf7  -
eks wye 4 0.38139939387114097 0.13418874328430463 456d988ecb3bf1b75f057fc6e9fe70db464e9388  -
wye pan 5 0.5732889198020006  0.8636244699032729  eab0de043d67f441c7fd1e335f0ca38708e6ebf7  -

Note that running a subprocess on every record takes a non-trivial amount of time. Compare asking the system date command for the current time in nanoseconds with computing the time in-process using systime:

$ mlr --opprint put '$t=system("date +%s.%N")' then step -a delta -f t data/small
a   b   i x                   y                   t                    t_delta
pan pan 1 0.3467901443380824  0.7268028627434533  1568774318.513903817 0
eks pan 2 0.7586799647899636  0.5221511083334797  1568774318.514722876 0.000819
wye wye 3 0.20460330576630303 0.33831852551664776 1568774318.515618046 0.000895
eks wye 4 0.38139939387114097 0.13418874328430463 1568774318.516547441 0.000929
wye pan 5 0.5732889198020006  0.8636244699032729  1568774318.517518828 0.000971

$ mlr --opprint put '$t=systime()' then step -a delta -f t data/small
a   b   i x                   y                   t                 t_delta
pan pan 1 0.3467901443380824  0.7268028627434533  1568774318.518699 0
eks pan 2 0.7586799647899636  0.5221511083334797  1568774318.518717 0.000018
wye wye 3 0.20460330576630303 0.33831852551664776 1568774318.518723 0.000006
eks wye 4 0.38139939387114097 0.13418874328430463 1568774318.518727 0.000004
wye pan 5 0.5732889198020006  0.8636244699032729  1568774318.518730 0.000003

DKVP I/O in Python

DKVP I/O in Ruby

SQL-output examples

SQL-input examples

Running shell commands