|
| 1 | +<!DOCTYPE html> |
| 2 | +<html lang="en"> |
| 3 | +<head><meta charset="utf-8"> |
| 4 | + <title>Caltech Library's Digital Library Development Sandbox</title> |
| 5 | + <link href='https://fonts.googleapis.com/css?family=Open+Sans' rel='stylesheet' type='text/css'> |
| 6 | + <link rel="stylesheet" href="/css/site.css"> |
| 7 | +</head> |
| 8 | +<body> |
| 9 | +<header> |
| 10 | +<a href="http://library.caltech.edu"><img src="/assets/liblogo.gif" alt="Caltech Library logo"></a> |
| 11 | +</header> |
| 12 | +<nav> |
| 13 | +<ul> |
| 14 | +<li> |
| 15 | +<a href="/">Home</a> |
| 16 | +</li> |
| 17 | +<li> |
| 18 | +<a href="index.html">README</a> |
| 19 | +</li> |
| 20 | +<li> |
| 21 | +<a href="license.html">LICENSE</a> |
| 22 | +</li> |
| 23 | +<li> |
| 24 | +<a href="install.html">INSTALL</a> |
| 25 | +</li> |
| 26 | +<li> |
| 27 | +<a href="docs/">Documentation</a> |
| 28 | +</li> |
| 29 | +<li> |
| 30 | +<a href="how-to/">How To</a> |
| 31 | +</li> |
| 32 | +<li> |
| 33 | +<a href="about.html">About</a> |
| 34 | +</li> |
| 35 | +<li> |
| 36 | +<a |
| 37 | +href="https://github.com/caltechlibrary/datatools">Github</a> |
| 38 | +</li> |
| 39 | +</ul> |
| 40 | +</nav> |
| 41 | + |
| 42 | +<section> |
| 43 | +<h1 id="name"> |
| 44 | +NAME |
| 45 | +</h1> |
| 46 | +<p> |
| 47 | +csvcleaner |
| 48 | +</p> |
| 49 | +<h1 id="synopsis"> |
| 50 | +SYNOPSIS |
| 51 | +</h1> |
| 52 | +<p> |
| 53 | +csvcleaner <a href="#options">OPTIONS</a> |
| 54 | +</p> |
| 55 | +<h1 id="description"> |
| 56 | +DESCRIPTION |
| 57 | +</h1> |
| 58 | +<p> |
| 59 | +csvcleaner normalizes a CSV file based on the options selected. It helps |
| 60 | +to address issues like variable number of columns, leading/trailing |
| 61 | +spaces in columns, and non-UTF-8 encoding issues. |
| 62 | +</p> |
| 63 | +<p> |
| 64 | +By default input is expected from standard in and output is sent to |
| 65 | +standard out (errors to standard error). These can be modified by |
| 66 | +appropriate options. The csv file is processed as a stream of rows so |
| 67 | +minimal memory is used to operate on the file. |
| 68 | +</p> |
| 69 | +<h1 id="options"> |
| 70 | +OPTIONS |
| 71 | +</h1> |
| 72 | +<dl> |
| 73 | +<dt> |
| 74 | +-help |
| 75 | +</dt> |
| 76 | +<dd> |
| 77 | +display help |
| 78 | +</dd> |
| 79 | +<dt> |
| 80 | +-license |
| 81 | +</dt> |
| 82 | +<dd> |
| 83 | +display license |
| 84 | +</dd> |
| 85 | +<dt> |
| 86 | +-version |
| 87 | +</dt> |
| 88 | +<dd> |
| 89 | +display version |
| 90 | +</dd> |
| 91 | +<dt> |
| 92 | +-verbose |
| 93 | +</dt> |
| 94 | +<dd> |
| 95 | +write verbose output to standard error |
| 96 | +</dd> |
| 97 | +<dt> |
| 98 | +-comma |
| 99 | +</dt> |
| 100 | +<dd> |
| 101 | +if set use this character in place of a comma for delimiting cells |
| 102 | +</dd> |
| 103 | +<dt> |
| 104 | +-comment-char |
| 105 | +</dt> |
| 106 | +<dd> |
| 107 | +if set, rows starting with this character will be ignored as comments |
| 108 | +</dd> |
| 109 | +<dt> |
| 110 | +-fields-per-row |
| 111 | +</dt> |
| 112 | +<dd> |
| 113 | +set the number of columns to output right padding empty cells as needed |
| 114 | +</dd> |
| 115 | +<dt> |
| 116 | +-i, -input |
| 117 | +</dt> |
| 118 | +<dd> |
| 119 | +input filename |
| 120 | +</dd> |
| 121 | +<dt> |
| 122 | +-left-trim |
| 123 | +</dt> |
| 124 | +<dd> |
| 125 | +left trim spaces on CSV out |
| 126 | +</dd> |
| 127 | +<dt> |
| 128 | +-o, -output |
| 129 | +</dt> |
| 130 | +<dd> |
| 131 | +output filename |
| 132 | +</dd> |
| 133 | +<dt> |
| 134 | +-output-comma |
| 135 | +</dt> |
| 136 | +<dd> |
| 137 | +if set use this character in place of a comma for delimiting output |
| 138 | +cells |
| 139 | +</dd> |
| 140 | +<dt> |
| 141 | +-quiet |
| 142 | +</dt> |
| 143 | +<dd> |
| 144 | +suppress error messages |
| 145 | +</dd> |
| 146 | +<dt> |
| 147 | +-reuse |
| 148 | +</dt> |
| 149 | +<dd> |
| 150 | +if false then a new array is allocated for each row processed, if true |
| 151 | +the array gets reused |
| 152 | +</dd> |
| 153 | +<dt> |
| 154 | +-right-trim |
| 155 | +</dt> |
| 156 | +<dd> |
| 157 | +right trim spaces on CSV out |
| 158 | +</dd> |
| 159 | +<dt> |
| 160 | +-stop-on-error |
| 161 | +</dt> |
| 162 | +<dd> |
| 163 | +exit on error, useful if you’re trying to debug a problematic CSV file |
| 164 | +</dd> |
| 165 | +<dt> |
| 166 | +-trim, -trim-spaces |
| 167 | +</dt> |
| 168 | +<dd> |
| 169 | +trim spaces on CSV out |
| 170 | +</dd> |
| 171 | +<dt> |
| 172 | +-trim-leading-space |
| 173 | +</dt> |
| 174 | +<dd> |
| 175 | +trim leading space from field(s) for CSV input |
| 176 | +</dd> |
| 177 | +<dt> |
| 178 | +-use-crlf |
| 179 | +</dt> |
| 180 | +<dd> |
| 181 | +if set use a carriage return and line feed in output |
| 182 | +</dd> |
| 183 | +<dt> |
| 184 | +-use-lazy-quotes |
| 185 | +</dt> |
| 186 | +<dd> |
| 187 | +use lazy quotes for CSV input |
| 188 | +</dd> |
| 189 | +</dl> |
| 190 | +<h1 id="examples"> |
| 191 | +EXAMPLES |
| 192 | +</h1> |
| 193 | +<p> |
| 194 | +Normalizing a spreadsheet’s column count to 5, padding columns as needed |
| 195 | +per row. |
| 196 | +</p> |
| 197 | +<pre><code> cat mysheet.csv | csvcleaner -fields-per-row=5</code></pre> |
| 198 | +<p> |
| 199 | +Trim leading spaces from output. |
| 200 | +</p> |
| 201 | +<pre><code> cat mysheet.csv | csvcleaner -left-trim</code></pre> |
| 202 | +<p> |
| 203 | +Trim trailing spaces from output. |
| 204 | +</p> |
| 205 | +<pre><code> cat mysheet.csv | csvcleaner -right-trim</code></pre> |
| 206 | +<p> |
| 207 | +Trim leading and trailing spaces from output. |
| 208 | +</p> |
| 209 | +<pre><code> cat mysheet.csv | csvcleaner -trim-spaces</code></pre> |
| 210 | +<p> |
| 211 | +csvcleaner 1.2.2 |
| 212 | +</p> |
| 213 | +</section> |
| 214 | + |
| 215 | +<footer> |
| 216 | +<span><h1><a href="http://caltech.edu">Caltech</a></h1></span> |
| 217 | +<span>© 2021 <a href="https://www.library.caltech.edu/copyright">Caltech library</a></span> |
| 218 | +<address>1200 E California Blvd, Mail Code 1-32, Pasadena, CA 91125-3200</address> |
| 219 | +<span>Phone: <a href="tel:+1-626-395-3405">(626)395-3405</a></span> |
| 220 | +<span><a href="mailto:library@caltech.edu">Email Us</a></span> |
| 221 | +<a class="cl-hide" href="sitemap.xml">Site Map</a> |
| 222 | +</footer> |
| 223 | +</body> |
| 224 | +</html> |
0 commit comments