-
Notifications
You must be signed in to change notification settings - Fork 47
Collapse file tree
Files
Search this repository
/
Copy pathfpsync
More file actions
More file actions
Latest commit
executable file
·2050 lines (1869 loc) · 70.4 KB
/
fpsync
File metadata and controls
executable file
·2050 lines (1869 loc) · 70.4 KB
You must be signed in to make or propose changes
More edit options
Edit and raw actions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
#!/bin/sh
# Copyright (c) 2014-2025 Ganael LAPLANCHE <ganael.laplanche@martymac.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# This script is a simple wrapper showing how fpart can be used to migrate data.
# It uses fpart and a copy tool to spawn multiple instances to migrate data from
# src_dir/ to dst_url/. Jobs can execute either locally or over SSH.
# Version string printed by version()
FPSYNC_VERSION="1.7.1"
########## Default values for options
# The OPT_* variables below hold the built-in defaults. Where a letter is given
# in parentheses, it is the matching command-line option described in usage().
# External tool used to copy files
OPT_TOOL_NAME="rsync"
# External tool path (empty by default)
OPT_TOOL_PATH=""
# Number of sync jobs to run in parallel ("workers", -n)
OPT_JOBS=2
# Same, but autodetected (alternatives left commented out)
#OPT_JOBS=$(sysctl -n hw.ncpu) # On FreeBSD
#OPT_JOBS=$(nproc) # On Linux
# Maximum files or directories per sync job (-f)
OPT_FPMAXPARTFILES="2000"
# Maximum bytes per sync job (-s)
OPT_FPMAXPARTSIZE="$((4 * 1024 * 1024 * 1024))" # 4 GiB (4 * 1024^3 bytes)
# Work on a per-directory basis (disabled by default)
OPT_DIRSONLY=""
# Pack erroneous dirs apart and enable recursive rsync (disabled by default)
OPT_AGGRESSIVE=""
# SSH workers (execute jobs locally if not defined, -w)
OPT_WRKRS=""
# Shared dir (must be shared amongst all workers, -d)
OPT_SHDIR=""
# Temporary dir (local, used for queue management, -t)
OPT_TMPDIR="/tmp/fpsync"
# E-mail report option (-M): space-separated list of recipient addresses,
# no mail is sent when empty
OPT_MAIL=""
# Date format used for log prefix (-Z); passed as-is to date(1) by echo_log()
OPT_DATEFORMAT="+%s"
# Prepare mode (-p)
OPT_PREPARERUN=""
# List runs (-l)
OPT_LISTRUNS=""
# Run ID for resume mode (-r)
OPT_RUNID=""
# Replay mode (-R)
OPT_REPLAYRUN=""
# Archive run (-a)
OPT_ARCHIVERUN=""
# Delete run (-D)
OPT_DELETERUN=""
# User-settable tool options (-o)
OPT_TOOL=""
# Fpart options (-O), as a pipe-separated list. The default holds three '-x'
# options with the patterns '.zfs', '.snapshot*' and '.ckpt'.
OPT_FPART="-x|.zfs|-x|.snapshot*|-x|.ckpt"
# Sudo mode (-S)
OPT_SUDO=""
# Verbose mode (-v): numeric level, 0 = quiet. echo_log() prints a message
# only when this value is greater than or equal to the message's level.
OPT_VERBOSE="0"
# Source directory
OPT_SRCDIR=""
# Destination directory
OPT_DSTURL=""
########## Internal variables (cannot be set through CLI yet)
# Force color usage (even if stdout and stderr are *not* associated with a terminal)
OPT_FORCECOLORS=""
# POSIX-compliant shell used within generated jobs. It must exist locally
# and remotely (when using workers), as well as support '-c' (commands) and
# '-s' (stdin) options.
# That option may be used to work around the missing 'pipefail' option of
# '/bin/sh' on certain systems (e.g. Debian)
OPT_JOBSSHELL="/bin/sh"
#OPT_JOBSSHELL="/bin/bash"
########## Various functions
#set -o errexit
#set -o nounset
# Use the C locale
LC_ALL=C
# Our color constants
# See: https://en.wikipedia.org/wiki/ANSI_escape_code
# tput(1) complains on stderr (once per call) when TERM is unset or unknown, or
# when tput itself is missing, e.g. when started from cron. Silence it: the
# constants are then simply left empty and messages are printed uncolored.
COLOR_BLUE=$(tput setaf 4 2>/dev/null)
COLOR_GREEN=$(tput setaf 2 2>/dev/null)
COLOR_ORANGE=$(tput setaf 3 2>/dev/null)
COLOR_RED=$(tput setaf 1 2>/dev/null)
COLOR_WHITE=$(tput setaf 7 2>/dev/null)
COLOR_STOP=$(tput sgr0 2>/dev/null)
# Disable 'pipefail' option if not supported: probe the jobs shell with the
# option and fall back to the ':' no-op command when the probe fails
QUIRK_PIPEFAIL='set -o pipefail'
${OPT_JOBSSHELL} -c "${QUIRK_PIPEFAIL}" 2>/dev/null || \
    QUIRK_PIPEFAIL=':'
# Write the three-line fpsync banner (version, copyright, web sites) to stdout
version () {
    printf '%s\n' \
        "fpsync v${FPSYNC_VERSION} - Sync directories in parallel using fpart" \
        "Copyright (c) 2014-2025 Ganael LAPLANCHE <ganael.laplanche@martymac.org>" \
        "WWW: https://www.fpart.org, https://contribs.martymac.org"
}
# Print help: the version banner (through version()) followed by the usage
# text, both to stdout. The list of supported copy tools is obtained from
# tool_print_supported() when the text is expanded.
usage () {
    version
    cat << EOF
Usage: $0 [-p] [OPTIONS...] src_dir/ dst_url/
$0 -l
$0 -r runid [-R] [OPTIONS...]
$0 -a runid
$0 -D runid
COMMON OPTIONS:
-t /dir/ set fpsync temp dir to </dir/> (absolute path)
-d /dir/ set fpsync shared dir to </dir/> (absolute path)
This option is mandatory when using SSH workers.
-M mailaddr send an e-mail to mailaddr after a run. Multiple
-space-separated- addresses can be specified.
-Z prefix logs with human-readable dates instead of timestamps
-v verbose mode (default: quiet)
This option can be specified several times to
increase verbosity level.
-h this help
-V print version
SYNCHRONIZATION OPTIONS:
-m tool external copy tool to use: $(tool_print_supported)
(default: 'rsync')
-T path absolute path of copy tool (default: guessed)
-f y transfer at most <y> files or directories per sync job
-s z transfer at most <z> bytes per sync job
-E work on a per-directory basis ('rsync' tool only)
(WARNING!!! Enables rsync(1)'s --delete option!)
Specify twice to enable "aggressive" mode that will isolate
erroneous directories and enable recursive synchronization for
them ('rsync' tool only)
-o options override default copy tool options with <options>
See fpsync(1) for more details.
-O options override default fpart options with pipe-separated <options>
See fpsync(1) for more details.
-S use sudo for filesystem crawling and synchronizations
src_dir/ source directory (absolute path)
dst_url/ destination directory (or URL, when using 'rsync' tool)
JOB HANDLING AND DISPATCHING OPTIONS:
-n x start <x> concurrent sync jobs per run
-w wrks space-separated list of SSH workers
e.g.: -w 'login@host1 login@host2 login@host3'
or: -w 'login@host1' -w 'login@host2' -w 'login@host3'
Jobs are executed locally if not specified (default).
RUN HANDLING OPTIONS:
-p prepare mode: prepare target(s) and create a resumable
run by crawling filesystem but do not actually start
synchronization jobs.
-l list previous runs and their status.
-r runid resume run <runid>
(options -m, -T, -f, -s, -E, -o, -O, -S, /src_dir/ and
/dst_url/ are ignored when resuming a previous run)
-R replay mode (needs option -r): re-synchronize all
partitions from run <runid> instead of working on
remaining ones only.
-a runid archive run <runid> to temp dir
-D runid delete run <runid>
See fpsync(1) for more details.
EOF
}
# Terminate the script successfully (exit code 0)
# $1 = optional message, written to stdout when not empty
end_ok () {
    if [ -n "$1" ]
    then
        echo "$1"
    fi
    exit 0
}
# Terminate the script with an error (exit code 1)
# $1 = optional message, written to stderr when not empty
end_die () {
    if [ -n "$1" ]
    then
        echo "$1" 1>&2
    fi
    exit 1
}
# Print (to stdout) and log a message
# $1 = level (0 = quiet, 1 = verbose, >=2 more verbose): the message is
#      printed only when $1 is a number and OPT_VERBOSE is >= $1
# $2 = message to log (nothing is printed nor logged when empty)
# $3 = color used on stdout: blue, green, orange, red or white (optional, any
#      other value leaves the message uncolored)
# Whatever the level, the message is appended, uncolored, to FPSYNC_LOGFILE.
# Both outputs are prefixed with the current date, formatted using
# OPT_DATEFORMAT.
# Returns non-zero when the message is empty or could not be logged.
echo_log () {
    local _log_ts
    local _color_start=''
    local _color_stop=''

    _log_ts=$(date "${OPT_DATEFORMAT}")

    # Colors are used only when both stdout and stderr are terminals, unless
    # explicitly forced
    if { [ -t 1 ] && [ -t 2 ] ;} || [ -n "${OPT_FORCECOLORS}" ]
    then
        case "$3" in
        "blue")
            _color_start="${COLOR_BLUE}"
            _color_stop="${COLOR_STOP}"
            ;;
        "green")
            _color_start="${COLOR_GREEN}"
            _color_stop="${COLOR_STOP}"
            ;;
        "orange")
            _color_start="${COLOR_ORANGE}"
            _color_stop="${COLOR_STOP}"
            ;;
        "red")
            _color_start="${COLOR_RED}"
            _color_stop="${COLOR_STOP}"
            ;;
        "white")
            _color_start="${COLOR_WHITE}"
            _color_stop="${COLOR_STOP}"
            ;;
        esac
    fi

    # Empty message: nothing to print, nothing to log
    [ -n "$2" ] || return 1

    if is_num "$1" && [ "${OPT_VERBOSE}" -ge "$1" ]
    then
        printf '%s\n' "${_log_ts} ${_color_start}$2${_color_stop}"
    fi

    # Use printf here too (not echo): the logged message must be the same text
    # as the printed one, whatever backslashes or dashes it contains
    printf '%s\n' "${_log_ts} $2" >> "${FPSYNC_LOGFILE}"
}
# Check if $1 is an absolute path, i.e. if it starts with a slash
# Returns 0 if so, 1 otherwise.
# The whole value is matched with a shell pattern (no external process): a
# value containing newlines is absolute only if its very first character is
# '/', not if any of its lines starts with one.
is_abs_path() {
    case "$1" in
    /*) return 0 ;;
    esac
    return 1
}
# Check if $1 is a valid rsync URL
# Cf. rsync(1) :
# SSH: [USER@]HOST:DEST
# Rsync: [USER@]HOST::DEST
# Rsync: rsync://[USER@]HOST[:PORT]/DEST
# Simplified as: "anything but slash" followed by at least one ":"
# Returns 0 if so, 1 otherwise.
# The whole value is matched with shell patterns (no external process), so a
# later line of a multi-line value cannot make a local path look remote.
is_remote_path() {
    # Only what precedes the first slash may hold the ':' separator, and at
    # least one character must come before it
    case "${1%%/*}" in
    ?*:*) return 0 ;;
    esac
    return 1
}
# Check if $1 is a number, i.e. a non-empty string made of digits only
# Returns 0 if so, 1 otherwise.
# The whole value is matched with a shell pattern (no external process): a
# multi-line value is rejected even if one of its lines is numeric, so that
# callers can safely hand an accepted value to numeric tests.
is_num () {
    case "$1" in
    ''|*[!0-9]*) return 1 ;;
    esac
    return 0
}
# Check if $1 is an acceptable size argument
# - must be greater than 0
# - may contain 'kKmMgGtTpP' suffix
# Returns 0 if so, 1 otherwise.
# The whole value is matched with shell patterns (no external process), so a
# multi-line value is never accepted.
is_size () {
    # Numeric part, i.e. $1 without its optional (single) unit suffix
    local _digits="${1%[kKmMgGtTpP]}"

    case "${_digits}" in
    ''|*[!0-9]*) return 1 ;;    # empty or not purely numeric
    *[1-9]*) return 0 ;;        # at least one non-zero digit: > 0
    esac
    # Zeros only
    return 1
}
# Convert $1 (in bytes) to a human-readable size
bytes_to_human_size () {
if ! is_num "$1"
then
echo "$1"
return
fi
local _result=$(echo "$1" | bc 2>/dev/null) # Remove leading 0s
local _sufx="B,KiB,MiB,GiB,TiB,PiB"
local _round_max=$(( $(echo ${_sufx} | tr -cd ',' | \
wc -c | awk '{print $1}') + 1 ))
local _round=1
while [ "${_round}" -lt "${_round_max}" ] && \
[ $(echo "${_result} >= 1024" | bc 2>/dev/null) -eq 1 ]
do
_result=$(echo "scale=2; ${_result} / 1024" | bc 2>/dev/null)
echo_log "2" "<=== [QMGR] Queue processed"
else
# We are in prepare mode, just wait for fpart to finish FS crawling
wait
fi
# Show the final status when running verbosely
if [ ${OPT_VERBOSE} -ge 1 ]
then
    siginfo_handler
fi
# An 'sl_stop' file found in the jobs queue directory is reported as an
# interrupted run, which ends with an error exit code
[ ! -f "${JOBS_QUEUEDIR}/sl_stop" ] || {
    echo_log "1" "<=== Fpsync interrupted." "red"
    end_die
}
# Build the report subject; a prepared (not synchronized) run is logged and
# flagged as such in the subject
_report_subj="Fpsync run ${FPSYNC_RUNID}"
if [ -n "${OPT_PREPARERUN}" ]
then
    echo_log "0" "<=== Successfully prepared run: ${FPSYNC_RUNID}" "green"
    _report_subj="${_report_subj} (prepared)"
fi

# Elapsed time, in seconds, since the recorded start of the run
_ts_now=$(date '+%s')
_total_run_elapsed_time="$(( ${_ts_now} - ${_run_start_time} ))"

# Collect the log lists the report is built from:
# - 'ret' logs filtered by logfiles_filter_ret_with_errors
# - the same list passed through logfiles_filter_ret_to_stderr
# - 'stderr' logs filtered by logfiles_filter_stderr_without_ret
_report_logs_fatal=$(run_logs_list_by_ext "${FPSYNC_RUNID}" "ret" | \
    logfiles_filter_ret_with_errors)
_report_logs_fatal_stderr=$(echo "${_report_logs_fatal}" | \
    logfiles_filter_ret_to_stderr)
_report_logs_additional_stderr=$(run_logs_list_by_ext "${FPSYNC_RUNID}" "stderr" | \
    logfiles_filter_stderr_without_ret)

# Assemble the report body: verdict first, then the lists of logs worth
# reading and, when a mail is to be sent, a signature
_report_body=$(
    if [ -n "${_report_logs_fatal}" ]
    then
        echo "Fpsync completed with errors in ${_total_run_elapsed_time}s"
        echo ""
        echo "Some jobs did NOT return 0 (success):"
        echo "${_report_logs_fatal}"
        [ -z "${_report_logs_fatal_stderr}" ] || {
            echo ""
            echo "You may find more information in the following logs:"
            echo "${_report_logs_fatal_stderr}"
        }
    else
        echo "Fpsync completed without error in ${_total_run_elapsed_time}s."
    fi
    [ -z "${_report_logs_additional_stderr}" ] || {
        echo ""
        echo "Some jobs reported non-fatal problems to stderr:"
        echo "${_report_logs_additional_stderr}"
    }
    [ -z "${OPT_MAIL}" ] || {
        echo ""
        echo "--"
        echo "Logs generated by fpsync(1) tool."
        echo "See https://www.fpart.org for more information."
    }
)
# Print report
echo_log "1" "<=== ${_report_body}"
echo_log "2" "<=== End time: $(date)" "blue"
# Send mail if required
# OPT_MAIL is left unquoted on purpose: it may hold several space-separated
# addresses, each of them being a separate argument of the mail command.
# The source, the destination and the report are given to printf(1) as
# arguments, never as its format string: a '%' or a '\' found in a path would
# otherwise be interpreted and corrupt the message.
if [ -n "${OPT_MAIL}" ]
then
    printf 'Sync %s => %s\n\n%s\n' \
        "${OPT_SRCDIR}" "${OPT_DSTURL}" "${_report_body}" | \
        ${MAIL_BIN} -s "${_report_subj}" ${OPT_MAIL}
fi
# Final verdict: leave through end_ok when no job was reported as failed,
# through end_die (error exit code) otherwise
if [ -z "${_report_logs_fatal}" ]
then
    echo_log "1" "Info: Fpsync stopped (with success)" "green"
    end_ok
else
    echo_log "1" "Info: Fpsync stopped (with errors)" "red"
    end_die
fi