Discussion:
[patch] scp + UTF-8
(too old to reply)
Ingo Schwarze
2016-01-19 21:48:53 UTC
Permalink
Hi,

Martijn sent the following patch to me in private and agreed that i post
it here.

In any other program in OpenBSD base, i'd probably agree with the
basic approach. Regarding OpenSSH, however, i worry whether wcwidth(3)
can be used. While wcwidth(3) is POSIX, it is not ISO C. Does
OpenSSH target platforms that don't provide wcwidth(3)? If so,
do you think the problem can be solved by simply providing US-ASCII
support only on such platforms, but no UTF-8 support at all?

If you think we can require wcwidth(3), or we can ditch UTF-8 support
where wcwidth(3) isn't available, i will work with Martijn to
iron out a few style issues such that we can submit a patch that
is ready for commit.

If you think we cannot require wcwidth(3) but need UTF-8 support
everywhere, i suggest to postpone this until we get djm@'s ssh(1)
banner patch in. I sent some feedback on that earlier, proposing
to use a whitelist rather than the blacklist proposed by djm@ which
seems dangerous to me. Should i integrate that suggestion into Damien's
patch, repost the modified patch, and then continue review? I suspect
there might be one or two other things that could be improved, but i'm
not quite sure yet.

Once that is in, we can do something similar for wcwidth(3).

Yours,
Ingo

P.S.
This patch also uses mbtowc(3), but i assume that's no problem
because that's ANSI C.

----- Forwarded message from Martijn van Duren -----

From: Martijn van Duren
Date: Sun, 17 Jan 2016 11:13:01 +0100
To: Ingo Schwarze <***@usta.de>
Subject: [patch] scp + UTF-8

[...]

I've tested this under the following conditions:
- It lines out the same way the current scp does for ascii.
- when shrinking the terminal it prints just as many characters
(width) of the filename as ascii would.
- To support terminals larger than MAX_WINSIZE and still be properly
indented I increased the buf size to 4x the size of MAX_WINSIZE,
since the maximum size of an UTF-8 char <should> be 4 bytes.
It's quite a lot more memory, but I reckon it's better than the
horrible indentation we have now.

I primarily developed this with scp and only minimally tested it with
sftp, but it should work with both. sftp already called setlocale,
so no patch is needed for sftp.c.

[...]

Index: progressmeter.c
===================================================================
RCS file: /cvs/src/usr.bin/ssh/progressmeter.c,v
retrieving revision 1.41
diff -u -p -r1.41 progressmeter.c
--- progressmeter.c 14 Jan 2015 13:54:13 -0000 1.41
+++ progressmeter.c 17 Jan 2016 09:07:51 -0000
@@ -30,9 +30,11 @@
#include <errno.h>
#include <signal.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
+#include <wchar.h>

#include "progressmeter.h"
#include "atomicio.h"
@@ -117,7 +119,7 @@ format_size(char *buf, int size, off_t b
void
refresh_progress_meter(void)
{
- char buf[MAX_WINSIZE + 1];
+ char buf[(MAX_WINSIZE * 4) + 1];
time_t now;
off_t transferred;
double elapsed;
@@ -125,8 +127,10 @@ refresh_progress_meter(void)
off_t bytes_left;
int cur_speed;
int hours, minutes, seconds;
- int i, len;
- int file_len;
+ int width, size, buf_width, buf_size;
+ int i;
+ int file_width;
+ wchar_t wc;

transferred = *counter - (cur_pos ? cur_pos : start_pos);
cur_pos = *counter;
@@ -157,16 +161,33 @@ refresh_progress_meter(void)

/* filename */
buf[0] = '\0';
- file_len = win_size - 35;
- if (file_len > 0) {
- len = snprintf(buf, file_len + 1, "\r%s", file);
- if (len < 0)
- len = 0;
- if (len >= file_len + 1)
- len = file_len;
- for (i = len; i < file_len; i++)
- buf[i] = ' ';
- buf[file_len] = '\0';
+ file_width = win_size - 36;
+ if (file_width > 0) {
+ buf[0] = '\r';
+ for (i = 0, buf_width = 0, buf_size = 1;
+ file[i] != '\0';) {
+ if ((size = mbtowc(&wc, &(file[i]), MB_CUR_MAX)) == -1) {
+ (void)mbtowc(NULL, NULL, MB_CUR_MAX);
+ buf[buf_size++] = '?';
+ buf_width++;
+ i++;
+ } else if ((width = wcwidth(wc)) == -1) {
+ buf[buf_size++] = '?';
+ buf_width++;
+ i++;
+ } else if (buf_width + width <= file_width &&
+ buf_size + size <= (int) sizeof(buf) - 35) {
+ memcpy(&(buf[buf_size]), &(file[i]), size);
+ i += size;
+ buf_size += size;
+ buf_width += width;
+ } else
+ break;
+ }
+ for (; buf_width < file_width &&
+ buf_size < (int) sizeof(buf) - 35; buf_width++)
+ buf[buf_size++] = ' ';
+ buf[buf_size] = '\0';
}

/* percent of transfer done */
@@ -174,18 +195,18 @@ refresh_progress_meter(void)
percent = ((float)cur_pos / end_pos) * 100;
else
percent = 100;
- snprintf(buf + strlen(buf), win_size - strlen(buf),
+ snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
" %3d%% ", percent);

/* amount transferred */
- format_size(buf + strlen(buf), win_size - strlen(buf),
+ format_size(buf + strlen(buf), sizeof(buf) - strlen(buf),
cur_pos);
- strlcat(buf, " ", win_size);
+ strlcat(buf, " ", sizeof(buf));

/* bandwidth usage */
- format_rate(buf + strlen(buf), win_size - strlen(buf),
+ format_rate(buf + strlen(buf), sizeof(buf) - strlen(buf),
(off_t)bytes_per_second);
- strlcat(buf, "/s ", win_size);
+ strlcat(buf, "/s ", sizeof(buf));

/* ETA */
if (!transferred)
@@ -194,9 +215,9 @@ refresh_progress_meter(void)
stalled = 0;

if (stalled >= STALL_TIME)
- strlcat(buf, "- stalled -", win_size);
+ strlcat(buf, "- stalled -", sizeof(buf));
else if (bytes_per_second == 0 && bytes_left)
- strlcat(buf, " --:-- ETA", win_size);
+ strlcat(buf, " --:-- ETA", sizeof(buf));
else {
if (bytes_left > 0)
seconds = bytes_left / bytes_per_second;
@@ -209,19 +230,21 @@ refresh_progress_meter(void)
seconds -= minutes * 60;

if (hours != 0)
- snprintf(buf + strlen(buf), win_size - strlen(buf),
+ snprintf(buf + strlen(buf),
+ sizeof(buf) - strlen(buf),
"%d:%02d:%02d", hours, minutes, seconds);
else
- snprintf(buf + strlen(buf), win_size - strlen(buf),
+ snprintf(buf + strlen(buf),
+ sizeof(buf) - strlen(buf),
" %02d:%02d", minutes, seconds);

if (bytes_left > 0)
- strlcat(buf, " ETA", win_size);
+ strlcat(buf, " ETA", sizeof(buf));
else
- strlcat(buf, " ", win_size);
+ strlcat(buf, " ", sizeof(buf));
}

- atomicio(vwrite, STDOUT_FILENO, buf, win_size - 1);
+ atomicio(vwrite, STDOUT_FILENO, buf, strlen(buf));
last_update = now;
}

Index: scp.c
===================================================================
RCS file: /cvs/src/usr.bin/ssh/scp.c,v
retrieving revision 1.184
diff -u -p -r1.184 scp.c
--- scp.c 27 Nov 2015 00:49:31 -0000 1.184
+++ scp.c 17 Jan 2016 09:07:52 -0000
@@ -83,6 +83,7 @@
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
+#include <locale.h>
#include <pwd.h>
#include <signal.h>
#include <stdarg.h>
@@ -501,6 +502,8 @@ main(int argc, char **argv)
targetshouldbedirectory ? " -d" : "");

(void) signal(SIGPIPE, lostconn);
+
+ (void) setlocale(LC_CTYPE, "");

if ((targ = colon(argv[argc - 1]))) /* Dest is remote host. */
toremote(targ, argc, argv);

----- End forwarded message -----
Darren Tucker
2016-01-19 22:39:33 UTC
Permalink
Post by Ingo Schwarze
Hi,
Martijn sent the following patch to me in private and agreed that i post
it here.
In any other program in OpenBSD base, i'd probably agree with the
basic approach. Regarding OpenSSH, however, i worry whether wcwidth(3)
can be used. While wcwidth(3) is POSIX, it is not ISO C. Does
OpenSSH target platforms that don't provide wcwidth(3)?
OpenSSH nominally targets POSIX, but it builds on a wide enough range
of platforms that it's likely at least some don't have it.

Our general approach is to target POSIX then implement any needed
missing bits either by stealing the implementation from OpenBSD, some
other BSD licensed source or writing from scratch. If we have to
we'll ifdef stuff but prefer not to.
Post by Ingo Schwarze
If so,
do you think the problem can be solved by simply providing US-ASCII
support only on such platforms, but no UTF-8 support at all?
Yes. That's what I did with mblen when we picked up a need for that
via libedit for platforms with no wide character support.

$ grep -i mblen openbsd-compat/*.h
openbsd-compat/openbsd-compat.h:#ifndef HAVE_MBLEN
openbsd-compat/openbsd-compat.h:# define mblen(x, y) (1)

Is there any reason the same approach would not work with wcwidth?

[...]
Post by Ingo Schwarze
P.S.
This patch also uses mbtowc(3), but i assume that's no problem
because that's ANSI C.
I would not assume that its existence in the standard is equal to its
existence in all deployments :-) That said it looks like we can
implement it in libcompat if needed.
--
Darren Tucker (dtucker at zip.com.au)
GPG key 8FF4FA69 / D9A3 86E9 7EEE AF4B B2D4 37C9 C982 80C7 8FF4 FA69
Good judgement comes with experience. Unfortunately, the experience
usually comes from bad judgement.
Roland Mainz
2016-01-20 00:13:10 UTC
Permalink
Post by Ingo Schwarze
Martijn sent the following patch to me in private and agreed that i post
it here.
In any other program in OpenBSD base, i'd probably agree with the
basic approach. Regarding OpenSSH, however, i worry whether wcwidth(3)
can be used. While wcwidth(3) is POSIX, it is not ISO C. Does
OpenSSH target platforms that don't provide wcwidth(3)? If so,
do you think the problem can be solved by simply providing US-ASCII
support only on such platforms, but no UTF-8 support at all?
If you think we can require wcwidth(3), or we can ditch UTF-8 support
where wcwidth(3) isn't available, i will work with Martijn to
iron out a few style issues such that we can submit a patch that
is ready for commit.
Some generic portability comments:
1. There are other modern encodings like GB18030 (support is even
mandatory for software sold to the government in PRC China) currently
in use and many "legacy" ones, so the current locale may be multibyte
but does not use UTF-8 as encoding
2. |wcwidth()| counts in terminal cells and not number of characters
(where one character might occupy one or more bytes), e.g. there are
characters which may occupy from zero to four terminal cells (actual
number of cells is a bit (not much) OS specific).
3. I am not sure whether there is a specific byte limit for UTF-8 in
any of the standards, e.g. "- To support terminals larger than
MAX_WINSIZE and still be properly indented I increased the buf size to
4x the size of MAX_WINSIZE, since the maximum size of an UTF-8 char
<should> be 4 bytes." might not be a portable assumption and I would
at least safeguard it.

----

Bye,
Roland
--
__ . . __
(o.\ \/ /.o) ***@nrubsig.org
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 3992797
(;O/ \/ \O;)
Ingo Schwarze
2016-01-19 23:54:41 UTC
Permalink
Hi Darren,
Post by Darren Tucker
Post by Ingo Schwarze
Martijn sent the following patch to me in private and agreed that i
post it here.
In any other program in OpenBSD base, i'd probably agree with the
basic approach. Regarding OpenSSH, however, i worry whether wcwidth(3)
can be used. While wcwidth(3) is POSIX, it is not ISO C. Does
OpenSSH target platforms that don't provide wcwidth(3)?
OpenSSH nominally targets POSIX, but it builds on a wide enough range
of platforms that it's likely at least some don't have it.
Our general approach is to target POSIX then implement any needed
missing bits either by stealing the implementation from OpenBSD, some
other BSD licensed source or writing from scratch. If we have to
we'll ifdef stuff but prefer not to.
Sure, that's what i expected.
Post by Darren Tucker
Post by Ingo Schwarze
If so, do you think the problem can be solved by simply providing
US-ASCII support only on such platforms, but no UTF-8 support at all?
Yes. That's what I did with mblen when we picked up a need for that
via libedit for platforms with no wide character support.
$ grep -i mblen openbsd-compat/*.h
openbsd-compat/openbsd-compat.h:#ifndef HAVE_MBLEN
openbsd-compat/openbsd-compat.h:# define mblen(x, y) (1)
Uh oh. I'm not quite sure what consequences that might entail in
libedit for sftp(1), which does use setlocale(LC_CTYPE, "")?
Did you audit those consequences?
Post by Darren Tucker
Is there any reason the same approach would not work with wcwidth?
#define wcwidth(x) (1) /* NO!! */

would be a security risk. One purpose of wcwidth(3) is to weed out
non-printable characters. Whatever replacement we use, we have to
make sure it returns -1 for every non-printable character on every
platform. We MUST NOT let the scp(1) progressmeter spew random
Unicode characters taken from the network at the user's terminal.
They might be control codes.

#define wcwidth(x) (-1) /* not really */

is not a security issue, but it would completely break filename
display even with the C/POSIX locale on those platforms. I briefly
considered

int
wcwidth(wchar_t wc) /* might break? */
{
if (wc < 0x20 || wc > 0x7e)
return -1;
return isprint((unsigned char)wc) ? 1 : -1;
}

But that isn't ideal either because as far as i know, ISO C
doesn't require that wchar_t is internally represented in a way
that puts ASCII in the range 0x00 to 0x7f. Using iswprint(3)
is not a very good idea either because that is C99, not C89,
and may not be available either.

So if we can't get a real implementation of wcwidth(3) on some
platform, it's better to completely disable UTF-8 and only allow
US-ASCII.

A real replacement implementation of wcwidth(3) is MUCH harder
than a real replacement implementation of mbtowc(3) and mblen(3).
It needs a big table of character ranges (see tmux(1)), while
the latter can be done in 50 lines (see mandoc(1)).

That's why i said: If we want full UTF-8 support on all platforms
no matter what and must have a complete replacement wcwidth(3), we
should first get djm@'s ssh(1) banner patch in and then do something
similar for wcwidth(3).
Post by Darren Tucker
Post by Ingo Schwarze
P.S.
This patch also uses mbtowc(3), but i assume that's no problem
because that's ANSI C.
I would not assume that its existence in the standard is equal to its
existence in all deployments :-) That said it looks like we can
implement it in libcompat if needed.
Yes, mbtowc(3) and mblen(3), certainly, but even those only for UTF-8,
not for any other locale.

Yours,
Ingo
Darren Tucker
2016-01-20 00:29:41 UTC
Permalink
Post by Ingo Schwarze
Post by Darren Tucker
$ grep -i mblen openbsd-compat/*.h
openbsd-compat/openbsd-compat.h:#ifndef HAVE_MBLEN
openbsd-compat/openbsd-compat.h:# define mblen(x, y) (1)
Uh oh. I'm not quite sure what consequences that might entail in
libedit for sftp(1), which does use setlocale(LC_CTYPE, "")?
Did you audit those consequences?
Audit? No. I spent a lot of time reading man pages but it's not
something I've dealt with before so I don't understand it very well.
That said, the behaviour on systems without mblen should be the same
as what it was before my change so it should be no worse.

The history is at https://bugzilla.mindrot.org/show_bug.cgi?id=1990
--
Darren Tucker (dtucker at zip.com.au)
GPG key 8FF4FA69 / D9A3 86E9 7EEE AF4B B2D4 37C9 C982 80C7 8FF4 FA69
Good judgement comes with experience. Unfortunately, the experience
usually comes from bad judgement.
Ingo Schwarze
2016-01-20 01:05:00 UTC
Permalink
Hi Roland,
Post by Roland Mainz
1. There are other modern encodings like GB18030
Yes, but there are no plans to support any other encodings except
UTF-8 in the OpenBSD base system, so supporting other encodings
would be a matter for the portable version, if at all. I will
consider whether it is possible to write multibyte character support
in a way that doesn't result in obfuscation (and hence loss of
security) on OpenBSD and yet supports other encodings elsewhere,
but i'm not yet sure that will be possible. In case of the slightest
doubt, i expect OpenSSH developers will prioritize security over
additional encoding support.
Post by Roland Mainz
(support is even mandatory for software sold to the government in
PRC China)
I'm not aware of any plans to sell OpenSSH to the government of
China, but they are of course welcome to use it for free.
Post by Roland Mainz
2. |wcwidth()| counts in terminal cells and not number of characters
(where one character might occupy one or more bytes), e.g. there are
characters which may occupy from zero to four terminal cells (actual
number of cells is a bit (not much) OS specific).
I never heard about any characters occupying more than three cells.
As far as i know, the result of wcwidth(3) is not specified by the
Unicode standard, so i'm usually looking at the Perl implementation
as a reference. Last time i looked there, i didn't find any actual
characters occupying more than two cells, even though characters
of width three might in principle be possible.
Post by Roland Mainz
3. I am not sure whether there is a specific byte limit for UTF-8
in any of the standards,
Yes, current Unicode limits codepoints to U+0000 to U+10FFFF, which
limits UTF-8 to one to four bytes. But five and six byte UTF-8
sequences were considered in the past, so you are right that we
should make sure that nothing breaks if some system has bogus
support for those.
Post by Roland Mainz
e.g. "- To support terminals larger than MAX_WINSIZE and still be
properly indented I increased the buf size to 4x the size
of MAX_WINSIZE, since the maximum size of an UTF-8 char <should>
be 4 bytes." might not be a portable assumption and I would
at least safeguard it.
Yes, thank you for your comments, i have taken notes in my TODO file
to check that they will not be forgotten when reviewing future patches.
In particular the last one is quite important:

* scp(1) comments by Roland Mainz:
try to make things work even with non-UTF-8 outside OpenBSD, if easy
make sure nothing breaks for wcwidth(...) > 2
make sure nothing breaks for MB_CUR_MAX > 4

Yours,
Ingo
Carson Gaspar
2016-01-20 02:31:44 UTC
Permalink
Post by Ingo Schwarze
that puts ASCII in the range 0x00 to 0x7f. Using iswprint(3)
is not a very good idea either because that is C99, not C89,
and may not be available either.
iswprint() is SUSv3/POSIX.1-2001 as well as C99. So is wcwidth(). So the
odds are if you have one, you have the other.

Historical perspective:
Solaris 2.5 (1995) had support for both in libw
Solaris 2.6 (1997) moved them to libc
Visual Studio 6 (1998) had support, possibly earlier versions
Red Hat Linux in 6.0 (1999)
NetBSD appears to have added it in 1.6 (2002)
FreeBSD in 5.0 (2003)
Mac OS X in 10.3 (2003)
Minix only got them with the 3.2.x NetBSD userland (2012?)

I got tired of looking up specific versions, but late enough IRIX, HPUX,
OpenVMS, Tru64, OSF1, VxWorks, and QNX Neutrino all support them.

ULTRIX never did.

Which platforms does the project care about that _don't_ support them?

For non-supporting platforms, GNU Gettext just does:

iswprint (__wctype_wint_t wc)
{
return wc >= ' ' && wc <= '~';
}

Glibc has some bit-shifting voodoo that does iswprint for UTF-8 only -
the code is simple enough to re-implement.

A full locale-aware version is a lot of work :-(
Michael Stone
2016-01-20 12:50:03 UTC
Permalink
Post by Roland Mainz
3. I am not sure whether there is a specific byte limit for UTF-8 in
any of the standards, e.g. "- To support terminals larger than
MAX_WINSIZE and still be properly indented I increased the buf size to
4x the size of MAX_WINSIZE, since the maximum size of an UTF-8 char
<should> be 4 bytes." might not be a portable assumption and I would
at least safeguard it.
Isn't that assumption completely broken in the presence of combining
characters?

Mike Stone
Ingo Schwarze
2016-01-20 14:53:13 UTC
Permalink
Hi Michael,
Post by Michael Stone
Post by Roland Mainz
3. I am not sure whether there is a specific byte limit for UTF-8 in
any of the standards, e.g. "- To support terminals larger than
MAX_WINSIZE and still be properly indented I increased the buf size to
4x the size of MAX_WINSIZE, since the maximum size of an UTF-8 char
<should> be 4 bytes." might not be a portable assumption and I would
at least safeguard it.
Isn't that assumption completely broken in the presence of combining
characters?
It is. As far as i understand, given any natural number N > 0,
you can construct a valid Unicode string of display width 1
such that its UTF-8 encoding consists of N bytes.

Not that such strings are terribly useful for large N, but we do
indeed have to keep in mind that the code must not break when it
encounters them. Martijn's code already seems safe in *that*
respect, it just cuts the string early and pads with blanks if
the buffer is too short, but i'll re-check before deciding on a
final version.

Thanks for the reminder!

Yours,
Ingo
Damien Miller
2016-01-27 08:05:41 UTC
Permalink
POSIX is fine, but why not prepare a filtered version of the filename
once instead of doing it on every output?
Post by Ingo Schwarze
Hi,
Martijn sent the following patch to me in private and agreed that i post
it here.
In any other program in OpenBSD base, i'd probably agree with the
basic approach. Regarding OpenSSH, however, i worry whether wcwidth(3)
can be used. While wcwidth(3) is POSIX, it is not ISO C. Does
OpenSSH target platforms that don't provide wcwidth(3)? If so,
do you think the problem can be solved by simply providing US-ASCII
support only on such platforms, but no UTF-8 support at all?
If you think we can require wcwidth(3), or we can ditch UTF-8 support
where wcwidth(3) isn't available, i will work with Martijn to
iron out a few style issues such that we can submit a patch that
is ready for commit.
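If you think we cannot require wcwidth(3) but need UTF-8 support
everywhere, i suggest to postpone this until we get djm@'s ssh(1)
banner patch in. I sent some feedback on that earlier, proposing
to use a whitelist rather than the blacklist proposed by djm@ which
seems dangerous to me. Should i integrate that suggestion into Damien's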
patch, repost the modified patch, and then continue review? I suspect
there might be one or two other things that could be improved, but i'm
not quite sure yet.
Once that is in, we can do something similar for wcwidth(3).
Yours,
Ingo
P.S.
This patch also uses mbtowc(3), but i assume that's no problem
because that's ANSI C.
----- Forwarded message from Martijn van Duren -----
From: Martijn van Duren
Date: Sun, 17 Jan 2016 11:13:01 +0100
Subject: [patch] scp + UTF-8
[...]
- It lines out the same way the current scp does for ascii.
- when shrinking the terminal it prints just as many characters
(width) of the filename as ascii would.
- To support terminals larger than MAX_WINSIZE and still be properly
indented I increased the buf size to 4x the size of MAX_WINSIZE,
since the maximum size of an UTF-8 char <should> be 4 bytes.
It's quite a lot more memory, but I reckon it's better than the
horrible indentation we have now.
I primarily developed this with scp and only minimally tested it with
sftp, but it should work with both. sftp already called setlocale,
so no patch is needed for sftp.c.
[...]
Index: progressmeter.c
===================================================================
RCS file: /cvs/src/usr.bin/ssh/progressmeter.c,v
retrieving revision 1.41
diff -u -p -r1.41 progressmeter.c
--- progressmeter.c 14 Jan 2015 13:54:13 -0000 1.41
+++ progressmeter.c 17 Jan 2016 09:07:51 -0000
@@ -30,9 +30,11 @@
#include <errno.h>
#include <signal.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
+#include <wchar.h>
#include "progressmeter.h"
#include "atomicio.h"
@@ -117,7 +119,7 @@ format_size(char *buf, int size, off_t b
void
refresh_progress_meter(void)
{
- char buf[MAX_WINSIZE + 1];
+ char buf[(MAX_WINSIZE * 4) + 1];
time_t now;
off_t transferred;
double elapsed;
@@ -125,8 +127,10 @@ refresh_progress_meter(void)
off_t bytes_left;
int cur_speed;
int hours, minutes, seconds;
- int i, len;
- int file_len;
+ int width, size, buf_width, buf_size;
+ int i;
+ int file_width;
+ wchar_t wc;
transferred = *counter - (cur_pos ? cur_pos : start_pos);
cur_pos = *counter;
@@ -157,16 +161,33 @@ refresh_progress_meter(void)
/* filename */
buf[0] = '\0';
- file_len = win_size - 35;
- if (file_len > 0) {
- len = snprintf(buf, file_len + 1, "\r%s", file);
- if (len < 0)
- len = 0;
- if (len >= file_len + 1)
- len = file_len;
- for (i = len; i < file_len; i++)
- buf[i] = ' ';
- buf[file_len] = '\0';
+ file_width = win_size - 36;
+ if (file_width > 0) {
+ buf[0] = '\r';
+ for (i = 0, buf_width = 0, buf_size = 1;
+ file[i] != '\0';) {
+ if ((size = mbtowc(&wc, &(file[i]), MB_CUR_MAX)) == -1) {
+ (void)mbtowc(NULL, NULL, MB_CUR_MAX);
+ buf[buf_size++] = '?';
+ buf_width++;
+ i++;
+ } else if ((width = wcwidth(wc)) == -1) {
+ buf[buf_size++] = '?';
+ buf_width++;
+ i++;
+ } else if (buf_width + width <= file_width &&
+ buf_size + size <= (int) sizeof(buf) - 35) {
+ memcpy(&(buf[buf_size]), &(file[i]), size);
+ i += size;
+ buf_size += size;
+ buf_width += width;
+ } else
+ break;
+ }
+ for (; buf_width < file_width &&
+ buf_size < (int) sizeof(buf) - 35; buf_width++)
+ buf[buf_size++] = ' ';
+ buf[buf_size] = '\0';
}
/* percent of transfer done */
@@ -174,18 +195,18 @@ refresh_progress_meter(void)
percent = ((float)cur_pos / end_pos) * 100;
else
percent = 100;
- snprintf(buf + strlen(buf), win_size - strlen(buf),
+ snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
" %3d%% ", percent);
/* amount transferred */
- format_size(buf + strlen(buf), win_size - strlen(buf),
+ format_size(buf + strlen(buf), sizeof(buf) - strlen(buf),
cur_pos);
- strlcat(buf, " ", win_size);
+ strlcat(buf, " ", sizeof(buf));
/* bandwidth usage */
- format_rate(buf + strlen(buf), win_size - strlen(buf),
+ format_rate(buf + strlen(buf), sizeof(buf) - strlen(buf),
(off_t)bytes_per_second);
- strlcat(buf, "/s ", win_size);
+ strlcat(buf, "/s ", sizeof(buf));
/* ETA */
if (!transferred)
@@ -194,9 +215,9 @@ refresh_progress_meter(void)
stalled = 0;
if (stalled >= STALL_TIME)
- strlcat(buf, "- stalled -", win_size);
+ strlcat(buf, "- stalled -", sizeof(buf));
else if (bytes_per_second == 0 && bytes_left)
- strlcat(buf, " --:-- ETA", win_size);
+ strlcat(buf, " --:-- ETA", sizeof(buf));
else {
if (bytes_left > 0)
seconds = bytes_left / bytes_per_second;
@@ -209,19 +230,21 @@ refresh_progress_meter(void)
seconds -= minutes * 60;
if (hours != 0)
- snprintf(buf + strlen(buf), win_size - strlen(buf),
+ snprintf(buf + strlen(buf),
+ sizeof(buf) - strlen(buf),
"%d:%02d:%02d", hours, minutes, seconds);
else
- snprintf(buf + strlen(buf), win_size - strlen(buf),
+ snprintf(buf + strlen(buf),
+ sizeof(buf) - strlen(buf),
" %02d:%02d", minutes, seconds);
if (bytes_left > 0)
- strlcat(buf, " ETA", win_size);
+ strlcat(buf, " ETA", sizeof(buf));
else
- strlcat(buf, " ", win_size);
+ strlcat(buf, " ", sizeof(buf));
}
- atomicio(vwrite, STDOUT_FILENO, buf, win_size - 1);
+ atomicio(vwrite, STDOUT_FILENO, buf, strlen(buf));
last_update = now;
}
Index: scp.c
===================================================================
RCS file: /cvs/src/usr.bin/ssh/scp.c,v
retrieving revision 1.184
diff -u -p -r1.184 scp.c
--- scp.c 27 Nov 2015 00:49:31 -0000 1.184
+++ scp.c 17 Jan 2016 09:07:52 -0000
@@ -83,6 +83,7 @@
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
+#include <locale.h>
#include <pwd.h>
#include <signal.h>
#include <stdarg.h>
@@ -501,6 +502,8 @@ main(int argc, char **argv)
targetshouldbedirectory ? " -d" : "");
(void) signal(SIGPIPE, lostconn);
+
+ (void) setlocale(LC_CTYPE, "");
if ((targ = colon(argv[argc - 1]))) /* Dest is remote host. */
toremote(targ, argc, argv);
----- End forwarded message -----
_______________________________________________
openssh-unix-dev mailing list
https://lists.mindrot.org/mailman/listinfo/openssh-unix-dev
Loading...